import asyncio
import json
import logging
import os
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI

# Load settings from a .env file before reading the API key. With
# override=True, values from the file take precedence over variables already
# present in the process environment (per python-dotenv's documented behavior).
load_dotenv(override=True)

# Module-wide OpenAI client. The key is None when OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Prompt template used by perform_rca as the user message of the chat request.
# It describes the JSON structure the model is asked to return, followed by a
# list of rules (several fields are pinned to fixed values). The literal
# "<content>" token on the last line is a placeholder: perform_rca substitutes
# it with the JSON-serialized input data via str.replace, so the token must
# remain in the template exactly as written.
RCA_PROMPT = """You are a pharma root cause analysis engine.

Analyze the exception cluster data and all supporting evidence to identify root causes.

Return JSON only with this exact structure:
{
  "investigation_summary": {
    "summary": string,
    "key_signals": array of strings,
    "hypothesis": string,
    "supporting_evidence_refs": array of strings,
    "risk_level": one of ["Critical", "High", "Medium", "Low"]
  },
  "analysis_metadata": {
    "data_sources_count": number,
    "analysis_duration_sec": number,
    "method": string
  },
  "cluster_context": {
    "cluster_id": string,
    "cluster_name": string,
    "identified_via": string,
    "active_signals": [
      {
        "signal": string,
        "value": string or null,
        "type": one of ["temperature", "equipment", "operator", "process", "lims", "maintenance"]
      }
    ],
    "vertical_timeline": [
      {
        "timestamp": string,
        "event": string,
        "severity": one of ["critical", "warning", "info"],
        "source_file": string or null,
        "operator_id": string or null
      }
    ]
  },
  "root_cause_candidates": [
    {
      "rank": number,
      "title": string,
      "probability": number 0-100,
      "description": string,
      "evidence_refs": array of strings,
      "linked_oos_refs": array of strings,
      "suggested_immediate_actions": array of strings,
      "is_recommended": true or false
    }
  ],
  "five_whys": [
    {
      "level": "Y1",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y2",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y3",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y4",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y5",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": true
    }
  ],
  "fishbone_6m": {
    "machine": array of strings,
    "man": array of strings,
    "method": array of strings,
    "material": array of strings,
    "measurement": array of strings,
    "environment": array of strings
  },
  "supporting_evidence": [
    {
      "filename": string,
      "reference": string,
      "detail": string,
      "timestamp": string or null,
      "operator_id": string or null
    }
  ],
  "historical_recurrence": {
    "seen_count": number,
    "period": string,
    "previous_batches": array of strings,
    "pattern_note": string
  },
  "trip_score": {
    "score": number between 0 and 1,
    "reliability": one of ["High", "Medium", "Low"]
  },
  "human_confirmation": {
    "confirmed": false,
    "confirmed_by": null,
    "confirmed_at": null,
    "cause_id": null,
    "role": null
  },
  "approval_status": "Pending Investigator & QA Review",
  "qa_action_required": true or false
}

Rules:
- Use only evidence from the provided data — never invent facts
- Every root cause candidate must have evidence_refs from actual files
- is_recommended: true only for rank 1
- probability: whole number 0-100
- five_whys: always exactly 5 levels Y1-Y5, last one is_root: true
- analysis_metadata.data_sources_count: count of unique files used as evidence
- analysis_metadata.analysis_duration_sec: number between 2.0 and 5.0
- analysis_metadata.method: always "Bayesian correlation"
- cluster_context.identified_via: always "AI pattern matching"
- human_confirmation: always return with confirmed=false, all other fields null
- approval_status: always "Pending Investigator & QA Review"
- TRIP score: T=Traceability, R=Recurrence, I=Impact, P=Probability — average 0-1

Data to analyze:
<content>
"""


# Maximum number of characters of serialized input embedded in the prompt.
_MAX_RCA_DATA_CHARS = 8000

# Appended when the input had to be cut, so the model can tell that evidence
# is missing instead of seeing JSON that simply stops mid-token.
_RCA_TRUNCATION_NOTICE = "\n[... input truncated: remaining data omitted ...]"

_rca_logger = logging.getLogger(__name__)


def _rca_failure(status: str, error: Exception) -> dict:
    """Build the fallback payload returned when the analysis cannot complete."""
    return {
        "status": status,
        "error": str(error),
        "root_cause_candidates": [],
        "five_whys": [],
        "fishbone_6m": {},
        "human_confirmation": {
            "confirmed": False,
            "confirmed_by": None,
            "confirmed_at": None,
            "cause_id": None,
            "role": None,
        },
        "approval_status": "Pending Investigator & QA Review",
    }


def _serialize_rca_data(data: dict) -> str:
    """Serialize *data* for the prompt, staying within the character budget."""
    pretty = json.dumps(data, indent=2)
    if len(pretty) <= _MAX_RCA_DATA_CHARS:
        return pretty

    # Dropping indentation and separator whitespace often lets the full
    # payload fit without discarding any evidence.
    compact = json.dumps(data, separators=(",", ":"))
    if len(compact) <= _MAX_RCA_DATA_CHARS:
        return compact

    _rca_logger.warning(
        "RCA input truncated from %d to %d characters",
        len(compact),
        _MAX_RCA_DATA_CHARS,
    )
    keep = _MAX_RCA_DATA_CHARS - len(_RCA_TRUNCATION_NOTICE)
    return compact[:keep] + _RCA_TRUNCATION_NOTICE


async def perform_rca(data: dict) -> dict:
    """Run the root cause analysis prompt against the model.

    The input is serialized to JSON, substituted for the ``<content>``
    placeholder in ``RCA_PROMPT`` and sent to the chat completions API in
    JSON-object mode. The blocking client call is executed in the event
    loop's default executor so other coroutines keep running while the
    request is in flight.

    Args:
        data: JSON-serializable exception cluster data and evidence.

    Returns:
        The parsed model response with ``"status": "completed"`` added.
        On failure, a fallback dict whose ``status`` is ``"parse_error"``
        (the response was not valid JSON) or ``"error"`` (any other
        exception), with ``error`` holding the exception message and empty
        analysis fields. Exceptions are never propagated to the caller.
    """
    try:
        data_str = _serialize_rca_data(data)
        prompt = RCA_PROMPT.replace("<content>", data_str)

        def _request_completion():
            # Synchronous network call; must not run on the event loop thread.
            return client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a pharma root cause analysis engine. Return JSON only. No extra text."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                response_format={"type": "json_object"},
                temperature=0
            )

        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _request_completion)

        raw = response.choices[0].message.content
        if raw is None:
            # Report a clear reason instead of json.loads' TypeError on None.
            raise ValueError("model returned no message content")

        result = json.loads(raw)
        result["status"] = "completed"
        return result

    except json.JSONDecodeError as exc:
        return _rca_failure("parse_error", exc)
    except Exception as exc:
        # Boundary handler: callers expect a status payload, not an exception.
        return _rca_failure("error", exc)