o
    q j!                     @   sh   d dl Z d dlZd dlmZ d dlmZ d dlmZ edd ee ddZ	d	Z
d
edefddZdS )    N)Path)OpenAI)load_dotenvT)overrideOPENAI_API_KEY)api_keyuK  You are a pharma root cause analysis engine.

Analyze the exception cluster data and all supporting evidence to identify root causes.

Return JSON only with this exact structure:
{
  "investigation_summary": {
    "summary": string,
    "key_signals": array of strings,
    "hypothesis": string,
    "supporting_evidence_refs": array of strings,
    "risk_level": one of ["Critical", "High", "Medium", "Low"]
  },
  "analysis_metadata": {
    "data_sources_count": number,
    "analysis_duration_sec": number,
    "method": string
  },
  "cluster_context": {
    "cluster_id": string,
    "cluster_name": string,
    "identified_via": string,
    "active_signals": [
      {
        "signal": string,
        "value": string or null,
        "type": one of ["temperature", "equipment", "operator", "process", "lims", "maintenance"]
      }
    ],
    "vertical_timeline": [
      {
        "timestamp": string,
        "event": string,
        "severity": one of ["critical", "warning", "info"],
        "source_file": string or null,
        "operator_id": string or null
      }
    ]
  },
  "root_cause_candidates": [
    {
      "rank": number,
      "title": string,
      "probability": number 0-100,
      "description": string,
      "evidence_refs": array of strings,
      "linked_oos_refs": array of strings,
      "suggested_immediate_actions": array of strings,
      "is_recommended": true or false
    }
  ],
  "five_whys": [
    {
      "level": "Y1",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y2",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y3",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y4",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": false
    },
    {
      "level": "Y5",
      "question": string,
      "answer": string,
      "evidence_source": string or null,
      "is_root": true
    }
  ],
  "fishbone_6m": {
    "machine": array of strings,
    "man": array of strings,
    "method": array of strings,
    "material": array of strings,
    "measurement": array of strings,
    "environment": array of strings
  },
  "supporting_evidence": [
    {
      "filename": string,
      "reference": string,
      "detail": string,
      "timestamp": string or null,
      "operator_id": string or null
    }
  ],
  "historical_recurrence": {
    "seen_count": number,
    "period": string,
    "previous_batches": array of strings,
    "pattern_note": string
  },
  "trip_score": {
    "score": number between 0 and 1,
    "reliability": one of ["High", "Medium", "Low"]
  },
  "human_confirmation": {
    "confirmed": false,
    "confirmed_by": null,
    "confirmed_at": null,
    "cause_id": null,
    "role": null
  },
  "approval_status": "Pending Investigator & QA Review",
  "qa_action_required": true or false
}

Rules:
- Use only evidence from the provided data — never invent facts
- Every root cause candidate must have evidence_refs from actual files
- is_recommended: true only for rank 1
- probability: whole number 0-100
- five_whys: always exactly 5 levels Y1-Y5, last one is_root: true
- analysis_metadata.data_sources_count: count of unique files used as evidence
- analysis_metadata.analysis_duration_sec: number between 2.0 and 5.0
- analysis_metadata.method: always "Bayesian correlation"
- cluster_context.identified_via: always "AI pattern matching"
- human_confirmation: always return with confirmed=false, all other fields null
- approval_status: always "Pending Investigator & QA Review"
- TRIP score: T=Traceability, R=Recurrence, I=Impact, P=Probability — average 0-1

Data to analyze:
<content>
datareturnc                    s  z9t j| dd}td|d d }tjjjddddd	|dgd
didd}|jd j	j
}t |}d|d< |W S  t jy_ } zdt|g g i dd d d d dddW  Y d }~S d }~w ty } zdt|g g i dd d d d dddW  Y d }~S d }~ww )N   )indentz	<content>i@  zgpt-4osystemzMYou are a pharma root cause analysis engine. Return JSON only. No extra text.)rolecontentusertypejson_objectr   )modelmessagesresponse_formattemperature	completedstatusparse_errorF)	confirmedconfirmed_byconfirmed_atcause_idr   z Pending Investigator & QA Review)r   errorroot_cause_candidates	five_whysfishbone_6mhuman_confirmationapproval_statusr   )jsondumps
RCA_PROMPTreplaceclientchatcompletionscreatechoicesmessager   loadsJSONDecodeErrorstr	Exception)r   data_strpromptresponserawresulte r7   '/var/www/html/fyndo/pharma/fyndo/rca.pyperform_rca   sf   

r9   )osr#   pathlibr   openair   dotenvr   getenvr'   r%   dictr9   r7   r7   r7   r8   <module>   s    
 