o
    ÇtjøY  ã                   @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ edd ee  d¡dZ	d	Z
d
Zdedefdd„Zdedefdd„Zdedefdd„ZdS )é    N)ÚPath)ÚOpenAI)Úload_dotenvT)ÚoverrideÚOPENAI_API_KEY)Úapi_keyuS  You are a pharma data validation engine.

Validate parameters[] against sop_limits[] specs.
Also validate lims_results[] â€” any Fail = DEVIATION.

Return JSON only:
{
  "batch_id": string or null,
  "product_name": string or null,
  "validation_rows": [
    {
      "source_id": "REF-XXXXX",
      "parameter_name": string,
      "target_value": string or null,
      "found_value": string or null,
      "validation_status": one of ["Valid", "Missing Value", "Unmapped Parameter", "Needs Review", "DEVIATION"],
      "confidence": number 0-100,
      "source_file": string,
      "issue_reason": string or null,
      "tab": one of ["parameters", "specifications"]
    }
  ],
  "compliance_score": number 0-100,
  "compliance_label": string,
  "alcoa_checklist": {
    "attributable": one of ["pass","warn","fail"],
    "legible": one of ["pass","warn","fail"],
    "contemporaneous": one of ["pass","warn","fail"],
    "original": one of ["pass","warn","fail"],
    "accurate": one of ["pass","warn","fail"],
    "complete": one of ["pass","warn","fail"],
    "consistent": one of ["pass","warn","fail"],
    "enduring": one of ["pass","warn","fail"],
    "available": one of ["pass","warn","fail"]
  },
  "missing_data_alerts": [
    {"type": string, "message": string, "severity": one of ["critical","warning","info"]}
  ],
  "ai_mapping_insights": {
    "auto_corrected": [{"description": string, "detail": string}],
    "suggestions": [{"description": string, "detail": string}],
    "warnings": [{"description": string, "detail": string}]
  },
  "summary": {
    "valid_count": number,
    "missing_count": number,
    "unmapped_count": number,
    "needs_review_count": number,
    "low_confidence_count": number
  }
}

Rules:
- parameters[] rows â†’ tab: "parameters"
- sop_limits[] rows â†’ tab: "specifications" â€” compare each spec lower/upper limit
- lims_results[] Fail rows â†’ tab: "parameters" with status DEVIATION
- DEVIATION: value outside lower/upper limit
- Missing Value: found_value is null
- compliance_label: "Pass" if 90+, "Minor Deviations Found" if 70-89, "Critical Issues" if below 70

Data:
<data>
uô  You are a pharma data validation engine.

Validate equipment_readings[] and extract operator information.

Return JSON only:
{
  "equipment_rows": [
    {
      "source_id": "EQ-XXXXX",
      "parameter_name": string,
      "target_value": string or null,
      "found_value": string or null,
      "validation_status": one of ["Valid", "Needs Review", "DEVIATION"],
      "confidence": number 0-100,
      "source_file": string,
      "issue_reason": string or null,
      "tab": "equipment"
    }
  ],
  "operator_rows": [
    {
      "source_id": "OP-XXXXX",
      "parameter_name": string,
      "target_value": string or null,
      "found_value": string or null,
      "validation_status": one of ["Valid", "Missing Value"],
      "confidence": number 0-100,
      "source_file": string,
      "issue_reason": string or null,
      "tab": "operators"
    }
  ],
  "source_document_rows": [
    {
      "source_id": "SRC-XXXXX",
      "parameter_name": string,
      "target_value": string or null,
      "found_value": string or null,
      "validation_status": "Valid",
      "confidence": number 0-100,
      "source_file": string,
      "issue_reason": null,
      "tab": "source_documents"
    }
  ]
}

Rules:
- equipment_rows: one row per equipment_reading â€” Alert status = DEVIATION
- operator_rows: one row per unique operator_id found â€” if operator_id null = Missing Value
- source_document_rows: one row per unique source file
- Fill operator_id from equipment_readings[].operator_id column

Data:
<data>
Úall_rowsÚreturnc                 C   sÂ   g }| D ]Z}|  dd¡}|dkrFdt|  dd¡ƒv s$dt|  dd¡ƒv r&dnd	}| |  d
d¡|  dd¡|  d¡p@|  d¡› d|dœ¡ q|dkr^| d|  dd¡|  d¡› dd	dœ¡ q|S )NÚvalidation_statusÚ úNeeds ReviewÚOOSÚissue_reasonÚCalibrationÚparameter_nameÚcriticalÚwarningÚtabÚunknownÚ	source_idz needs review)Útyper   ÚmessageÚseverityúMissing ValueÚmissing_valueu#    â€” value not found in source file)ÚgetÚstrÚappend)r   ÚalertsÚrowÚstatusÚsev© r"   ú,/var/www/html/fyndo/pharma/fyndo/validate.pyÚ_build_alerts†   s(   0


ü
ü€r$   c                 C   s  g }g }| D ]y}|  d¡dkr*| d|  d¡› d|  d¡› d|  dd	¡› d
œ¡ |  d¡dkrdt|  dd	¡ƒv rV| d|  d¡› d|  d¡› d|  d¡› dd
œ¡ |  d¡dkri| d|  dd¡d
œ¡ dt|  dd	¡ƒv r| d|  dd¡d
œ¡ qg ||dœS )Nr
   Ú	DEVIATIONzDEVIATION: r   zFound value Úfound_valuez is outside specified limits. r   r   )ÚdescriptionÚdetailr   r   zOOS Result: zResult z outside spec Útarget_valuez. OOS investigation required.r   Ú	operatorszOperator ID missing from BMRz.Operator found in Equipment Log but not in BMRr   zEquipment calibration overdueu3   Calibration overdue â€” equipment may be inaccurate©Úauto_correctedÚsuggestionsÚwarnings)r   r   r   )r   r.   r-   r   r"   r"   r#   Ú_build_insightsœ   s:   þþ
þ
þ€ýr/   Ú	flat_datac           7      ƒ   sx	  zo|   d¡|   d¡|   dg ¡|   dg ¡|   dg ¡dœ}tj|ddd d	… }t d
|¡‰ |   dg ¡|   dg ¡ttdd„ |   dg ¡D ƒƒƒdœ}tj|ddd d… }t d
|¡‰dd l}| 	| 
¡  d ‡ fdd„¡| 
¡  d ‡fdd„¡¡I d H \}}t |jd jj¡}t |jd jj¡}	|  dg ¡}
|	  dg ¡}|	  dg ¡}dd„ |   dg ¡D ƒ}tdd„ |D ƒdƒ}i }|   dg ¡D ]0}t|  dd¡ƒ ¡  ¡ }|rð|  dd¡}|  dd¡}|  d d¡}|› d!|› d"|›  ¡ ||< qÀ|
D ]%}t|  dd¡ƒ ¡  ¡ }||v r|| |d#< |  d$¡d%v r||d$< qó|   dg ¡}td&d„ |   dg ¡D ƒd'ƒ}|   dg ¡}td(d„ |   dg ¡D ƒd'ƒ}g }t|ƒD ]\}}|  d)d¡}|  d*d¡}|  d d¡}|sc|ro|› d!|› d"|›  ¡ nt|ƒ}|  d+d¡› d"|›  ¡ }|  d,¡d-k}|rŒd.n|  d,¡d/kr–d0nd1} | d2t|d3 ƒ d4¡› |  d5d6¡|||r±d0nd1d7|  d$¡pº||r¿d8nd d9d:œ	¡ qGtd;d„ |   dg ¡D ƒd<ƒ}!|   dg ¡}"td=d„ |"D ƒd ƒ}#td>d„ |"D ƒd?ƒ}$|#r-t|#  d@d¡ƒ}%dd l}&|& dA|%|&j¡}'|'rdB|' d3¡› dCndD}(| dEdF|#  dGdH¡› dI|(d0d7|$p'd?dJdKd:œ	¡ ttdLd„ |   dg ¡D ƒƒƒ})ttdMd„ |   dg ¡D ƒƒƒ}*g }+t|)ƒD ];\}},|,|*v }-|+ dNt|d3 ƒ dO¡› dP|,› dQdR|-rq|,ndS|-rwd1nd0dT|!p}d<|-r‚d n|,› dUdVd:œ	¡ qQg }.d3}/dW|dXdYd1dTdZœd[|pžd'dXdYd1d\dZœg}0|   d¡r»|0 d]|!p³d<dXdYd1d\dZœ¡ |"r×|0 d^|$pÅd?d_|#rË|(nd`|#rÑd0nd1d7dZœ¡ |   d¡ré|0 dadbdXdYd1dcdZœ¡ |   ddi ¡  de¡s|   ddi ¡  dfi ¡  dg¡r|0 dhdidXdYd1d\dZœ¡ |0D ]3}1|. djt|/ƒ dO¡› |1dk |1dl |1dm |1d, |1dn |1do |1d, d0kr7dpnd dqd:œ	¡ |/d37 }/q|
| | |+ |. }2drd„ |2D ƒ|d< dsd„ |2D ƒ|d9< dtd„ |2D ƒ|dK< dud„ |2D ƒ|dV< dvd„ |2D ƒ|dq< ddddddwœ}3|2D ]}|  dxd¡}4|4|3v r™|3|4  d37  < q„i d|  d¡“d|  d¡“dydz|  dd{¡› “d|d}t|2ƒ› d~“dt|2ƒ“d€t|2ƒ“d|2“d|  dg ¡“d9|  d9g ¡“dK|  dKg ¡“dV|  dVg ¡“dq|  dqg ¡“dd‚|3d id‚|3d9 id‚|3dK id‚|3dV id‚|3dq idwœ“dƒ|  dƒd¡“d„|  d„d¡“d…|  d…i ¡“d†t|2ƒ“t|2ƒd‡|3d |3d9 |3dK |3dV |3dq gdˆœtd‰d„ |2D ƒƒtdŠd„ |2D ƒƒtd‹d„ |2D ƒƒtdŒd„ |2D ƒƒtdd„ |2D ƒƒtdŽd„ |2D ƒƒdœdd‘œ¥}5|5W S  ty» }6 z=d’t|6ƒg d‚did‚did‚did‚did‚didwœdd“d”d”d”d”d”d”d”d”d”d•œ	g g g g d–œd‡g dˆœdddddd—œd˜œW  Y d }6~6S d }6~6ww )™NÚbatch_idÚproduct_nameÚ
parametersÚ
sop_limitsÚlims_results)r1   r2   r3   r4   r5   é   )Úindenti'  z<data>Úequipment_readingsÚmaintenance_recordsc                 s   ó&    | ]}|  d ¡r|  d d¡V  qdS ©Úsource_filer   N©r   ©Ú.0Úpr"   r"   r#   Ú	<genexpr>Ï   s   € þ

ÿz&validate_extraction.<locals>.<genexpr>)r8   r9   Úsource_filesi@  r   c                      ó,   t jjjdddddœdˆ dœgddid	d
S )Núgpt-4oi   ÚsystemúIYou are a pharma data validation engine. Return JSON only. No extra text.©ÚroleÚcontentÚuserr   Újson_objectr   ©ÚmodelÚ
max_tokensÚmessagesÚresponse_formatÚtemperature©ÚclientÚchatÚcompletionsÚcreater"   )Úprompt1r"   r#   Ú<lambda>Ú   ó    þøz%validate_extraction.<locals>.<lambda>c                      rC   )NrD   i¸  rE   rF   rG   rJ   r   rK   r   rL   rR   r"   )Úprompt2r"   r#   rX   ä   rY   Úvalidation_rowsÚequipment_rowsÚoperator_rowsc                 S   s   g | ]}|  d d¡‘qS )Úfilenamer   r=   ©r?   Úfr"   r"   r#   Ú
<listcomp>ù   s    z'validate_extraction.<locals>.<listcomp>c                 s   s,    | ]}d t |ƒv sdt |ƒv r|V  qdS )ÚBatchÚBMRN)r   r_   r"   r"   r#   rA   ú   s   €* ÚBatch_Record_BTC0048r   r   Úlower_limitÚupper_limitÚunitú-ú r)   r<   )r3   Nr   c                 s   r:   )r^   úLIMS_Results_BTC0048.csvNr=   r_   r"   r"   r#   rA     s   €$ rj   c                 s   ó*    | ]}|  d ¡rt|  d d¡ƒV  qdS r;   ©r   r   )r?   Úer"   r"   r#   rA     ó   €( Úspecification_lowÚspecification_highÚresult_valuer    ÚFailr%   ÚReviewr   ÚValidzLIMS-é   é   Ú	test_namezUnknown Testéd   u$   OOS â€” Result outside specificationÚspecifications)	r   r   r)   r&   r
   Ú
confidencer<   r   r   c                 s   rk   r;   rl   ©r?   Úeqr"   r"   r#   rA   1  rn   zEquipment_Logs_EQP_GRAN_01.csvc                 s   sD    | ]}d t | dd¡ƒ ¡ v sd t | dd¡ƒ ¡ v r|V  qdS )ÚOVERDUEr    r   ÚobservationN)r   r   Úupper©r?   Úmr"   r"   r#   rA   7  s   € ÿÿc                 s   rk   r;   rl   r€   r"   r"   r#   rA   <  rn   zMaintenance_Log_EQP_GRAN_01.csvr~   z(\d+)\s*days?\s*past\s*dueu   OVERDUE â€” z days past duer}   zREF-CAL-001u   Calibration Status â€” Úequipment_idÚEQP_GRAN_01u   Current (â‰¤6 months)u>   Calibration overdue â€” equipment may give inaccurate readingsÚ	equipmentc                 s   ó$    | ]}|  d ¡r|  d ¡V  qdS ©Úoperator_idNr=   r{   r"   r"   r#   rA   R  ó   € þ
ÿc                 s   r…   r†   r=   r>   r"   r"   r#   rA   W  rˆ   zOP-é   zOperator ID z in Equipment LogzPresent in BMRzMissing from BMR rowséb   z4 found in Equipment Log but not recorded in BMR rowsr*   zBatch Record BTC0048ÚPresentÚVerified)Únamer^   ÚtargetÚfoundr    rz   zLIMS Results BTC0048éc   zEquipment Log EQP_GRAN_01zMaintenance Log EQP_GRAN_01zCalibration CurrentÚCurrentzSOP Process GranulationzSOP_Process_Granulation.pdfé_   Údeviation_detailsÚdeviation_idÚdeviation_recordÚDeviation_IDzDeviation Report DEV-2024-0312zDeviation_DEV20240312.jsonzDOC-r   rŽ   r   rz   r^   zCalibration overdueÚsource_documentsc                 S   ó   g | ]}|  d ¡dkr|‘qS )r   r3   r=   ©r?   Úrr"   r"   r#   ra   À  ó    c                 S   r˜   )r   ry   r=   r™   r"   r"   r#   ra   Á  r›   c                 S   r˜   )r   r„   r=   r™   r"   r"   r#   ra   Â  r›   c                 S   r˜   )r   r*   r=   r™   r"   r"   r#   ra   Ã  r›   c                 S   r˜   )r   r—   r=   r™   r"   r"   r#   ra   Ä  r›   )r3   ry   r„   r*   r—   r   Úbatch_validation_titlezBatch Validation: zN/Ar'   z.Integrity check for automated extraction from z lab reports.Útotal_parametersÚtotal_validation_entriesÚtabsÚcountÚcompliance_scoreÚcompliance_labelÚalcoa_checklistÚmissing_data_alertszConfidence Heatmap)ÚlabelÚdatac                 s   ó"    | ]}|  d ¡dkrdV  qdS )r
   rt   ru   Nr=   r™   r"   r"   r#   rA   ñ  ó   €  c                 s   r§   )r
   r%   ru   Nr=   r™   r"   r"   r#   rA   ò  r¨   c                 s   r§   )r
   r   ru   Nr=   r™   r"   r"   r#   rA   ó  r¨   c                 s   r§   )r
   r   ru   Nr=   r™   r"   r"   r#   rA   ô  r¨   c                 s   r§   )r
   zUnmapped Parameterru   Nr=   r™   r"   r"   r#   rA   õ  r¨   c                 s   s$    | ]}|  d d¡dk rdV  qdS )rz   rx   éK   ru   Nr=   r™   r"   r"   r#   rA   ö  s   €" )Úvalid_countÚdeviation_countÚneeds_review_countÚmissing_countÚunmapped_countÚlow_confidence_countÚ	validated)Úai_mapping_insightsÚconfidence_heatmapÚsummaryr    ÚerrorÚErrorÚfail)	ÚattributableÚlegibleÚcontemporaneousÚoriginalÚaccurateÚcompleteÚ
consistentÚenduringÚ	availabler+   )rª   r­   r®   r¬   r¯   )r    r´   r[   rŸ   r¡   r¢   r£   r¤   r±   r²   r³   ) r   ÚjsonÚdumpsÚPROMPT_PART1ÚreplaceÚlistÚsetÚPROMPT_PART2ÚasyncioÚgatherÚget_event_loopÚrun_in_executorÚloadsÚchoicesr   rI   Únextr   ÚlowerÚstripÚ	enumerater   ÚzfillÚreÚsearchÚ
IGNORECASEÚgroupÚlenr$   r/   ÚsumÚ	Exception)7r0   Ú
part1_dataÚ	part1_strÚ
part2_dataÚ	part2_strrÇ   Ú	response1Ú	response2Úresult1Úresult2Ú
param_rowsÚ
equip_rowsÚop_rowsÚuploaded_filenamesÚbmr_filenameÚsop_mapÚsÚpnameÚloÚhirg   r   ÚlimsÚlims_filenameÚall_files_flatÚlims_file_realÚ	spec_rowsÚiÚlrÚtgtÚfvalÚis_failr    Úequip_filenamer9   Úoverdue_recordÚmaint_filenameÚobsrÒ   Ú
days_matchÚdays_strÚeq_operatorsÚparam_operatorsÚop_rows_pythonÚopÚin_bmrÚsrc_rows_pythonÚdoc_counterÚ
files_infoÚfir   Ú
tab_countsÚtÚresultrm   r"   )rW   rZ   r#   Úvalidate_extraction¾   sF  €


û



ÿý
õ€
€þþ,"
÷þýþ÷


ÿ


ÿ
÷ú	ú÷ú
ú
ú0
ú	÷
€
ÿ
þýüûúùø	÷
öõô




ûóìëêéûþúÕ.ûû
ýé€ÿr  )ÚosrÀ   Úpathlibr   Úopenair   Údotenvr   ÚgetenvrS   rÂ   rÆ   rÄ   r$   Údictr/   r  r"   r"   r"   r#   Ú<module>   s    
@;"