
    ڟicB                         d Z ddlZddlmZmZmZmZ ddlZddl	m
Z
 ddlmZmZmZmZmZ ddlmZ  ej&                  e      Z G d d      Zy)	z9Interaction evaluator for preference learning evaluation.    N)DictListAnyOptional)UserSimulator)accuracy	precisionrecallf1compute_threshold)DiversitySamplerc            	       H    e Zd ZdZ	 	 	 ddedededefdZd Zde	e
   fd	Zy
)InteractionEvaluatorz.Evaluator for interactive preference learning.	simulatortop_kroundssampling_strategyc                     || _         || _        || _        || _        || _        g | _        g | _        g | _        g | _        d| _	        d| _
        d| _        t        j                  d| d| d|        y)aY  Initialize interaction evaluator.
        
        Args:
            manager: PipelineManager instance
            simulator: UserSimulator instance
            top_k: Number of patterns to recommend each round
            rounds: Number of interaction rounds
            sampling_strategy: Pattern sampling strategy
                - "top": Select top-k highest scores (original)
                - "mixed": Mix of top, middle, and random (recommended)
                - "stratified": Stratified sampling by score ranges
                - "uncertainty": Select patterns near decision boundary
        Nz(InteractionEvaluator initialized: top_k=z	, rounds=z, sampling_strategy=)managerr   r   r   r   historyprecision_historyrecall_history
f1_historypre_interaction_metricsall_patternsground_truthloggerinfo)selfr   r   r   r   r   s         K   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/experiment/evaluator.py__init__zInteractionEvaluator.__init__   s    * "
!2!# '+$ ! >ugYvhVjk|j}~    c                    ddl m}m} | j                  j                  j                  ||      }|Ft        j                  dt        |       dt        j                  j                  |      dd       |}n| j                  j                  j                  |      }|s(| j                  j                  j                  |||      S  ||| j                  j                  j                   j#                         	      }t        j                  d
t        |       dt        j                  j                  |      dd       | j                  j$                  xr< | j                  j&                  duxr" | j                  j&                  j                   du}	g }
|D ]  }|j)                         }d|vr_|j+                  dg       }t-        |t.              r|j1                  d      }| j                  j                  j3                  |      }|||d<   |
j5                  |        |	rbt        j7                  dt        |       d        ||
|| j                  j&                  j                   | j                  j8                        }|S t        j7                  dt        |       d        ||
|d| j                  j8                        }|S )zScore patterns using manager's method which handles contrastive learning properly.
        
        This ensures that contrastive learning model is used when available.
        IMPORTANT: Use the UPDATED user vector that includes latest feedback.
        r   )score_patternsbuild_user_vectorNzUsing fused user vector (dim=z, norm=.4f)user_idinteraction_round)dimz$Built user vector from profile (dim=	embeddingpattern,zScoring z) patterns with contrastive learning model)modelalphaz5 patterns with baseline method (no contrastive model))preference.scorerr$   r%   r   learnerget_user_vectorr   debuglennplinalgnormmemoryget_user_profilescore_all_patternsembedderr/    get_sentence_embedding_dimensionstage3_use_contrastivetrainercopyget
isinstancestrsplitencode_patternappendr   stage3_alpha)r   patternsr)   r*   r$   r%   fused_vectoruser_vecuser_profileuse_contrastivepatterns_with_embeddingsr-   pattern_copypattern_listpattern_vecscoress                   r    _score_patterns_with_managerz1InteractionEvaluator._score_patterns_with_manager7   s    	H ||++;;GEVW#LL8\9J8K7SUS\S\SaSabnSopsRttuvw#H  <<..??HL||++>>#&7 ?   )LL))//PPRH LL?HgVXV_V_VdVdemVnorUsstuv  <<>> A,,..d:A,,..44D@ 	
 $& G"<<>L,.&{{9b9lC0#/#5#5c#:L"ll33BB<P*0;L-$++L9   KK(3x=/1Z[\#(ll**00ll//	F M KK(3x=/1fgh#(ll//	F Mr"   returnc                    t         j                  d       t         j                  d       | j                  j                  j	                  ddd      | _        t         j                  dt        | j
                         d       t         j                  d	       | j                  j                  | j
                        | _	        t         j                  d
t        | j                  j                                dt        | j                         d       | j                  j                  }| j                  j                  j                  |t        | j                  j                         | j                  j"                  rt        | j                  j"                        ng        d}	 | j                  j%                  || j&                        }|d   }t         j                  d|        | j-                  | j
                  |d      }|.t         j+                  d       dgt        | j
                        z  }t/        |      }t1        |      D 	ci c]  \  }}	||	|k\  rdnd }
}}	t3        | j                  |
      t5        | j                  |
      t7        | j                  |
      t9        | j                  |
      |d| _        t         j                  d| j:                  d   dd| j:                  d   dd| j:                  d   dd | j:                  d!   dd"|dd#       t=        | j&                        D ]t  }t         j                  d$|dz    d| j&                   d%       |dk(  rY|r%j?                  d&g       }|j?                  d'i       }n| j                  jA                  |d      }|j?                  d&g       }i }nf|r3d(tC               v r'r%|j?                  d&g       }|j?                  d'i       }n1| j                  jA                  |d      }|j?                  d&g       }i }|s-t         j+                  d)|dz    d*       | j
                  dd+ }i }g }|D ]  }|j?                  d,g       }tE        |tF              r|jI                  d-      }d-jK                  tM        |            }|j?                  ||j?                  dd            }	|jO                  |	        t        |      | jP                  k\  r]tS        jT                  ||| jP                  | jV                  .      }t         j                  d/t        |       d0| jV                   d1       n1|}t         j                  d2t        |       d3| jP                   d#       g g d4}|D ]  }| j                  jY                  |      }tE        |j?                  d,      t              r!d-jK                  |j?                  d,g             ntG        |j?                  d,d5            }|dk(  r|d6   jO                  |       |d7   jO                  |        t         j                  d8t        |d6          d9t        |d7          d:       |rb|| j&                  dz
  k  rP	 | j                  j[                  ||      }t         j                  d;t        |j?                  d&g              d<       n4|r2	 | j                  j[                  ||       t         j                  d>       | j                  j\                  <| j                  j\                  j^                  t         ja                  d@|dz    dA       | j-                  | j
                  ||dz         }|5t         j+                  dB|dz    dC       dgt        | j
                        z  }|rtc        jd                  |      }t         j                  d@|dz    dD|jg                         ddE|ji                         ddF|jk                         ddGtc        jl                  |      ddH|jo                         d       t/        |      }i }t1        |      D ]  \  }}	|	|k\  rdnd||<    t        dI |j                         D              }t         j                  d@|dz    dJ| dt        |       dK|dd#	       t3        | j                  |      }t5        | j                  |      }t7        | j                  |      }t9        | j                  |      }| jp                  jO                  |       | jr                  jO                  |       | jt                  jO                  |       | jv                  jO                  |       t         j                  d@|dz    dL|dd|dd|dd |dd"|dd#       w t         j                  dM| jp                  dN   dd| jr                  dN   dd| jt                  dN   dd | jv                  dN   d       | jp                  | jr                  | jt                  | jv                  | j:                  dOS # t(        $ r%}t         j+                  d|        d}Y d}~d}~ww xY wc c}	}w # t(        $ r'}t         j+                  d=|        d}d}Y d}~d}~ww xY w# t(        $ r#}t         j+                  d?|        Y d}~d}~ww xY w)Pa/  Run evaluation loop.
        
        Each round:
        1. Recommend patterns
        2. Simulate feedback
        3. Update model
        4. Predict ALL patterns
        5. Compute accuracy
        6. Store history
        
        Returns:
            List of accuracy scores for each round
        zStarting evaluation loop...zStep 1: Mining all patterns...g333333?   
confidence)min_participationmax_pattern_sizepriorityzMined z	 patternsz Step 2: Building ground truth...zGround truth: /z patterns likedz(I want to find good co-location patterns)iteration_rounds
session_idzInitialized iteration session: z8Failed to start iteration, using process_query instead: Nr   r(   z8No pre-interaction scores available, using zero baselineg           )r   r	   r
   r   	thresholdz;Pre-interaction metrics (round 0, no feedback) - Accuracy: r   r&   z, Precision: r	   z
, Recall: r
   z, F1: r   z (threshold: r'   z
=== Round z ===rH   similarity_scoresnext_resultzNo patterns available in round z&, using top patterns from all_patterns   r-   r.   )r   strategyz	Selected z patterns using z sampling strategyz
Using all z% available patterns (less than top_k=)positivenegative rc   rd   zSimulated feedback: z positive, z	 negativez&Model updated via next_iteration, got z patterns for next roundz%Failed to update via next_iteration: z-Model updated via next_iteration (last round)z3Failed to update via next_iteration on last round: zRound z/: Trainer model available, using it for scoringzNo scores available in round z, using random baselinez score stats: min=z, max=z, mean=z	, median=z, std=c              3   ,   K   | ]  }|d k(  s	d   yw)r]   N ).0vs     r    	<genexpr>z+InteractionEvaluator.run.<locals>.<genexpr>K  s     &Q2FQ!q&q2Fs   
z predictions: z# predicted as positive (threshold: z metrics - Accuracy: z/Evaluation complete. Final metrics - Accuracy: )r   r	   r
   r   pre_interaction)<r   r   r   minermine_patternsr   r5   r   build_ground_truthr   sumvaluesstage3_user_idr9   save_preference_featureslistlikeddislikedstart_iterationr   	ExceptionwarningrR   r   	enumerater   r	   r
   r   r   rangerA   process_querylocalsrB   rC   rD   joinsortedrF   r   r   sample_diverse_patternsr   label_patternnext_iterationr?   r/   r4   r6   arrayminmaxmeanmedianstdr   r   r   r   ) r   r)   querysession_resultr\   e
pre_scorespre_thresholdidxscorepre_predictionstall_ranked_patternsr_   resultr`   rQ   r-   rO   pattern_keyrecommended_patternsfeedbacklabelpattern_strscore_arrayr^   predictionspositive_predictionsaccprecrecf1_scores                                    r    runzInteractionEvaluator.run   s
    	12 	45 LL..<<!! = 

 	fS!2!234I>? 	67 NN==d>O>OPnS):):)A)A)C%D$EQs4K\K\G]F^^mno ,,--44%%&-1^^-D-DD(()"	

 ;	!\\99%RVR]R]9^N'5JKK9*FG 66 7 


 NNUVT%6%6!77J)*5 (
3
3
U u-!143 	 

 !!2!2OD"4#4#4oFT..@T&&8&(
$ 	55jA#F G66{CCH I33H=cB C//5c: ;(-Q0	
 t{{#AKK*QqSE4;;-t<= Av*8*<*<Z*L'(6(:(:;NPR(S% "\\77PT7UF*0**Z*D'(*% -68";*5//*b*I'(38KR(P% "\\77PT7UF*0**Z*D'(*%&!@1Eklm&*&7&7&<#$&! F.&{{9b9lC0#/#5#5c#:L!hhvl';<)--k7;;|UX;YZe$ / &'4::5'7'O'O'**!33	($ i,@(A'BBRSWSiSiRjj|}~ (;$j-A)B(CChimisishttuvw H
 044W=FPQXQ\Q\]fQgimFnchhw{{9b'ABtwx  yD  yD  EN  PR  yS  uTA:Z(//<Z(//< 0 KK.s8J3G/H.IUXYablYmUnTooxyz a$++/1'"&,,"="=j("SKKK"H[__]gikMlImHn  oG  !H  I ^LL//
HEKK"OQ ||##/DLL4H4H4N4N4ZvacU*YZ[66!!"#a% 7 F ~!>qseCZ[\T%6%6!77  hhv.fQqSE);KOO<Mc;RRXYdYhYhYjknXo p##.#3#3#5c":)BIIkDZ[^C_ `""-//"3C!8: ;
 *&1IK'/
U(-(:1C  0 $'&Q+2D2D2F&Q#Q KK&1^4H3I3{K[J\\  AJ  KN  @O  OP  Q  R 4,,k:CT..<D**K8C$++[9HLL$""))$/&&s+OO""8,KK&1%:3s)=QUVYPZ [""%c&#mIVY?Z[] ^Q $V 	EdllSUFVWZE[ \!!%!7!7!;C @ A"11"5c: ;//"-c24 	5
 //))//#;;
 	
e  	NNUVWUXYZJ	
D ! 'NN%J1##NO!%J"&K	' ! ^NN%XYZX[#\]]^sP   %Ai i3?Ai91j,	i0i++i09	j)j$$j),	k5kkN)rU   
   mixed)__name__
__module____qualname____doc__r   intrC   r!   rR   r   floatr   rg   r"   r    r   r      s\    8 !($@ !$@ 	$@
 $@ $@LHTe
T%[ e
r"   r   )r   loggingtypingr   r   r   r   numpyr6   experiment.simulatorr   experiment.metricsr   r	   r
   r   r   experiment.samplerr   	getLoggerr   r   r   rg   r"   r    <module>r      s>    ?  , ,  . Q Q /			8	$X
 X
r"   