
    Li/=                         S r SSKrSSKrSSKJrJrJrJrJ	r	  SSK
r
SSKJr  SSKJr  \
R                  " \5      r " S S5      rg)XUser preference learner using average vector method with positive and negative feedback.    N)ListOptionalDictAnyTuple)MemoryStore)PatternEmbedderc                      \ rS rSrSrSS\S\4S jjrS\\	\
R                     \	\
R                     4   4S jrS\	\
R                     4S jrS	\S\	\
R                     4S
 jrSS	\S\S\	\
R                     4S jjr  SS\\\\4      S	\	\   S\S\	\\      4S jjrS\\\\4      S\\   S	\	\   S\\   4S jrS\\\\4      S\
R                  S\\   4S jrS\
R                  S\
R                  S\S\
R                  4S jrSS	\S\S\	\
R                     4S jjr  SS\\\\4      S	\	\   S\S\	\\      4S jjrSrg) PreferenceLearner   r   decay_lambdause_preference_weightedc                 X    [        5       U l        [        5       U l        Xl        X l        g)u   Initialize preference learner.

Args:
    decay_lambda: Decay parameter for intent vector weight (α_t = e^(-λt))
    use_preference_weighted: If True, multiply similarity by preference weight (adaptive fusion, 2025-style)
N)r	   memoryr
   embedderr   r   )selfr   r   s      C   /home/ubuntu/codebase/yexijia/保研/iCoLoc/src/learning/learner.py__init__PreferenceLearner.__init__   s#     "m')('>$    returnc                 \   U R                   R                  5       n/ n/ nUR                  S/ 5       GH  nUR                  S0 5      nUR                  S/ 5      nU H_  n[        U[        5      (       a  UR                  S/ 5      nO[        U[
        5      (       a  UnOME  U(       d  MN  UR                  U5        Ma     UR                  S/ 5      n	U	 H_  n[        U[        5      (       a  UR                  S/ 5      nO[        U[
        5      (       a  UnOME  U(       d  MN  UR                  U5        Ma     GM     Sn
[        U5      S:  ad  U R                  R                  U5      nUbF  [        U5      S:  a7  [        R                  " USS9n
[        R                  S	[        U5       S
35        Sn[        U5      S:  ad  U R                  R                  U5      nUbF  [        U5      S:  a7  [        R                  " USS9n[        R                  S[        U5       S35        U
c  Uc  gX4$ )a  Build user preference vectors from historical feedback.

Returns:
    Tuple of (positive_vector, negative_vector), or (None, None) if no feedback
    - positive_vector: Average vector of liked patterns
    - negative_vector: Average vector of disliked patterns
sessionsfeedbacklikepatterndislikeNr   axiszBuilt positive vector from z liked patternszBuilt negative vector from z disliked patterns)NN)r   loadget
isinstancedictlistappendlenr   encode_patternsnpmeanloggerdebug)r   dataliked_patternsdisliked_patternssessionr   likedpattern_datar   dislikedpositive_vectorvectorsnegative_vectors                r   build_user_vectors$PreferenceLearner.build_user_vectors   s    {{! xx
B/G{{:r2H LL,E %lD11*..y"=Gd33*G7"))'2 !&  ||Ir2H (lD11*..y"=Gd33*G7%,,W5 !)% 0< ~"mm33NCG"s7|a'7"$'''"::3~;N:O_`  !A%mm334EFG"s7|a'7"$'''"::3?P;Q:RRdef"'>//r   c                 *    U R                  5       u  pU$ )zBuild user preference vector from historical feedback (backward compatibility).

Returns:
    User preference vector (average of liked patterns), or None if no feedback
)r7   )r   positive_vec_s      r   build_user_vector#PreferenceLearner.build_user_vector]   s     113r   user_idc                    U R                   R                  U5      nU(       d  gUR                  S/ 5      nUR                  S/ 5      n[        R                  " U R
                  R                  5       5      nU(       a:  [        R                  " U Vs/ s H  n[        R                  " U5      PM     snSS9n[        R                  " U R
                  R                  5       5      nU(       a:  [        R                  " U Vs/ s H  n[        R                  " U5      PM     snSS9nXW-
  n[        R                  R                  U5      S:X  a  gU$ s  snf s  snf )u   Compute user preference vector from feedback (μ⁺ - μ⁻).

Args:
    user_id: User identifier
    
Returns:
    Feedback-based user vector (positive_mean - negative_mean), or None if no feedback
Npositivenegativer   r   )r   get_user_profiler"   r)   zerosr    get_sentence_embedding_dimensionr*   arraylinalgnorm)	r   r>   user_profilepositive_vectorsnegative_vectorsmean_posvmean_neguser_vectors	            r   compute_feedback_vector)PreferenceLearner.compute_feedback_vectorf   s    {{33G<'++J;'++J; 88DMMJJLMww5EF5E5EFQOH 88DMMJJLMww5EF5E5EFQOH )99>>+&!+  G
  Gs    E  Etc                    U R                   R                  U5      nSnU(       a/  UR                  S5      (       a  [        R                  " US   5      nU R                  U5      nUc  Uc  gUc  U$ Uc  U$ [        R                  " U R                  * U-  5      nXd-  SU-
  U-  -   n[        R                  SUS SU 35        U$ )uN  Get fused user preference vector combining LLM intent and feedback.

Formula: u_t = α_t * u_llm + (1-α_t) * u_feedback
where α_t = e^(-λt)

Args:
    user_id: User identifier
    t: Interaction round number (0 for first interaction)
    
Returns:
    Fused user preference vector, or None if neither intent nor feedback available
Nu_llm   zFused user vector: alpha_t=z.4fz, t=)r   load_intentr"   r)   rE   rO   mathexpr   r+   r,   )r   r>   rQ   intent_datarS   
u_feedbackalpha_tu_ts           r   get_user_vector!PreferenceLearner.get_user_vector   s     kk--g6;??733HH[12E 11':
 =Z/ =L ((D---12 oW
 ::273-tA3GH
r   Npatternsinteraction_roundc                    U(       a7  U R                  X#5      nUb#  U R                  X5      nU R                  XU5      $ U R                  5       u  pgUc  Uc  gU Vs/ s H  oR	                  S/ 5      PM     n	nU	 V
s/ s H  o(       d  M  U
PM     nn
U(       d$  [
        R                  S5        S/[        U5      -  $ U R                  R                  U5      nUc  [
        R                  S5        g/ nU H  nSnUbk  [        R                  R                  U5      n[        R                  R                  U5      nUS:  a'  US:  a!  [        R                  " X5      UU-  -  nUU-  nUbk  [        R                  R                  U5      n[        R                  R                  U5      nUS:  a'  US:  a!  [        R                  " X5      UU-  -  nUU-  nUR                  U5        M     / nSnU	 H6  n
U
(       a  UR                  UU   5        US-  nM%  UR                  S5        M8     U R                  UUU5      $ s  snf s  sn
f )uL  Score patterns based on user preference vectors (fused LLM intent + feedback).

If user_id is provided and Stage0 is enabled, uses fused vector:
u_t = α_t * u_llm + (1-α_t) * u_feedback

Otherwise, falls back to original method:
score = similarity_to_positive - similarity_to_negative

Args:
    patterns: List of pattern dictionaries, each containing 'pattern' field
    user_id: Optional user identifier for Stage0 fusion
    interaction_round: Current interaction round number (for decay calculation)
    
Returns:
    List of similarity scores, or None if no user vectors available
Nr   zNo valid patterns to score.        z&Could not encode patterns for scoring.r   rT   )r\   _score_with_vector_apply_preference_weightsr7   r"   r+   infor'   r   r(   warningr)   rF   rG   dotr&   )r   r^   r>   r_   fused_vectorrawr:   negative_vecppattern_listsp_listvalid_pattern_listsp_vecsscoresrL   scorepositive_normv_normpositive_similaritynegative_normnegative_similarityfinal_scores	valid_idxs                          r   score_patterns PreferenceLearner.score_patterns   s/   & //KL'--hE55hWMM &*%<%<%>" L$8 8@@x!y"-x@ 5BLM&VvML"KK5653x=(( ../BC>NNCD AE ' "		| <*A:-!"3*,&&*AVmE[*\'00E ' "		| <*A:-!"3*,&&*AVmE[*\'00EMM% + 0 	#F##F9$56Q	##C( $ --hgNNg A Ms   H?=
IIro   c                   ^^ U R                   (       a  U(       a  [        U5      [        U5      :w  a  U$ U R                  R                  U5      nU(       d  U$ [	        UR                  S5      =(       d    / 5      m[	        UR                  S5      =(       d    / 5      m/ n[        U5       GH  u  pgUR                  S/ 5      n[        U[        5      (       aF  UR                  S5       V	s/ s H)  oR                  5       (       d  M  U	R                  5       PM+     nn	O0U(       a)  U V	s/ s H  n	[        U	5      R                  5       PM     nn	[        [        U5      S5      n
[        U4S jU 5       5      n[        U4S jU 5       5      nSX-
  U
-  S	-  -   n[        S	[        S
U5      5      nUR                  X&   U-  5        GM     U$ s  sn	f s  sn	f )zApply preference-weighted fusion (adaptive fusion of similarity and feature-level preference).
Aligns with 2025 work on adaptive fusion of multi-dimensional user preferences.
weight = 1 + (n_liked - n_disliked) / |pattern| * 0.5, clamped to [0.5, 1.5].
r   r   r   ,rT   c              3   6   >#    U  H  oT;   d  M
  S v   M     g7frT   N ).0f	liked_sets     r   	<genexpr>>PreferenceLearner._apply_preference_weights.<locals>.<genexpr>  s     =U9n!!U   		c              3   6   >#    U  H  oT;   d  M
  S v   M     g7fr}   r~   )r   r   disliked_sets     r   r   r     s     C1l1BQQr   g      ?g      ?g      ?)r   r'   r   get_preference_featuressetr"   	enumerater#   strsplitstripmaxsumminr&   )r   r^   ro   r>   pfoutirj   plistxnn_liked
n_dislikedwr   r   s                 @@r   rc   +PreferenceLearner._apply_preference_weights  sn    ++7c&kSQY]>ZM[[009Mv,"-	266),23h'DAEE)R(E%%%,1KK,<J,<q	,<J167AQ7CJ"A=U==GCCCJw+q0366ACS!%AJJvy1}% ( 
 K7s   ,GG*#GrN   c                    U Vs/ s H  o3R                  S/ 5      PM     nnU Vs/ s H  oU(       d  M  UPM     nnU(       d  S/[        U5      -  $ U R                  R                  U5      nUc  S/[        U5      -  $ / n[        R
                  R                  U5      n	U Hm  n
[        R
                  R                  U
5      nUS:  a4  U	S:  a.  [        R                  " X5      X-  -  nUR                  U5        M\  UR                  S5        Mo     / nSnU H5  nU(       a  UR                  X   5        US-  nM$  UR                  S5        M7     U$ s  snf s  snf )zScore patterns using a single user vector.

Args:
    patterns: List of pattern dictionaries
    user_vector: User preference vector
    
Returns:
    List of similarity scores
r   ra   r   rT   )	r"   r'   r   r(   r)   rF   rG   rf   r&   )r   r^   rN   rj   rk   rl   rm   rn   ro   	user_normrL   rr   
similarityrv   rw   s                  r   rb   $PreferenceLearner._score_with_vector$  sC    8@@x!y"-x@ 5BLM&VvML"53x=(( ../BC>53x=(( IINN;/	AYY^^A&Fzi!mVVA3v7IJ
j)c"  	#F##F$56Q	##C( $ E A Ms   E
EEllm_vecfb_vecalphac                     X1-  SU-
  U-  -   $ )u   Fuse LLM intent vector and feedback vector.

Formula: u_t = α * u_llm + (1-α) * u_feedback

Args:
    llm_vec: LLM intent vector
    fb_vec: Feedback-based vector
    alpha: Fusion weight (0-1)
    
Returns:
    Fused user preference vector
rT   r~   )r   r   r   r   s       r   fuse_vectorPreferenceLearner.fuse_vectorS  s     !e)v!555r   c                    U R                  U5      nU R                  R                  U5      nSnU(       a/  UR                  S5      (       a  [        R
                  " US   5      nUb  Ub  U R                  XSU5      $ Ub  U$ Ub  U$ g)zUpdate user vector iteratively (for Stage4).

Gets latest feedback and fuses with LLM intent vector.

Args:
    user_id: User identifier
    alpha: Fusion weight for LLM intent vs feedback
    
Returns:
    Updated fused user preference vector
NrS   )rO   r   rU   r"   r)   rE   r   )r   r>   r   rY   rX   rS   s         r   update_user_vector_iterative.PreferenceLearner.update_user_vector_iterativeb  s     11':
 kk--g6;??733HH[12E !7##Eu==#Lr   c                 "    U R                  XUS9$ )aG  Score all patterns for evaluation.

This is a wrapper around score_patterns for evaluation purposes.

Args:
    patterns: List of pattern dictionaries
    user_id: Optional user identifier
    interaction_round: Current interaction round number
    
Returns:
    List of similarity scores, or None if no user vectors available
)r>   r_   )rx   )r   r^   r>   r_   s       r   score_all_patterns$PreferenceLearner.score_all_patterns  s     ""8Pa"bbr   )r   r   r   r   )g?F)r   )Nr   )g333333?)__name__
__module____qualname____firstlineno____doc__floatboolr   r   r   r)   ndarrayr7   r<   r   rO   intr\   r   r   r   rx   rc   rb   r   r   r   __static_attributes__r~   r   r   r   r      s   b
?U 
?D 
?>0E(2::*>@T*T$U >0@8BJJ#7  s  x

7K  D(s (s (8BJJ;O (T W[/0TOtDcN'; TOhsm TO),TO5=d5k5JTOl$tCH~2F PTUZP[ ,4SM>B5k8-4S#X+? -bjj -]abg]h -^62:: 6rzz 6% 6TVT^T^ 6C  PXY[YcYcPd > [_45c4S#X+? c(SV- c.1c:B4;:Oc cr   r   )r   numpyr)   rV   typingr   r   r   r   r   loggingmemory.storer	   learning.embedderr
   	getLoggerr   r+   r   r~   r   r   <module>r      s;    ^   3 3  $ -			8	$Ac Acr   