
    Li/=                         d Z ddlZddlZddlmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ  e
j                  e      Z G d d      Zy)XUser preference learner using average vector method with positive and negative feedback.    N)ListOptionalDictAnyTuple)MemoryStore)PatternEmbedderc                   h   e Zd ZdZddedefdZdeee	j                     ee	j                     f   fdZdee	j                     fdZdedee	j                     fd	Zdded
edee	j                     fdZ	 	 ddeeeef      dee   dedeee      fdZdeeeef      dee   dee   dee   fdZdeeeef      de	j                  dee   fdZde	j                  de	j                  dede	j                  fdZddededee	j                     fdZ	 	 ddeeeef      dee   dedeee      fdZy)PreferenceLearnerr   decay_lambdause_preference_weightedc                 \    t               | _        t               | _        || _        || _        y)u   Initialize preference learner.

        Args:
            decay_lambda: Decay parameter for intent vector weight (α_t = e^(-λt))
            use_preference_weighted: If True, multiply similarity by preference weight (adaptive fusion, 2025-style)
        N)r	   memoryr
   embedderr   r   )selfr   r   s      G   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/learning/learner.py__init__zPreferenceLearner.__init__   s'     "m')('>$    returnc                    | j                   j                         }g }g }|j                  dg       D ]  }|j                  di       }|j                  dg       }|D ]M  }t        |t              r|j                  dg       }nt        |t
              r|}n:|s=|j                  |       O |j                  dg       }	|	D ]M  }t        |t              r|j                  dg       }nt        |t
              r|}n:|s=|j                  |       O  d}
t        |      dkD  rd| j                  j                  |      }|Gt        |      dkD  r9t        j                  |d      }
t        j                  d	t        |       d
       d}t        |      dkD  rd| j                  j                  |      }|Gt        |      dkD  r9t        j                  |d      }t        j                  dt        |       d       |
|y|
|fS )a5  Build user preference vectors from historical feedback.
        
        Returns:
            Tuple of (positive_vector, negative_vector), or (None, None) if no feedback
            - positive_vector: Average vector of liked patterns
            - negative_vector: Average vector of disliked patterns
        sessionsfeedbacklikepatterndislikeNr   axiszBuilt positive vector from z liked patternszBuilt negative vector from z disliked patterns)NN)r   loadget
isinstancedictlistappendlenr   encode_patternsnpmeanloggerdebug)r   dataliked_patternsdisliked_patternssessionr   likedpattern_datar   dislikedpositive_vectorvectorsnegative_vectors                r   build_user_vectorsz$PreferenceLearner.build_user_vectors   s    {{! xx
B/G{{:r2H LL,E %lD1*..y"=Gd3*G"))'2 !&  ||Ir2H (lD1*..y"=Gd3*G%,,W5 !)% 0< ~"mm33NCG"s7|a'7"$'''"::3~;N:O_`  !A%mm334EFG"s7|a'7"$'''"::3?P;Q:RRdef"'>//r   c                 ,    | j                         \  }}|S )zBuild user preference vector from historical feedback (backward compatibility).
        
        Returns:
            User preference vector (average of liked patterns), or None if no feedback
        )r5   )r   positive_vec_s      r   build_user_vectorz#PreferenceLearner.build_user_vector]   s     113ar   user_idc                    | j                   j                  |      }|sy|j                  dg       }|j                  dg       }t        j                  | j
                  j                               }|r8t        j                  |D cg c]  }t        j                  |       c}d      }t        j                  | j
                  j                               }|r8t        j                  |D cg c]  }t        j                  |       c}d      }||z
  }t        j                  j                  |      dk(  ry|S c c}w c c}w )u   Compute user preference vector from feedback (μ⁺ - μ⁻).
        
        Args:
            user_id: User identifier
            
        Returns:
            Feedback-based user vector (positive_mean - negative_mean), or None if no feedback
        Npositivenegativer   r   )r   get_user_profiler    r'   zerosr    get_sentence_embedding_dimensionr(   arraylinalgnorm)	r   r:   user_profilepositive_vectorsnegative_vectorsmean_posvmean_neguser_vectors	            r   compute_feedback_vectorz)PreferenceLearner.compute_feedback_vectorf   s    {{33G<'++J;'++J; 88DMMJJLMww5EF5E5EFQOH 88DMMJJLMww5EF5E5EFQOH )99>>+&!+  G
  Gs   D;,E tc                 l   | j                   j                  |      }d}|r)|j                  d      rt        j                  |d         }| j                  |      }||y||S ||S t        j                  | j                   |z        }||z  d|z
  |z  z   }t        j                  d|dd|        |S )u  Get fused user preference vector combining LLM intent and feedback.
        
        Formula: u_t = α_t * u_llm + (1-α_t) * u_feedback
        where α_t = e^(-λt)
        
        Args:
            user_id: User identifier
            t: Interaction round number (0 for first interaction)
            
        Returns:
            Fused user preference vector, or None if neither intent nor feedback available
        Nu_llm   zFused user vector: alpha_t=z.4fz, t=)r   load_intentr    r'   rA   rK   mathexpr   r)   r*   )r   r:   rL   intent_datarN   
u_feedbackalpha_tu_ts           r   get_user_vectorz!PreferenceLearner.get_user_vector   s     kk--g6;??73HH[12E 11':
 =Z/ =L ((D---12 oW
 ::273-tA3GH
r   Npatternsinteraction_roundc                 J   |r9| j                  ||      }|%| j                  ||      }| j                  |||      S | j                         \  }}||y|D cg c]  }|j	                  dg        }	}|	D 
cg c]  }
|
s|
	 }}
|s$t
        j                  d       dgt        |      z  S | j                  j                  |      }|t
        j                  d       yg }|D ]  }d}|it        j                  j                  |      }t        j                  j                  |      }|dkD  r&|dkD  r!t        j                  ||      ||z  z  }||z  }|it        j                  j                  |      }t        j                  j                  |      }|dkD  r&|dkD  r!t        j                  ||      ||z  z  }||z  }|j                  |        g }d}|	D ]/  }
|
r|j                  ||          |dz  }|j                  d       1 | j                  |||      S c c}w c c}
w )u  Score patterns based on user preference vectors (fused LLM intent + feedback).
        
        If user_id is provided and Stage0 is enabled, uses fused vector:
        u_t = α_t * u_llm + (1-α_t) * u_feedback
        
        Otherwise, falls back to original method:
        score = similarity_to_positive - similarity_to_negative
        
        Args:
            patterns: List of pattern dictionaries, each containing 'pattern' field
            user_id: Optional user identifier for Stage0 fusion
            interaction_round: Current interaction round number (for decay calculation)
            
        Returns:
            List of similarity scores, or None if no user vectors available
        Nr   zNo valid patterns to score.        z&Could not encode patterns for scoring.r   rO   )rW   _score_with_vector_apply_preference_weightsr5   r    r)   infor%   r   r&   warningr'   rB   rC   dotr$   )r   rX   r:   rY   fused_vectorrawr7   negative_vecppattern_listsp_listvalid_pattern_listsp_vecsscoresrH   scorepositive_normv_normpositive_similaritynegative_normnegative_similarityfinal_scores	valid_idxs                          r   score_patternsz PreferenceLearner.score_patterns   s>   & //9JKL'--hE55hWMM &*%<%<%>"l L$8 8@@x!y"-x@ 5BLM&VvML"KK5653x=(( ../BC>NNCD AE ' "		| <*A:-!"3*,&&L*AVmE[*\'00E ' "		| <*A:-!"3*,&&L*AVmE[*\'00EMM% + 0 	#F##F9$56Q	##C( $ --hgNNg A Ms   H7H ?H ri   c                 P   | j                   r|rt        |      t        |      k7  r|S | j                  j                  |      }|s|S t	        |j                  d      xs g       t	        |j                  d      xs g       g }t        |      D ]  \  }}|j                  dg       }t        |t              r>|j                  d      D 	cg c]#  }	|	j                         s|	j                         % }}	n(|r&|D 	cg c]  }	t        |	      j                          }}	t        t        |      d      }
t        fd|D              }t        fd|D              }d||z
  |
z  d	z  z   }t        d	t        d
|            }|j                  ||   |z         	 |S c c}	w c c}	w )a  Apply preference-weighted fusion (adaptive fusion of similarity and feature-level preference).
        Aligns with 2025 work on adaptive fusion of multi-dimensional user preferences.
        weight = 1 + (n_liked - n_disliked) / |pattern| * 0.5, clamped to [0.5, 1.5].
        r   r   r   ,rO   c              3   ,   K   | ]  }|v sd   ywrO   N ).0f	liked_sets     r   	<genexpr>z>PreferenceLearner._apply_preference_weights.<locals>.<genexpr>  s     =Ua9n!U   	c              3   ,   K   | ]  }|v sd   ywrv   rw   )rx   ry   disliked_sets     r   r{   z>PreferenceLearner._apply_preference_weights.<locals>.<genexpr>  s     C1l1BQr|   g      ?g      ?g      ?)r   r%   r   get_preference_featuressetr    	enumerater!   strsplitstripmaxsumminr$   )r   rX   ri   r:   pfoutird   plistxnn_liked
n_dislikedwr~   rz   s                 @@r   r]   z+PreferenceLearner._apply_preference_weights  so    ++7c&kSQY]>ZM[[009Mv,"-	266),23h'DAqEE)R(E%%,1KK,<J,<q	,<J167AQ7CJ"A=U==GCCCJw+q0366ACS!%AJJvay1}% ( 
 K7s   F$F? F#rJ   c                    |D cg c]  }|j                  dg        }}|D cg c]  }|s|	 }}|sdgt        |      z  S | j                  j                  |      }|dgt        |      z  S g }t        j
                  j                  |      }	|D ]j  }
t        j
                  j                  |
      }|dkD  r3|	dkD  r.t	        j                  |
|      ||	z  z  }|j                  |       Z|j                  d       l g }d}|D ]/  }|r|j                  ||          |dz  }|j                  d       1 |S c c}w c c}w )zScore patterns using a single user vector.
        
        Args:
            patterns: List of pattern dictionaries
            user_vector: User preference vector
            
        Returns:
            List of similarity scores
        r   r[   r   rO   )	r    r%   r   r&   r'   rB   rC   r`   r$   )r   rX   rJ   rd   re   rf   rg   rh   ri   	user_normrH   rl   
similarityrp   rq   s                  r   r\   z$PreferenceLearner._score_with_vector$  sK    8@@x!y"-x@ 5BLM&VvML"53x=(( ../BC>53x=(( IINN;/	AYY^^A&Fzi!mVVA{3v	7IJ
j)c"  	#F##F9$56Q	##C( $ E A Ms   D9D>D>llm_vecfb_vecalphac                     ||z  d|z
  |z  z   S )uK  Fuse LLM intent vector and feedback vector.
        
        Formula: u_t = α * u_llm + (1-α) * u_feedback
        
        Args:
            llm_vec: LLM intent vector
            fb_vec: Feedback-based vector
            alpha: Fusion weight (0-1)
            
        Returns:
            Fused user preference vector
        rO   rw   )r   r   r   r   s       r   fuse_vectorzPreferenceLearner.fuse_vectorS  s     w!e)v!555r   c                     | j                  |      }| j                  j                  |      }d}|r)|j                  d      rt	        j
                  |d         }||| j                  |||      S ||S ||S y)aD  Update user vector iteratively (for Stage4).
        
        Gets latest feedback and fuses with LLM intent vector.
        
        Args:
            user_id: User identifier
            alpha: Fusion weight for LLM intent vs feedback
            
        Returns:
            Updated fused user preference vector
        NrN   )rK   r   rP   r    r'   rA   r   )r   r:   r   rT   rS   rN   s         r   update_user_vector_iterativez.PreferenceLearner.update_user_vector_iterativeb  s     11':
 kk--g6;??73HH[12E !7##E:u==#Lr   c                 *    | j                  |||      S )a  Score all patterns for evaluation.
        
        This is a wrapper around score_patterns for evaluation purposes.
        
        Args:
            patterns: List of pattern dictionaries
            user_id: Optional user identifier
            interaction_round: Current interaction round number
            
        Returns:
            List of similarity scores, or None if no user vectors available
        )r:   rY   )rr   )r   rX   r:   rY   s       r   score_all_patternsz$PreferenceLearner.score_all_patterns  s     ""8WPa"bbr   )g?F)r   )Nr   )g333333?)__name__
__module____qualname____doc__floatboolr   r   r   r'   ndarrayr5   r9   r   rK   intrW   r   r   r   rr   r]   r\   r   r   r   rw   r   r   r   r      s   b
?U 
?D 
?>0E(2::*>@T*T$U >0@8BJJ#7  s  x

7K  D(s (s (8BJJ;O (T W[/0TOtDcN'; TOhsm TO),TO5=d5k5JTOl$tCH~2F PTUZP[ ,4SM>B5k8-4S#X+? -bjj -]abg]h -^62:: 6rzz 6% 6TVT^T^ 6C  PXY[YcYcPd > [_45c4S#X+? c(SV- c.1c:B4;:Ocr   r   )r   numpyr'   rQ   typingr   r   r   r   r   loggingmemory.storer	   learning.embedderr
   	getLoggerr   r)   r   rw   r   r   <module>r      s=    ^   3 3  $ -			8	$Ac Acr   