
    @iP                         d Z ddlZddlZddlmZmZmZmZm	Z	 ddl
m
Z
  ej                  e      Z G d d      Z G d d      Zy)	z:Iteration manager for Stage4 iterative interaction engine.    N)DictAnyListOptionalTuple)datetimec                   F    e Zd ZdZdededefdZdeeef   fdZ	de
fd	Zy
)IterationStatez-State object for tracking iteration progress.user_idquery
max_roundsc                 X    || _         || _        || _        d| _        d| _        g | _        y)zInitialize iteration state.
        
        Args:
            user_id: User identifier
            query: Original user query
            max_rounds: Maximum number of iteration rounds
        r   N)r   r   r   current_rounduser_vectorhistory)selfr   r   r   s       S   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/controller/iteration_manager.py__init__zIterationState.__init__   s0     
$    
round_datac                     | j                   j                  | j                  t        j                         j                         d|       y)zcAdd a round to history.
        
        Args:
            round_data: Data for this round
        )round	timestampN)r   appendr   r   now	isoformat)r   r   s     r   	add_roundzIterationState.add_round   s@     	''!113
 
 	r   returnc                 4    | j                   | j                  k\  S )zrCheck if iteration is complete.
        
        Returns:
            True if current_round >= max_rounds
        )r   r   )r   s    r   is_completezIterationState.is_complete*   s     !!T__44r   N)__name__
__module____qualname____doc__strintr   r   r   r   boolr     r   r   r
   r
      s?    7 C S  
DcN 
5T 5r   r
   c                      e Zd ZdZ	 	 	 ddefdZdededeej                     fdZ
	 dd	ed
eeeef      deeef   fdZd	edeeee   f   dej                  fdZ	 	 dd	ed
eeeef      deeef   fdZ	 	 dd	edeeee   f   dedeeef   fdZ	 ddededed
eeeef      deeef   f
dZ	 dd	ed
eeeef      deeef   fdZy)IterationManagerz5Manager for iterative preference refinement (Stage4).Nfusion_alphac	                 t    || _         || _        || _        || _        || _        || _        || _        || _        y)u  Initialize iteration manager.
        
        Args:
            miner: CoLocationMiner instance
            learner: PreferenceLearner instance
            memory: MemoryStore instance
            embedder: PatternEmbedder instance
            llm_client: Optional LLMClient for intent understanding
            intent_encoder: Optional IntentEncoder for Stage0
            intent_mapper: Optional IntentMapper for Stage0
            fusion_alpha: Weight for LLM intent vs feedback fusion (α)
        N)minerlearnermemoryembedder
llm_clientintent_encoderintent_mapperr+   )	r   r-   r.   r/   r0   r1   r2   r3   r+   s	            r   r   zIterationManager.__init__6   s?     
 $,*(r   r   r   r   c                 P   | j                   | j                  t        j                  d       y	 t        j                  d|dd  d       | j                   j	                  |      }|st        j                  d       y|j                  dg       }|rt        |      dk(  r<t        j                  d	       t        j                  d
|j                                 yt        j                  dt        |       d       t        j                  d|        t        j                  d       | j                  j                  |      }|t        j                  d       yt        j                  dt        |       dt        j                  j                  |      dd       | j                  j                  |||       t        j                  d       t        j                  dt        |       d       |S # t         $ r$}t        j                  d| d       Y d}~yd}~ww xY w)ux  Initialize user vector from LLM intent understanding (Stage0).
        
        This method implements the complete flow:
        1. User ambiguous query → LLM intent understanding
        2. Extract structured intent (business, pattern_preference, risk factors)
        3. Validate patterns against dataset POI types
        4. Map patterns to embeddings (feature selection)
        5. Compute initial user vector u_llm
        
        Args:
            query: User query text
            user_id: User identifier
            
        Returns:
            Initial user preference vector, or None if Stage0 not available
        Nz<Stage4: Stage0 not available, skipping intent initializationz1Stage4: Starting intent understanding for query: d   z...z=Stage4: Failed to parse intent from query - LLM returned Nonepattern_preferencer   z4Stage4: No pattern_preference found in parsed intentzStage4: Intent result keys: z+Stage4: Successfully parsed intent - found z preferred patternszStage4: Preferred patterns: zEStage4: Mapping intent patterns to user vector (feature selection)...z*Stage4: Failed to convert intent to vectorz1Stage4: Generated initial user vector u_llm (dim=z, norm=.4f)z#Stage4: Saved intent data to memoryzBStage4: Successfully initialized user vector from LLM intent (dim=z6Stage4: Failed to initialize user vector from intent: Texc_info)r2   r3   loggerinfoparsewarninggetlendebugkeys	to_vectornplinalgnormr/   save_intent	Exception)r   r   r   intent_resultr6   u_llmes          r   init_user_vectorz!IterationManager.init_user_vectorN   s   " &$*<*<*DKKVW(	KKKERVSVK=X[\] !//55e<M ^_ "/!2!23G!L%-?)@A)EUV;M<N<N<P;QRSKKEcJ\F]E^^qrsKK67I6JKL KK_`&&00?E}KLKKKCPUJ<W^_a_h_h_m_mns_tux^yyz{| KK##GUMBKK=>KK\]`af]g\hhijkL 	NNSTUSVWbfNg	s-   AG8 ?AG8 BG8 BG8 8	H%H  H%statemining_paramsc                    |xj                   dz  c_         t        j                  d|j                    d|j                          |dddd}| j                  j                  |j                  d	d      |j                  d
d      |j                  dd            }t        j                  d|j                    dt        |       d       |dd }|D ]*  }d|vs| j                  j                  |d         }||d<   , d}|j                  <| j                  j                  ||j                        }t        j                  d       nL| j                  j                  ||j                  |j                   dz
        }|t        j                  d       |}i }	|t        t!        ||      d d      }
|
D cg c]  \  }}|	 }}}|
D ]=  \  }}dj#                  t        |j                  dg                   }t%        |      |	|<   ? t        j                  dt        |       d       nt        j                  d       |j                  dd      }| j                  j'                  ||      }|j                   ||t        |      t        |      |	|j                  2t%        t(        j*                  j-                  |j                              ndd}|j/                  t        |      t        |      |d    d!       |S c c}}w )"a  Run one iteration round.
        
        Algorithm:
        1. Mine candidate patterns
        2. Rank patterns by current user vector
        3. Return ranked patterns for user feedback
        
        Args:
            state: Current iteration state
            mining_params: Optional mining parameters (if None, uses defaults)
            
        Returns:
            Dictionary containing ranked patterns and metadata
           zStage4: Starting round /N333333?   
confidencemin_participationmax_pattern_sizepriorityrV   rW   rX   Stage4: Round z mined z	 patterns   	embeddingpatternz/Stage4: Scored patterns using state user vectorr   interaction_roundz4Stage4: Scored patterns using learner's fused vectorc                     | d   S NrP   r(   xs    r   <lambda>z0IterationManager.run_one_round.<locals>.<lambda>       adr   Tkeyreverse,zStage4: Re-ranked z8Stage4: No user vector available, using original rankingmin_confidence      ?ri           )r   patternsrulestotal_patternstotal_rulessimilarity_scoresuser_vector_normrr   )ro   rp   rr   )r   r;   r<   r   r-   mine_patternsr?   r@   r0   encode_patternr   r.   _score_with_vectorscore_patternsr   sortedzipjoinfloatgenerate_rulesrD   rE   rF   r   )r   rM   rN   rm   top_patternsr\   pattern_vecscoresranked_patternsrq   rankedp_scorepattern_keyri   rn   round_results                     r   run_one_roundzIterationManager.run_one_round   s     	q -e.A.A-B!EDTDTCUVW   &)$%(M ::+++//0CSI*../A1E"&&z<@ , 
 	nU%8%8$9XyYZ  } $G')"mm::79;MN'2$ $ (\\44\5CTCTUFKKIK \\00"'"5"5"9 1 F
 !RT 'L&)"F
 .44VTQqVO4 #5!hhvaeeIr.B'CD16u!+. # KK,S-A,B)LMKKRS '**+;SA

))/.)Y (('!(mu:!2LQL]L]LibiinnU5F5F&G Hor
 	!(mu: ,-? @
 	 C 5s   8L feedbackc                    |j                  dg       }|j                  dg       }|D ]W  }|j                  d      }| j                  j                  |      }|2| j                  j                  |j                  |       Y |D ]W  }|j                  d      }| j                  j                  |      }|2| j                  j                  |j                  |       Y | j                  j                  |j                        }| j                  j                  |j                        }	d}
|	r)|	j                  d      rt        j                  |	d         }
|
H|F| j                  |
z  d| j                  z
  |z  z   }t        j                  d| j                   d       nx||}t        j                  d	       n^|
|
}t        j                  d
       nD| j                  j!                         }t        j"                  |      }t        j%                  d       ||_        t        j                  dt        j(                  j+                  |      dd       |S )uH  Update user vector based on feedback.
        
        Formula: u_t = α * u_llm + (1-α) * u_feedback
        
        Args:
            state: Current iteration state
            feedback: Dictionary with 'positive' and 'negative' pattern lists
            
        Returns:
            Updated user preference vector
        positivenegativerh   NrJ   rP   z!Stage4: Fused user vector (alpha=r8   z2Stage4: Using feedback vector only (no LLM intent)z2Stage4: Using LLM intent vector only (no feedback)z3Stage4: No user vector available, using zero vectorz"Stage4: Updated user vector (norm=r7   )r?   splitr0   rt   r/   add_positiver   add_negativer.   compute_feedback_vectorload_intentrD   arrayr+   r;   r<    get_sentence_embedding_dimensionzerosr>   r   rE   rF   )r   rM   r   positive_patternsnegative_patternsr   pattern_listr}   
u_feedbackintent_datarJ   u_tdims                r   update_user_vectorz#IterationManager.update_user_vector   s    %LLR8$LLR8 -K&,,S1L--66|DK&((D - -K&,,S1L--66|DK&((D	 - \\99%--H
 kk--emm<;??73HH[12E !7##e+q43D3D/D
.RRCKK;D<M<M;NaPQ#CKKLMCKKLM --@@BC((3-CNNPQ  89LS8QQRST
r   c           	          | j                  ||      }|j                  |d   |d   |d   |d   |d   |d   |j                         dS )ax  Run one iteration step: mine, rank, return results for user feedback.
        
        This method is designed for interactive iteration where:
        1. Mine and rank patterns
        2. Return results for user to provide feedback
        3. After feedback, call update_and_train() to update vector and train model
        
        Args:
            state: Current iteration state
            mining_params: Optional mining parameters
            trainer: Optional PreferenceTrainer instance for training after feedback
            
        Returns:
            Dictionary containing round results (patterns, rules, etc.)
        rm   rn   ro   rp   rq   rr   )r   rm   rn   ro   rp   rq   rr   is_final)r   r   r    )r   rM   rN   trainerr   s        r   run_one_iteration_stepz'IterationManager.run_one_iteration_step7  sj    & ))%? (($Z0!'**+;<'6!-.A!B ,-? @))+	
 		
r   training_epochsc                 4   | j                  ||      }d}|	 t        j                  d|j                   d       |j	                  |dd      }d||j                  dg       d}t        j                  d	|j                  d      r|j                  dg       d
   nd d       nddd}|+t        t        j                  j                  |            |dS d|dS # t        $ r2}t        j                  d| d       dt        |      d}Y d}~hd}~ww xY w)a  Update user vector based on feedback and immediately train model.
        
        Args:
            state: Current iteration state
            feedback: Dictionary with 'positive' and 'negative' pattern lists
            trainer: Optional PreferenceTrainer instance
            training_epochs: Number of epochs for training (default 5 for quick training)
            
        Returns:
            Dictionary with training results
        Nz#Stage4: Training model after round z	 feedback    )epochs
batch_size	save_pathTloss)trainedr   loss_historyz*Stage4: Model trained successfully (loss: zN/Ar8   zStage4: Training failed: r9   F)r   errorzNo trainer provided)r   reasonrl   )rr   training)r   r;   r<   r   trainr?   rH   r>   r%   rz   rD   rE   rF   )	r   rM   r   r   r   updated_vectortraining_resultr   rK   s	            r   update_and_trainz!IterationManager.update_and_trainW  sU     00A A%BUBUAVV_`a!--*!" (   $-$+KK$;#
 Hhohshstzh{U[]_I`acId  BG  IH  HI  J  K !/O JXIcbiinn^&D E'
 	
il'
 	
  !:1#>N$ V#s   BC 	D%(DDKc                 8   t         j                  d| d       t        |||      }| j                  ||      |_        g }|j                         sZ| j                  ||      }|j                  |       t         j                  d|j                   d| d       |j                         sZ| j                  ||      }||||d   |d   |j                  di       |j                  4t        t        j                  j                  |j                              d
S d	d
S )a  Run iterative preference refinement.
        
        Algorithm: Iterative Preference Refinement
        
        Input: Q (query), K (rounds)
        Output: P* (best patterns)
        
        Initialize u0
        for t=1..K:
          Generate Ct (candidate patterns)
          Rank Ct by ut
          Collect Ft (feedback)
          Update ut
        
        Return best
        
        Args:
            query: User query
            user_id: User identifier
            K: Number of iteration rounds
            mining_params: Optional mining parameters
            
        Returns:
            Dictionary containing final results and iteration history
        z)Stage4: Starting iterative refinement (K=r8   rY   rQ   z
 completedrm   rn   rq   rl   )r   total_roundsiteration_historyfinal_patternsfinal_rulesfinal_similarity_scoresrr   )r;   r<   r
   rL   r   r    r   r   r   _final_rankr?   rz   rD   rE   rF   )	r   r   r   r   rN   rM   r   r   final_results	            r   runzIterationManager.run  s(   6 	?s!DE wq1 !11%A  ##%--e]CL$$\2KK.)<)<(=QqcLM ##% ''}= !2*:6'0'3'7'78KR'PLQL]L]LibiinnU5F5F&G H
 	
 ps
 	
r   c           	      n   t         j                  d       |dddd}| j                  j                  |j	                  dd      |j	                  dd      |j	                  d	d            }|dd
 }|D ]*  }d|vs| j
                  j                  |d         }||d<   , d}|j                  '| j                  j                  ||j                        }n2| j                  j                  ||j                  |j                        }|}i }	|ot        t        ||      d d      }
|
D cg c]  \  }}|	 }}}|
D ]=  \  }}dj                  t        |j	                  dg                   }t!        |      |	|<   ? |j	                  dd      }| j                  j#                  ||      }|||	dS c c}}w )a  Generate final ranking after all iterations.
        
        Args:
            state: Final iteration state
            mining_params: Optional mining parameters
            
        Returns:
            Dictionary containing final ranked patterns and rules
        z Stage4: Generating final rankingNrR   rS   rT   rU   rV   rW   rX   rZ   r[   r\   r]   c                     | d   S r`   r(   ra   s    r   rc   z.IterationManager._final_rank.<locals>.<lambda>  rd   r   Tre   rh   ri   rj   rk   )rm   rn   rq   )r;   r<   r-   rs   r?   r0   rt   r   r.   ru   rv   r   r   rw   rx   ry   rz   r{   )r   rM   rN   rm   r|   r\   r}   r~   r   rq   r   r   r   r   r   ri   rn   s                    r   r   zIterationManager._final_rank  s    	67 %($%(M ::+++//0CSI*../A1E"&&z<@ , 
  }#G')"mm::79;MN'2$ $
 (\\44\5CTCTUF\\00"'"5"5 1 F 'L&)"F
 .44VTQqVO4"5!hhvaeeIr.B'CD16u!+. # '**+;SA

))/.)Y (!2
 	
 5s   +F1)NNNrR   )N)NN)NrS   )r!   r"   r#   r$   rz   r   r%   r   rD   ndarrayrL   r
   r   r   r   r   r   r   r&   r   r   r   r(   r   r   r*   r*   3   s   ?DH48'*)$)0=c =C =HRZZ<P =@ @Dh> h$,T#s(^$<hHLSRUXhT> >$(d3i$8>=?ZZ>B JN'+
N 
.6tCH~.F
04S#X
D !%/01
n 1
"&sDI~"61
 *-1
 6:#s(^1
h 7;9
 9
s 9
s 9
#DcN39
?CCH~9
x >BB
 B
"*4S>":B
FJ3PS8nB
r   r*   )r$   loggingnumpyrD   typingr   r   r   r   r   r   	getLoggerr!   r;   r
   r*   r(   r   r   <module>r      s@    @   3 3 			8	$%5 %5PT
 T
r   