
    @iP                     |    d Z ddlZddlZddlmZmZmZ ddlmZ ddl	m
Z
  ej                  e      Z G d d      Zy)z6Intent encoder for understanding user goals using LLM.    N)DictAnyOptional)IntentPrompt)	LLMClientc                       e Zd ZdZddedee   fdZdedee	ee
f      fdZd	e	ee
f   de	ee
f   fd
Zdedee	ee
f      fdZy)IntentEncoderz0Encoder for understanding user intent using LLM.N
llm_clientavailable_poi_typesc                 R    || _         |xs g | _        t        j                  | _        y)zInitialize intent encoder.
        
        Args:
            llm_client: LLM client instance
            available_poi_types: List of actual POI types in the dataset
        N)llmr   r   INTENT_PROMPTprompt_template)selfr
   r   s      I   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/llm/intent_encoder.py__init__zIntentEncoder.__init__   s&     #6#<" +99    queryreturnc                 ,   	 t         j                  d|dd  d       | j                  rMt         j                  dt        | j                         d       t	        j
                  || j                        }n1t         j                  d       | j                  j                  |      }t         j                  d	       d
}| j                  j                  ||      }t         j                  dt        |              t         j                  d       | j                  |      }|t         j                  d|dd         yt         j                  dt        |j                                       | j                  rxt         j                  d       t        |j                  dg             }| j!                  |      }t        |j                  dg             }t         j                  d| d| d       |j                  dd      }t        |j                  dg             }	t         j                  d| d|	        t         j                  d|j                  dg               |S # t"        $ r$}
t         j%                  d|
 d       Y d}
~
yd}
~
ww xY w)u  Parse user query to extract intent.
        
        This method implements Stage0 intent understanding:
        1. User ambiguous query (e.g., "我想开个烧烤店")
        2. LLM analyzes query and extracts structured intent
        3. Returns structured intent with business type, pattern preferences, etc.
        
        Args:
            query: User's natural language query
            
        Returns:
            Dictionary containing parsed intent, or None if parsing fails
        z8IntentEncoder: Starting intent understanding for query: Nd   z...z$IntentEncoder: Building prompt with z available POI typesz;IntentEncoder: Building prompt without POI type constraints)r   z6IntentEncoder: Calling LLM for intent understanding...z/You are an expert in spatial business analysis.)systemz(IntentEncoder: LLM raw response length: z@IntentEncoder: Extracting structured intent from LLM response...z?IntentEncoder: Failed to extract valid JSON from LLM response:    z0IntentEncoder: Extracted intent JSON with keys: z?IntentEncoder: Validating patterns against dataset POI types...pattern_preferencez#IntentEncoder: Pattern validation: u    → z valid patternsbusinesszN/Az6IntentEncoder: Successfully parsed intent - Business: z, Patterns: z#IntentEncoder: Preferred patterns: z%IntentEncoder: Error parsing intent: T)exc_info)loggerinfor   lenr   build_intent_promptr   formatr   generatedebug_extract_jsonwarninglistkeysget_validate_patterns	Exceptionerror)r   r   prompt
system_msgraw_responseintent_jsonoriginal_countvalidated_countr   pattern_countes              r   parsezIntentEncoder.parse   sV   -	KKRSXY]Z]S^R__bcd ''B3tG_G_C`Baauvw%99%AYAYZYZ--4454A KKPQJJ88,,VJ,GLLLCCDUCVWX KKZ[,,\:K"!`amnroras`tuvKKJ4P[P`P`PbKcJdef '']^!$[__5I2%N!O"55kB"%koo6JB&O"PA.AQQVWfVggvwx #z59H0Db IJMKKPQYPZZfgtfuvwKK=kooNbdf>g=hij 	LL@DtLT	s   D>I& D$I& &	J/JJr/   c                    d|vr|S g }t        | j                        }|d   D ]  }t        |t              s|D cg c]  }t        |t              s||v s| }}t        |      dk\  r|j                  |       Yt        |      dk(  sht        j                  d|         ||d<   t        |      dk(  rt        j                  d       |S c c}w )zValidate and filter pattern_preference to only include valid POI types.
        
        Args:
            intent_json: Parsed intent JSON
            
        Returns:
            Validated intent JSON with filtered patterns
        r         zSkipping single POI pattern: r   zbNo valid patterns found after filtering. Original patterns may have used POI types not in dataset.)
setr   
isinstancer&   strr   appendr   r#   r%   )r   r/   valid_patternspoi_setpatternpoi
valid_poiss          r   r)   z IntentEncoder._validate_patternsX   s      {2d../"#78Ggt, *1\#JsC4HST[^#J\ :!#%%j1ZA% <ZLIJ 9 -;()~!#NN  A ]s   CCCtextc                    	 |j                  d      }|dk(  ryd}|}t        |t        |            D ]'  }||   dk(  r|dz  }||   dk(  s|dz  }|dk(  s%|} n |dk7  rt        j	                  d       y|||dz    }t        j                  |      }t        |t              syd|v rUt        |d   t              sg |d<   |S |d   D cg c])  }t        |t              r|nt        |t              r|gng + c}|d<   |S c c}w # t
        j                  $ r"}	t        j	                  d	|	        Y d}	~	yd}	~	wt        $ r"}	t        j                  d
|	        Y d}	~	yd}	~	ww xY w)zExtract JSON object from LLM response.
        
        Args:
            text: Raw LLM response text
            
        Returns:
            Parsed JSON dictionary, or None if extraction fails
        {Nr   r7   }z!Unmatched braces in JSON responser   zJSON decode error: zError extracting JSON: )findranger   r   r%   jsonloadsr9   dictr&   r:   JSONDecodeErrorr*   r+   )
r   rA   	start_idxbrace_countend_idxijson_strintent_datapr3   s
             r   r$   zIntentEncoder._extract_json}   s   1	 		#IB KG9c$i07c>1$K!W^1$K"a'"# 1 aBCIgk2H**X.K k40 ${2!+.B"CTJ8:K 45  "--A!B9!BA (40Z3=OqcUWW!B9K 45
 9 ## 	NN045 	LL21#67	sX   D 2D 
D D 7-D %D D 
.D 8D  D E#D55E#EE#)N)__name__
__module____qualname____doc__r   r   r&   r   r:   r   r   r4   r)   r$    r   r   r	   r	      s    :	:9 	:8D> 	:;3 ;8DcN#; ;z#d38n #c3h #J:# :(4S>*B :r   r	   )rV   rH   loggingtypingr   r   r   llm.intent_promptr   
llm.clientr   	getLoggerrS   r   r	   rW   r   r   <module>r]      s8    <   & & *  			8	$j jr   