
    #i                     h    d Z ddlZddlZddlZddlmZmZ  ej                  e      Z	 G d d      Z
y)zGPreference parser for converting LLM output into structured parameters.    N)DictAnyc                       e Zd ZdZdddddZddeeef   fd	Zddeeef   d
eeef   fdZ	ded
eeef   fdZ
ded
efdZded
eeef   fdZdeeef   d
eeef   fdZy)PreferenceParserz?Parser for extracting structured parameters from LLM responses.g333333?   
confidenceg      ?)min_participationmax_pattern_sizeprioritymin_confidenceNdefault_paramsc                 0    | j                  |      | _        y)zInitialize parser with default parameters.
        
        Args:
            default_params: Default parameter values
        N)_normalize_default_paramsr   )selfr   s     D   /home/ubuntu/codebase/yexijia/保研/iCoLoc/src/preference/parser.py__init__zPreferenceParser.__init__   s     #<<^L    returnc                     | j                   j                         }|s|S ddddd}|j                         D ]  \  }}|j                  ||      }|||<    |S )z0Normalize config defaults to parser schema keys.r	   r
   r   r   )default_min_participationdefault_max_pattern_sizedefault_prioritydefault_min_confidence)DEFAULT_PARAMScopyitemsget)r   r   
normalizedkey_mappingkeyvalue
target_keys          r   r   z*PreferenceParser._normalize_default_params   so    ((--/
 *=(: *&6	
 )..0JC$c2J%*Jz" 1 r   textc                 *   | j                  |      }|r'	 t        j                  |      }| j                  |      S | j                  |      }| j                  |      S # t        j                  $ r"}t
        j                  d|        Y d}~Rd}~ww xY w)zParse LLM output into structured parameters.
        
        Args:
            text: Raw LLM response text
            
        Returns:
            Dictionary with parsed parameters
        zJSON decode error: N)_extract_jsonjsonloads_validate_and_fill_defaultsJSONDecodeErrorloggerwarning_extract_with_regex)r   r#   
json_matchparamses        r   parse_preferencez!PreferenceParser.parse_preference0   s     ''-
:J/77??
 ))$///77 '' :!4QC899:s   %A B0BBc                     d}t        j                  ||t         j                        }|D ]  }	 t        j                  |       |c S  y# t        j
                  $ r Y 4w xY w)zExtract JSON from text.
        
        Args:
            text: Input text
            
        Returns:
            Extracted JSON string or None
        z\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}N)refindallDOTALLr&   r'   r)   )r   r#   json_patternmatchesmatchs        r   r%   zPreferenceParser._extract_jsonF   s^     :**\4;E

5!	   '' s   A		AAc                    i }g d}|D ]I  }t        j                  ||t         j                        }|s+	 t        |j	                  d            |d<    n g d}|D ]I  }t        j                  ||t         j                        }|s+	 t        |j	                  d            |d<    n ddg}g dg d	g d
d}|D ]|  }t        j                  ||t         j                        }|s+|j	                  d      j                         |j                         D ]   \  }	}
t        fd|
D              s|	|d<    | ~ |S # t
        $ r Y =w xY w# t
        $ r Y w xY w)zExtract parameters using regex patterns.
        
        Args:
            text: Input text
            
        Returns:
            Dictionary with extracted parameters
        )z%min[_\s]*participation[:\s]*([0-9.]+)u   参与率[:\s]*([0-9.]+)zparticipation[:\s]*([0-9.]+)   r	   )z%max[_\s]*pattern[_\s]*size[:\s]*(\d+)u   模式大小[:\s]*(\d+)zpattern[_\s]*size[:\s]*(\d+)u   (\d+)[阶个]r
   zpriority[:\s]*["\']?(\w+)["\']?u    优先级[:\s]*["\']?(\w+)["\']?)r   u	   置信度conf)participationu	   参与率part)sizeu   大小u   规模r   r;   r=   c              3   &   K   | ]  }|v  
 y wN ).0kwr!   s     r   	<genexpr>z7PreferenceParser._extract_with_regex.<locals>.<genexpr>   s     :22;s   r   )
r2   search
IGNORECASEfloatgroup
ValueErrorintlowerr   any)r   r#   r.   participation_patternspatternr7   size_patternspriority_patternspriority_keywordsr    keywordsr!   s              @r   r,   z$PreferenceParser._extract_with_regex]   sb    "

 .GIIgtR]];E27A2GF./ .
 %GIIgtR]];E14U[[^1DF-. % //

 >C0
 )GIIgtR]];EA,,.%6%<%<%>MC:::-0z* &?	 ) O "   " s#   D;E;	EE	EEr.   c                    | j                   j                         }d|v r;	 t        |d         }d|cxk  rdk  r	n n||d<   nt        j	                  d|        d|v r;	 t        |d         }d|cxk  rdk  r	n n||d<   nt        j	                  d	|        d|v rCt        |d         j                         }g d}||v r||d<   |S t        j	                  d|        |S # t
        t        f$ r t        j	                  d|d           Y w xY w# t
        t        f$ r t        j	                  d
|d           Y w xY w)zValidate parameters and fill missing ones with defaults.
        
        Args:
            params: Parsed parameters
            
        Returns:
            Validated and complete parameter dictionary
        r	   g        g      ?z min_participation out of range: zInvalid min_participation: r
   r9      zmax_pattern_size out of range: zInvalid max_pattern_size: r   r>   zInvalid priority: )
r   r   rG   r*   r+   rI   	TypeErrorrJ   strrK   )r   r.   resultvalvalid_prioritiess        r   r(   z,PreferenceParser._validate_and_fill_defaults   sp    $$))+ &(\F#678#$$25F./NN%EcU#KL
 'Z&!345>r>14F-.NN%DSE#JK
 fZ()//1CF&&%(z"  !3C59:/ 	* \!<VDW=X<YZ[\ 	* Z!;FCU<V;WXYZs#   :C" :D "*DD*D?>D?r@   )__name__
__module____qualname____doc__r   r   rV   r   r   r   r0   r%   r,   r(   rA   r   r   r   r      s    I ! 	NMtCH~ MS#X RVWZ\_W_R` &8S 8T#s(^ 8,# # .> >S#X >@*$sCx. *T#s(^ *r   r   )r]   r&   loggingr2   typingr   r   	getLoggerrZ   r*   r   rA   r   r   <module>ra      s2    M   	 			8	$| |r   