
    @i'                         d Z ddlZddlZddlmZ ddlZddlZddlmZm	Z	 ddl
mZ  ej                  e      Z G d d      Zy)z;LLM client for loading and running local fine-tuned models.    N)Optional)AutoTokenizerTextIteratorStreamer)Threadc                   h    e Zd ZdZddefdZd Zd Zddedee   d	efd
Z	ddedee   d	efdZ
d Zy)	LLMClientz0Client for loading and running local LLM models.config_pathc                    t        |dd      5 }t        j                  |      | _        ddd       d| _        d| _        | j                  d   d   | _        | j                  d   d   | _        | j                          y# 1 sw Y   VxY w)	z}Initialize LLM client with configuration.
        
        Args:
            config_path: Path to configuration file
        rzutf-8)encodingN	inferencedevicemodeltemplate)	openyaml	safe_loadconfigr   	tokenizerr   r   _load_model)selfr	   fs      A   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/llm/client.py__init__zLLMClient.__init__   sv     +sW5..+DK 6 
kk+.x8G,Z8 65s   A??Bc                    	 ddl }d}||j                  vr|j                  j                  d|       ddlm} | j
                  d   }|d   |d   |j                  dd	      |j                  d
d      d}|j                  d      r|d   |d<    ||      | _        t        j                  d       y# t        t        f$ r3}t        j                  d| d       | j                          Y d}~yd}~wt        $ rG}t        j                  d|        t        j                  d       | j                          Y d}~yd}~ww xY w)z0Load the base model and optionally LoRA adapter.r   Nu5   /home/ubuntu/codebase/yexijia/保研/LlamaFactory/src)	ChatModelr   model_name_or_pathr   finetuning_typenonetrust_remote_codeT)r   r   r   r    adapter_name_or_pathz+Model loaded successfully with LlamaFactoryzLlamaFactory not available: z, using fallback methodz(Failed to load model with LlamaFactory: z+Falling back to direct transformers loading)syspathinsertllamafactory.chatr   r   get
chat_modelloggerinfoImportErrorModuleNotFoundErrorwarning_load_model_fallback	Exceptionerror)r   r"   llamafactory_pathr   model_configargses          r   r   zLLMClient._load_model!   s-   !	( W 0#453;;w/L '33G&H(4#/#3#34Ev#N%1%5%56I4%P	D  67/;<R/S+, (oDOKKEF01 	(NN9!<STU%%'' 	(LLCA3GHKKEF%%''	(s$   B-B0 0E ?)C--E 9=D;;E c                 8   ddl m} | j                  d   d   }t        j                  |d      | _        | j                  d   d   rt        j                  nt        j                  }|j	                  ||| j                  d	k(  rd
ndd      | _
        | j                  dk(  r*| j                  j                  | j                        | _
        | j
                  j                  %| j
                  j                  | j
                  _        t        j                  d       y)z,Fallback method using transformers directly.r   )AutoModelForCausalLMr   r   T)r    r   use_fp16cudaautoN)torch_dtype
device_mapr    cpuz"Model loaded using fallback method)transformersr5   r   r   from_pretrainedr   torchfloat16float32r   r   to	pad_token	eos_tokenr(   r)   )r   r5   
model_pathr9   s       r   r-   zLLMClient._load_model_fallbackF   s    5[[)*>?
 '66"
 (,{{;'?
'KemmQVQ^Q^)99#!%!6vD"	 : 

 ;;%t{{3DJ >>##+'+~~'?'?DNN$89    Npromptsystemreturnc                    	 t        | d      rpg }|r|j                  d|d       |j                  d|d       | j                  j                  |      }|r%|d   j                  }d|v rd|v r
g }d}	 |j                  d|      }|dk(  rncd}	|}
t        |t        |            D ]'  }||   dk(  r|	d	z  }	||   dk(  s|	d	z  }	|	dk(  s%|}
 n |	dk(  r|j                  ||
f       |
d	z   }nn{|r*|d   \  }}	 dd
l}|||d	z    }|j                  |       |}|S |j                  d      }|dk7  rHd}	t        |t        |            D ].  }||   dk(  r|	d	z  }	||   dk(  s|	d	z  }	|	dk(  s%|||d	z    } |S  |S y| j                  ||      S # j                  $ r" t        |      d	kD  r|d   \  }}|||d	z    }Y |S w xY w# t        $ r0}t        j                  d|        dt        |       cY d
}~S d
}~ww xY w)zGenerate response from the model.
        
        Args:
            prompt: Input prompt text
            system: Optional system message
            
        Returns:
            Generated text response
        r'   rG   )rolecontentuserr   {}   N zGeneration error: zError: )hasattrappendr'   chatresponse_textfindrangelenjsonloadsJSONDecodeError_generate_fallbackr.   r(   r/   str)r   rF   rG   messages	responsesrU   json_objectsstartstart_bracebrace_count	end_braceifirst_start	first_endrY   	json_text
last_startlast_endfirst_bracer3   s                       r   generatezLLMClient.generated   s   T	&t\* OOX&$IJF CD
 !OO00:	$-aL$>$>M m+}0D') !"*7*<*<S%*HK*b0 %*+K(3I%*;M8J%K#0#3s#:$/1$4K%21%5%<$/1$4K'2a'745	(- &L  +a/ , 3 3[)4L M(1A %' #. (5A!_2K	Y +,9+iPQk,R	 $

9 509* )( +8*<*<S*AK*b0./).{C<N)OA'4Q'73'>(3q(8)6q)9S)@(3q(8+6!+;<I+VWXYVY<ZM,1(( *P )( ..vv>>3 $(#7#7 Y#&|#4q#8;G;K$8J4A*XVWZ4XM  )()Y6  	&LL-aS12SVH%%	&sm   B?G 
G /G =F G AG %
G 0
G ;G ?G -G>G GG 	G>%G93G>9G>c                 L   |r| d| }n|}| j                   j                  |d      }| j                  dk(  r|j                  | j                        }| j                  d   }|d   |d   |d   |d	   | j                   j
                  | j                   j                  d
}t        j                         5   | j                  j                  |fi |}ddd       | j                   j                  d   |j                  d   d d      }|j                         S # 1 sw Y   IxY w)zFallback generation method.z

pt)return_tensorsr7   r   max_new_tokenstemperaturetop_p	do_sample)rp   rq   rr   rs   pad_token_ideos_token_idNr   rP   T)skip_special_tokens)r   encoder   rA   r   rt   ru   r>   no_gradr   rl   decodeshapestrip)	r   rF   rG   full_promptinputsinfer_configgeneration_configoutputsgenerated_texts	            r   r\   zLLMClient._generate_fallback   s"    #HD1K K &&{4&H;;& YYt{{+F {{;/*+;<'6!'*%k2 NN77 NN77
 ]]_)djj))&F4EFG  ..AJv||A'( $ / 

 ##%% _s   4DD#c                 $    | j                          y)z1Alias for _load_model for backward compatibility.N)r   )r   s    r   
load_modelzLLMClient.load_model   s    rE   )zconfig/config.yaml)N)__name__
__module____qualname____doc__r]   r   r   r-   r   rl   r\   r    rE   r   r   r      sc    :C #(J:<^&s ^&HSM ^&S ^&@"& "&hsm "&s "&HrE   r   )r   loggingostypingr   r>   r   r<   r   r   	threadingr   	getLoggerr   r(   r   r   rE   r   <module>r      s;    A  	    < 			8	$[ [rE   