
    @i'                         S r SSKrSSKrSSKJr  SSKrSSKrSSKJrJ	r	  SSK
Jr  \R                  " \5      r " S S5      rg)z;LLM client for loading and running local fine-tuned models.    N)Optional)AutoTokenizerTextIteratorStreamer)Threadc                   |    \ rS rSrSrSS\4S jjrS rS rSS\S	\	\   S
\4S jjr
SS\S	\	\   S
\4S jjrS rSrg)	LLMClient   z0Client for loading and running local LLM models.config_pathc                    [        USSS9 n[        R                  " U5      U l        SSS5        SU l        SU l        U R                  S   S   U l        U R                  S   S   U l        U R                  5         g! , (       d  f       N[= f)	z]Initialize LLM client with configuration.

Args:
    config_path: Path to configuration file
rzutf-8)encodingN	inferencedevicemodeltemplate)	openyaml	safe_loadconfigr   	tokenizerr   r   _load_model)selfr
   fs      A   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/llm/client.py__init__LLMClient.__init__   sv     +sW5..+DK 6 
kk+.x8G,Z8 65s   A==
Bc                     SSK nSnX!R                  ;  a  UR                  R                  SU5        SSKJn  U R
                  S   nUS   US   UR                  SS	5      UR                  S
S5      S.nUR                  S5      (       a  US   US'   U" U5      U l        [        R                  S5        g! [        [        4 a3  n[        R                  SU S35        U R                  5          SnAgSnAf[         aG  n[        R                  SU 35        [        R                  S5        U R                  5          SnAgSnAff = f)z0Load the base model and optionally LoRA adapter.r   Nu5   /home/ubuntu/codebase/yexijia/保研/LlamaFactory/src)	ChatModelr   model_name_or_pathr   finetuning_typenonetrust_remote_codeT)r   r   r    r"   adapter_name_or_pathz+Model loaded successfully with LlamaFactoryzLlamaFactory not available: z, using fallback methodz(Failed to load model with LlamaFactory: z+Falling back to direct transformers loading)syspathinsertllamafactory.chatr   r   get
chat_modelloggerinfoImportErrorModuleNotFoundErrorwarning_load_model_fallback	Exceptionerror)r   r$   llamafactory_pathr   model_configargses          r   r   LLMClient._load_model!   s.   !	( W 0#453;;w/L '33G&H(4#/#3#34Ev#N%1%5%56I4%P	D  677/;<R/S+, (oDOKKEF01 	(NN9!<STU%%'' 	(LLCA3GHKKEF%%''	(s$   B3B6 6E)C44E=EEc                 >   SSK Jn  U R                  S   S   n[        R                  " USS9U l        U R                  S   S   (       a  [        R                  O[        R                  nUR	                  UUU R                  S	:X  a  S
OSSS9U l
        U R                  S:X  a*  U R                  R                  U R                  5      U l
        U R
                  R                  c%  U R
                  R                  U R
                  l        [        R                  S5        g)z,Fallback method using transformers directly.r   )AutoModelForCausalLMr   r   T)r"   r   use_fp16cudaautoN)torch_dtype
device_mapr"   cpuz"Model loaded using fallback method)transformersr8   r   r   from_pretrainedr   torchfloat16float32r   r   to	pad_token	eos_tokenr*   r+   )r   r8   
model_pathr<   s       r   r/   LLMClient._load_model_fallbackF   s    5[[)*>?
 '66"
 (,{{;'?
'KemmQVQ^Q^)99#!%!6vD"	 : 

 ;;%t{{3DJ >>##+'+~~'?'?DNN$89    Npromptsystemreturnc                 L    [        U S5      (       Ga  / nU(       a  UR                  SUS.5        UR                  SUS.5        U R                  R                  U5      nU(       Ga9  US   R                  nSU;   Ga!  SU;   Ga  / nSn UR                  SU5      nUS:X  a  OiSn	Un
[        U[        U5      5       H+  nX[   S:X  a  U	S	-  n	M  X[   S:X  d  M  U	S	-  n	U	S:X  d  M)  Un
  O   U	S:X  a  UR                  X45        U
S	-   nOOM  U(       a(  US   u  p SS
KnX\US	-    nUR                  U5        UnU$ UR                  S5      nUS:w  aM  Sn	[        U[        U5      5       H2  nX[   S:X  a  U	S	-  n	M  X[   S:X  d  M  U	S	-  n	U	S:X  d  M)  UUUS	-    n  U$    U$ gU R                  X5      $ ! WR                   a#    [        U5      S	:  a  US   u  nnUUUS	-    n U$ f = f! [         a0  n[        R                  SU 35        S[        U5       3s S
nA$ S
nAff = f)zGenerate response from the model.

Args:
    prompt: Input prompt text
    system: Optional system message
    
Returns:
    Generated text response
r)   rK   )rolecontentuserr   {}   N zGeneration error: zError: )hasattrappendr)   chatresponse_textfindrangelenjsonloadsJSONDecodeError_generate_fallbackr0   r*   r1   str)r   rJ   rK   messages	responsesrY   json_objectsstartstart_bracebrace_count	end_braceifirst_start	first_endr]   	json_text
last_startlast_endfirst_bracer5   s                       r   generateLLMClient.generated   s   T	&t\** OOX&$IJF CD
 !OO00:	$-aL$>$>M m+}0D') !"*7*<*<S%*HK*b0 %*+K(3I%*;M8J%K#0#3s#:$/1$4K%2%5%<$/1$4K'2a'745	(- &L  +a/ , 3 3[4L M(1A %' #. (5A!_2K	Y +,9iPQk,R	 $

9 509* )( +8*<*<S*AK*b0./).{C<N)OA'4'73'>(3q(8)6)9S)@(3q(8+6!+;<I+VWXYVY<ZM,1(( *P )( ..v>>3 $(#7#7 Y#&|#4q#8;G;K$8J4A*XVWZ4XM  )()Y6  	&LL-aS12SVH%%	&sm   CG) 	G) %5G) F3 9G) ;AG) 	G) 
G) G) "G) 3/G&"G) %G&&G) )
H#3%HH#H#c                 X   U(       a  U SU 3nOUnU R                   R                  USS9nU R                  S:X  a  UR                  U R                  5      nU R                  S   nUS   US   US   US	   U R                   R
                  U R                   R                  S
.n[        R                  " 5          U R                  R                  " U40 UD6nSSS5        U R                   R                  WS   UR                  S   S SS9nUR                  5       $ ! , (       d  f       NK= f)zFallback generation method.z

pt)return_tensorsr:   r   max_new_tokenstemperaturetop_p	do_sample)ru   rv   rw   rx   pad_token_ideos_token_idNr   rT   T)skip_special_tokens)r   encoder   rD   r   ry   rz   rA   no_gradr   rp   decodeshapestrip)	r   rJ   rK   full_promptinputsinfer_configgeneration_configoutputsgenerated_texts	            r   r`   LLMClient._generate_fallback   s     #HD1K K &&{4&H;;& YYt{{+F {{;/*+;<'6!'*%k2 NN77 NN77
 ]]_jj))&F4EFG  ..AJv||A'( $ / 

 ##%% _s   8D
D)c                 $    U R                  5         g)z1Alias for _load_model for backward compatibility.N)r   )r   s    r   
load_modelLLMClient.load_model   s    rI   )r)   r   r   r   r   r   )zconfig/config.yaml)N)__name__
__module____qualname____firstlineno____doc__ra   r   r   r/   r   rp   r`   r   __static_attributes__ rI   r   r   r      sc    :C #(J:<^&s ^&HSM ^&S ^&@"& "&hsm "&s "&HrI   r   )r   loggingostypingr   rA   r   r?   r   r   	threadingr   	getLoggerr   r*   r   r   rI   r   <module>r      s9    A  	    < 			8	$[ [rI   