
    ~h!2                    6   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ erd dlmZ  ej@                  e!      Z"ddgZ#dddddddddddddgdddddddZ$dZd Z%e G d! d"             Z&e G d# d$             Z' G d% d&e(e      Z)e G d' d(             Z*d)e(e+z  d*e(e+z  fd+Z,e G d, d-             Z-e G d. d/             Z.e)j^                  fd0e0e.   d1e)d*e*fd2Z1e)jd                  e)j^                  e)jf                  e)jh                  iZ5d3ed*e+fd4Z6d5e(d6e0e(   d*e(fd7Z7i ddfd8e0e*   d9e+e)e)f   e+e(e(f   z  d:e8d;e8d*e0e+e(ef      f
d<Z9d=e(d>e(d?e(d*e'fd@Z:dAe(d*e8fdBZ; G dC dD      Z< G dE dFe<      Z= G dG dHe<      Z> G dI dJe<      Z? G dK dLe<      Z@ G dM dNe@      ZA G dO dPeA      ZB G dQ dRe@      ZC G dS dTe@      ZDeDZE G dU dVeD      ZFeFZG G dW dXe@      ZHeHZIg dYZJy)[    N)	Generator)deepcopy)asdict	dataclass)Enum)Thread)TYPE_CHECKINGAny   )
TokenUsage)Tool)RateLimiter_is_package_availableencode_image_base64make_image_urlparse_json_blob)StoppingCriteriaListcerebraszfireworks-aijson_schemaFz4A free form text description of the thought process.Thoughtstring)descriptiontitletypez3Valid Python code snippet implementing the thought.Code)thoughtcoder   r   ThoughtAndCodeAnswerobject)additionalProperties
propertiesrequiredr   r   T)schemanamestrict)r   r   c                 "    fd |       S )Nc                     t        | d      r9t        |       j                         D ci c]  \  }}|k7  s| |       c}}S | S c c}}w )N__dataclass_fields__)hasattrr   items)objkvconvert
ignore_keys      M/opt/mcp/mcp-sentiment/venv/lib/python3.12/site-packages/smolagents/models.pyr.   z1get_dict_from_nested_dataclasses.<locals>.convertC   sI    3./.4Sk.?.?.AUdaQ*_AwqzMUU
 Vs
   A	A	 )r+   r/   r.   s    `@r0    get_dict_from_nested_dataclassesr2   B   s    
 3<    c                   6    e Zd ZU eed<   eed<   dZedz  ed<   y)ChatMessageToolCallFunction	argumentsr$   Nr   )__name__
__module____qualname__r
   __annotations__strr   r1   r3   r0   r5   r5   K   s    N
I"Kt"r3   r5   c                   8    e Zd ZU eed<   eed<   eed<   defdZy)ChatMessageToolCallfunctionidr   returnc                     d| j                    dt        | j                  j                         dt        | j                  j                         S )NzCall: z
: Calling z with arguments: )r?   r;   r>   r$   r6   selfs    r0   __str__zChatMessageToolCall.__str__X   sD    y
3t}}/A/A+B*CCTUXY]YfYfYpYpUqTrssr3   N)r7   r8   r9   r5   r:   r;   rD   r1   r3   r0   r=   r=   R   s     ))G
It tr3   r=   c                   0    e Zd ZdZdZdZdZdZed        Z	y)MessageRoleuser	assistantsystemz	tool-callztool-responsec                 @    | D cg c]  }|j                    c}S c c}w N)value)clsrs     r0   roleszMessageRole.rolesc   s    !$%A%%%s   N)
r7   r8   r9   USER	ASSISTANTSYSTEM	TOOL_CALLTOOL_RESPONSEclassmethodrO   r1   r3   r0   rF   rF   \   s-    DIFI#M& &r3   rF   c            
           e Zd ZU eed<   dZeeeee	f      z  dz  ed<   dZ
ee   dz  ed<   dZe	dz  ed<   dZedz  ed<   d Zeddede	dz  dedz  d	d fd
       Zd Zd	efdZy)ChatMessageroleNcontent
tool_callsrawtoken_usagec                 B    t        j                  t        | d            S )Nr[   )r/   )jsondumpsr2   rB   s    r0   model_dump_jsonzChatMessage.model_dump_jsonp   s    zz:4ERSSr3   datar@   c                     |j                  d      r5|d   D cg c]"  }t        t        d	i |d   |d   |d         $ }}||d<    | |d   |j                  d      |j                  d      ||      S c c}w )
NrZ   r>   r?   r   r>   r?   r   rX   rY   )rX   rY   rZ   r[   r\   r1   )getr=   r5   )rM   ra   r[   r\   tcrZ   s         r0   	from_dictzChatMessage.from_dicts   s    88L!
 |,	  $8J2j>JrRVx^`ag^hJ  ",DfHHY'xx-#
 	
s   'A5c                     t        |       S rK   )r2   rB   s    r0   dictzChatMessage.dict   s    /55r3   c                 .   t        | j                        xs d}| j                  rj|dj                  | j                  D cg c]B  }t	        j
                  |j                  j                  |j                  j                  d      D c}      z  }|S c c}w )N 
)toolr6   )	r;   rY   rZ   joinr^   r_   r>   r$   r6   )rC   renderedrl   s      r0   render_as_markdownzChatMessage.render_as_markdown   s{    t||$*??		 !% JJ(:(:I`I`ab H s    AB
NN)r7   r8   r9   rF   r:   rY   r;   listrh   r
   rZ   r=   r[   r\   r   r`   rU   rf   ro   r1   r3   r0   rW   rW   h   s    
15GS4S#X''$.537J()D07Ct%)Kd")T 
T 
d
 

UYHY 
er 
 
"6	C 	r3   rW   r6   r@   c                 t    t        | t              r| S 	 t        j                  |       S # t        $ r | cY S w xY wrK   )
isinstancerh   r^   loads	Exception)r6   s    r0   parse_json_if_neededrv      s;    )T"	::i(( 		s   ) 77c                   b    e Zd ZU dZdZedz  ed<   dZedz  ed<   dZ	edz  ed<   dZ
edz  ed<   y)ChatMessageToolCallStreamDeltaz>Represents a streaming delta for tool calls during generation.Nindexr?   r   r>   )r7   r8   r9   __doc__ry   intr:   r?   r;   r   r>   r5   r1   r3   r0   rx   rx      s?    HE3:Bd
D#*37H)D07r3   rx   c                   P    e Zd ZU dZedz  ed<   dZee   dz  ed<   dZ	e
dz  ed<   y)ChatMessageStreamDeltaNrY   rZ   r\   )r7   r8   r9   rY   r;   r:   rZ   rq   rx   r\   r   r1   r3   r0   r}   r}      s4    GS4Z>BJ34t;B%)Kd")r3   r}   stream_deltasrX   c                     i }d}d}d}| D ]  }|j                   r2||j                   j                  z  }||j                   j                  z  }|j                  r||j                  z  }|j                  sj|j                  D ]d  }|j
                  H|j
                  |vr:t        |j                  |j                  t        dd            ||j
                  <   ||j
                     }|j                  r|j                  |_        |j                  r|j                  |_        |j                  s|j                  j                  rGt        |j                  j                        dkD  r%|j                  j                  |j                  _        |j                  j                  s$|j                  xj                  |j                  j                  z  c_        Yt        d|         t        |||j!                         D 	cg c]]  }	|	j                  rOt#        t        |	j                  j                  |	j                  j                        |	j                  xs dd      _ c}	t%        ||      	      S c c}	w )
zI
    Agglomerate a list of stream deltas into a single stream delta.
    rj   r   r$   r6   r?   r   r>   z/Tool call index is not provided in tool delta: r>   rc   input_tokensoutput_tokens)rX   rY   rZ   r\   )r\   r   r   rY   rZ   ry   rx   r?   r   r5   r>   r$   lenr6   
ValueErrorrW   valuesr=   r   )
r~   rX   accumulated_tool_callsaccumulated_contenttotal_input_tokenstotal_output_tokensstream_deltatool_call_delta	tool_calltool_call_stream_deltas
             r0   agglomerate_stream_deltasr      sT    IK% j##,":":"G"GG<#;#;#I#II<#7#77""#/#:#: j"((4&,,4JJHf.11!0!5!5%@bTV%WI./D/DE !77L7L MI&))'6'9'9	&++)8)=)=	&//*3388SAYAYA^A^=_bc=c6E6N6N6S6SI..3*33==%..88O<T<T<^<^^8$'VWfVg%hii+jj< # +A*G*G*I
 '%..  4/88==4==GG *,,2
 +-
 
s   A"I;rl   c                     t        | j                        }g }|j                         D ]-  \  }}|d   dk(  rd|d<   d|v r|d   r|j                  |       / d| j                  | j
                  d||ddd	S )
Nr   anyr   nullabler>   r   )r   r!   r"   )r$   r   
parameters)r   r>   )r   inputsr*   appendr$   r   )rl   r!   r"   keyrL   s        r0   get_tool_json_schemar      s    $++&JH &&( !
U=E!$E&Me#j(9OOC 	! II++ ($
 r3   rY   stop_sequencesc                 X    |D ]$  }| t        |       d  |k(  s| d t        |        } & | S rK   )r   )rY   r   stop_seqs      r0   remove_stop_sequencesr   	  sA    " 0CM>#$0.X/G0 Nr3   message_listrole_conversionsconvert_images_to_image_urlsflatten_messages_as_textc                 v   g }t        |       } | D ]%  }|j                  }|t        j                         vr$t	        d| dt        j                          d      ||v r
||   |_        t        |j                  t              r|j                  D ]  }t        |t              sJ dt        |      z          |d   dk(  s/|r
J d|       |r8|j                  dd	t        t        |j                  d                  id
       ut        |d         |d<    t        |      dkD  r|j                  |d   d   k(  rt        |j                  t              sJ dt        |j                        z          |r%|d   dxx   d|j                  d   d   z   z  cc<   |j                  D ]O  }|d   dk(  r.|d   d   d   d   dk(  r|d   d   d   dxx   d|d   z   z  cc<   9|d   d   j                  |       Q |r|j                  d   d   }	n|j                  }	|j                  |j                  |	d       ( |S )aw  
    Creates a list of messages to give as input to the LLM. These messages are dictionaries and chat template compatible with transformers LLM chat template.
    Subsequent messages with the same role will be concatenated to a single message.

    Args:
        message_list (`list[dict[str, str]]`): List of chat messages.
        role_conversions (`dict[MessageRole, MessageRole]`, *optional* ): Mapping to convert roles.
        convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs.
        flatten_messages_as_text (`bool`, default `False`): Whether to flatten messages as text.
    zIncorrect role z, only z are supported for now.z%Error: this element should be a dict:r   imagez0Cannot use images with flatten_messages_as_text=	image_urlurl)r   r   r   rX   zError: wrong content:rY   rk   text)rX   rY   )r   rX   rF   rO   r   rs   rY   rq   rh   r;   updater   r   popr   r   )
r   r   r   r   output_message_listmessagerX   elementelrY   s
             r0   get_clean_message_listr     sx     13L)L ,||{((**tfGK<M<M<O;PPghii##+D1GLgoot,"?? Q!'40h2Y\_`g\h2hh06?g-7n;lSkRm9nn73(3.3^DWX_XcXcdkXlDm5n-o ,?ww?O+P(Q "#a'GLL<OPR<STZ<[,[goot4d6MPST[TcTcPd6dd4'#B'	2dW__Q=OPV=W6WW2!// FB&zV+0CB0G	0RSU0VW]0^bh0h+B/	:2>vF$QSTZQ[J[[F+B/	:AA"EF (!//!,V4!//&&#LL&O,Z r3   r   tool_name_keytool_arguments_keyc           
      b   t        |       \  }}	 ||   }|j                  |d       }t        |t              rt        |      }t        t        t        j                               dt        ||            S # t        $ r/}t        d|dt        |j	                                d      |d }~ww xY w)NzKey tool_name_key=z1 not found in the generated tool call. Got keys: z insteadr>   r   r   )r   ru   r   rq   keysrd   rs   r;   rv   r=   uuiduuid4r5   )r   r   r   tool_call_dictionary_	tool_nameetool_argumentss           r0   get_tool_call_from_textr   R  s    -d3!(7	
 *--.@$GN.#&-n=tzz|,)~V   !=""STXYmYrYrYtTuSvv~
	s   A6 6	B.?*B))B.model_idc                 \    | j                  d      d   }d}t        j                  ||       S )a[  
    Check if the model supports the `stop` parameter.

    Not supported with reasoning models openai/o3 and openai/o4-mini (and their versioned variants).

    Args:
        model_id (`str`): Model identifier (e.g. "openai/o3", "o4-mini-2025-04-16")

    Returns:
        bool: True if the model supports the stop parameter, False otherwise
    /r   z^(o3[-\d]*|o4-mini[-\d]*)$)splitrematch)r   
model_namepatterns      r0   supports_stop_parameterr   d  s0     $R(J+Gxx,,,r3   c                      e Zd Z	 	 	 	 ddededededz  fdZededz  fd       Zededz  fd	       Z		 	 	 	 	 	 dd
e
e   de
e   dz  deeef   dz  de
e   dz  deeef   dz  dedeez  dz  deeef   fdZ	 	 	 dd
e
e   de
e   dz  deeef   dz  de
e   dz  def
dZd ZdedefdZdefdZedeeef   dd fd       Zy)ModelNr   r   r   r   c                 f    || _         || _        || _        || _        d | _        d | _        || _        y rK   )r   r   r   kwargs_last_input_token_count_last_output_token_countr   )rC   r   r   r   r   r   s         r0   __init__zModel.__init__w  s;     )A%*"437$48%$,r3   r@   c                 N    t        j                  dt               | j                  S )NzAttribute last_input_token_count is deprecated and will be removed in version 1.20. Please use TokenUsage.input_tokens instead.)warningswarnFutureWarningr   rB   s    r0   last_input_token_countzModel.last_input_token_count  s$    :	

 +++r3   c                 N    t        j                  dt               | j                  S )NzAttribute last_output_token_count is deprecated and will be removed in version 1.20. Please use TokenUsage.output_tokens instead.)r   r   r   r   rB   s    r0   last_output_token_countzModel.last_output_token_count  s$    ;	

 ,,,r3   messagesr   response_formattools_to_call_fromcustom_role_conversionsr   tool_choicec                 t   |j                  d| j                        }	t        ||xs t        ||	      }
i | j                  d|
i}|t        | j                  xs d      r||d<   |||d<   |r3d|D cg c]  }t        |       c}i}|||d<   |j                  |       |j                  |       |S c c}w )	a$  
        Prepare parameters required for model invocation, handling parameter priorities.

        Parameter priority from high to low:
        1. Explicitly passed kwargs
        2. Specific parameters (stop_sequences, response_format, etc.)
        3. Default values in self.kwargs
        r   )r   r   r   r   rj   stopr   toolsr   )	r   r   r   tool_role_conversionsr   r   r   r   r   )rC   r   r   r   r   r   r   r   r   r   messages_as_dictscompletion_kwargsrl   tools_configs                 r0   _prepare_completion_kwargsz Model._prepare_completion_kwargs  s    ( $*::.H$JgJg#h 24M8M)E%=	

kk
)
 %&t}}':;,:!&)&3B/0 AST.t4TL &.9]+$$\2 	  (   Us   4B5c                     t        d      )a  Process the input messages and return the model's response.

        Parameters:
            messages (`list[dict[str, str | list[dict]]] | list[ChatMessage]`):
                A list of message dictionaries to be processed. Each dictionary should have the structure `{"role": "user/system", "content": "message content"}`.
            stop_sequences (`List[str]`, *optional*):
                A list of strings that will stop the generation if encountered in the model's output.
            response_format (`dict[str, str]`, *optional*):
                The response format to use in the model's response.
            tools_to_call_from (`List[Tool]`, *optional*):
                A list of tools that the model can use to generate responses.
            **kwargs:
                Additional keyword arguments to be passed to the underlying model.

        Returns:
            `ChatMessage`: A chat message object containing the model's response.
        z0This method must be implemented in child classesNotImplementedError)rC   r   r   r   r   r   s         r0   generatezModel.generate  s    2 ""TUUr3   c                 &     | j                   |i |S rK   )r   )rC   argsr   s      r0   __call__zModel.__call__  s    t}}d-f--r3   r   c                    t         j                  |_        |j                  sD|j                  J d       t        |j                  | j                  | j                        g|_        t        |j                        dkD  sJ d       |j                  D ]0  }t        |j                  j                        |j                  _        2 |S )zXSometimes APIs do not return the tool call as a specific object, so we need to parse it.z-Message contains no content and no tool callsr   z*No tool call was found in the model output)rF   rQ   rX   rZ   rY   r   r   r   r   rv   r>   r6   )rC   r   r   s      r0   parse_tool_callszModel.parse_tool_calls  s    ",,!!??._0__.'9K9KTMdMde"G 7%%&*X,XX* ++ 	^I+?	@R@R@\@\+]I(	^r3   c                     i | j                   d| j                  i}dD ]  }t        | |      st        | |      ||<     ddg}|D ]  }t        | |      st	        d| d         |S )zG
        Converts the model into a JSON-compatible dictionary.
        r   )custom_role_conversiontemperature
max_tokensprovidertimeoutapi_basetorch_dtype
device_maporganizationprojectazure_endpointtokenapi_keyz,For security reasons, we do not export the `z5` attribute of your model. Please export it manually.)r   r   r)   getattrprint)rC   model_dictionary	attributedangerous_attributesattribute_names        r0   to_dictzModel.to_dict  s    
kk


 	GI tY'.5dI.F +	G  !(32 	Nt^,B>BR  SH  I	
  r3   r   c           
      b     | di |j                         D ci c]  \  }}||
 c}}S c c}}w )Nr1   )r*   )rM   r   r,   r-   s       r0   rf   zModel.from_dict  s0    A'7'='='?@tq!ad@AA@s   +)Fr$   r6   N)NNNNFr"   NNN)r7   r8   r9   boolr;   r   propertyr{   r   r   rq   rW   rh   r   r
   r   r   r   r   r   rU   rf   r1   r3   r0   r   r   v  s    */#"-#-"&- -  	-
 *-  ,d
 , , -t - - ,015049=-2)35!{#5! S	D(5! c3h$.	5!
 !J-5! "&c3h$!65! '+5! 4Z$&5! 
c3h5!t ,01504V{#V S	D(V c3h$.	V
 !J-V 
V6.     @ Bc3h BG B Br3   r   c                        e Zd ZdZ	 ddeeef   dz  f fdZd Z	 	 	 dde	e
   de	e   dz  deeef   dz  d	e	e   dz  d
e
f
dZ xZS )	VLLMModela  Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving.

    Parameters:
        model_id (`str`):
            The Hugging Face model ID to be used for inference.
            This can be a path or model identifier from the Hugging Face model hub.
        model_kwargs (`dict[str, Any]`, *optional*):
            Additional keyword arguments to pass to the vLLM model (like revision, max_model_len, etc.).
    Nmodel_kwargsc                    t        d      st        d      ddlm} ddlm} |xs i | _        t        |    di | || _	         |dd|i| j                  | _
        | j                  J  ||      | _        d| _        y )	NvllmzNPlease install 'vllm' extra to use VLLMModel: `pip install 'smolagents[vllm]'`r   )LLM)get_tokenizermodelFr1   )r   ModuleNotFoundErrorr  r  !vllm.transformers_utils.tokenizerr  r  superr   r   r  	tokenizer_is_vlm)rC   r   r  r   r  r  	__class__s         r0   r   zVLLMModel.__init__+  s     %V,%&vwwC(.B"6" =x=4+<+<=
zz%%%&x0r3   c                     dd l }dd l}ddlm}m}  |        | j
                   | j
                  j                  j                  `|j                           |        |j                  j                          y )Nr   )destroy_distributed_environmentdestroy_model_parallel)gctorchvllm.distributed.parallel_stater  r  r  
llm_enginemodel_executordriver_workercollectcudaempty_cache)rC   r  r  r  r  s        r0   cleanupzVLLMModel.cleanup?  sT    	

 	 ::!

%%44B


')

 r3   r   r   r   r   r@   c                 |   ddl m}  | j                  d|| j                   ||d|}|r
d|d   d   ind }|j	                  d      }|j	                  dg       }	|j	                  d	d       }
|j	                  d
d        | j
                  j                  ||
dd      } ||j                  dd      |j                  dd      |j                  dd      |	      }| j                  j                  |||      }|d   j                  d   j                  }t        |d   j                        | _        t        |d   j                  d   j                        | _        t#        t$        j&                  |||dt)        t        |d   j                        t        |d   j                  d   j                                    S )Nr   )SamplingParams)r   r   r   r   guided_jsonr   r#   r   r   r   r   TF)r   add_generation_prompttokenizenr   r   g        r   i   )r   r   r   r   )sampling_paramsguided_options_requestoutr   r   rX   rY   r[   r\   r1   )r  r  r   r  r   r  apply_chat_templaterd   r  r   outputsr   r   prompt_token_idsr   	token_idsr   rW   rF   rQ   r   )rC   r   r   r   r   r   r  r   r"  prepared_stop_sequencesr   promptr!  r$  output_texts                  r0   r   zVLLMModel.generateP  s    	(;D;; 
*.,,&6)1	

 
 _n-1OPX1Y!Zsw$((4"3"7"7"C!%%gt4mT233"&	 4 
 )jja 

=#6zz,5(	
 jj!!+#9 " 
 !fnnQ',,'*3q6+B+B'C$(+CFNN1,=,G,G(H%&&#:KL" Q!8!89!#a&.."3"="=>	
 	
r3   rK   r   )r7   r8   r9   rz   rh   r;   r
   r   r  rq   rW   r   r   __classcell__r  s   @r0   r  r     s     /3 38nt+(!( ,015048
{#8
 S	D(8
 c3h$.	8

 !J-8
 
8
r3   r  c                        e Zd ZdZ	 	 	 ddededeeef   dz  deeef   dz  f fdZ	 	 	 dde	e
   d	e	e   dz  d
eeef   dz  de	e   dz  de
f
dZ xZS )MLXModela  A class to interact with models loaded using MLX on Apple silicon.

    > [!TIP]
    > You must have `mlx-lm` installed on your machine. Please run `pip install smolagents[mlx-lm]` if it's not the case.

    Parameters:
        model_id (str):
            The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
        tool_name_key (str):
            The key, which can usually be found in the model's chat template, for retrieving a tool name.
        tool_arguments_key (str):
            The key, which can usually be found in the model's chat template, for retrieving tool arguments.
        trust_remote_code (bool, default `False`):
            Some models on the Hub require running remote code: for this model, you would have to set this flag to True.
        load_kwargs (dict[str, Any], *optional*):
            Additional keyword arguments to pass to the `mlx.lm.load` method when loading the model and tokenizer.
        apply_chat_template_kwargs (dict, *optional*):
            Additional keyword arguments to pass to the `apply_chat_template` method of the tokenizer.
        kwargs (dict, *optional*):
            Any additional keyword arguments that you want to use in model.generate(), for instance `max_tokens`.

    Example:
    ```python
    >>> engine = MLXModel(
    ...     model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",
    ...     max_tokens=10000,
    ... )
    >>> messages = [
    ...     {
    ...         "role": "user",
    ...         "content": "Explain quantum mechanics in simple terms."
    ...     }
    ... ]
    >>> response = engine(messages, stop_sequences=["END"])
    >>> print(response)
    "Quantum mechanics is the branch of physics that studies..."
    ```
    Nr   trust_remote_codeload_kwargsapply_chat_template_kwargsc                    t        d      st        d      dd l}|xs i | _        | j                  j	                  di       j	                  d|       |xs i | _        | j
                  j	                  dd       t        |   d
|dd|  |j                  | j                  fi | j                  \  | _
        | _        |j                  | _        d	| _        y )Nmlx_lmzSPlease install 'mlx-lm' extra to use 'MLXModel': `pip install 'smolagents[mlx-lm]'`r   tokenizer_configr1  r  T)r   r   Fr1   )r   r  r5  r2  
setdefaultr3  r
  r   loadr   r  r  stream_generateis_vlm)rC   r   r1  r2  r3  r   r5  r  s          r0   r   zMLXModel.__init__  s     %X.%e  	&,"##$6;FFGZ\mn*D*J'''223JDQT(TTVT%0V[[%S$BRBR%S"
DN%55r3   r   r   r   r   r@   c           
         |t        d       | j                  d|||d|}|j                  d      }|j                  dg       }|j                  dd       }|j                  dd         | j                  j                  |fd|i| j
                  }	d}
d | j                  | j                  | j                  fd	|	i|D ]2  }|
d
z  }
|j                  z  t        fd|D              s-d   n t        |	      | _        |
| _        t        t        j                  |dt!        t        |	      |
            S )Nz(MLX does not support structured outputs.r   r   r   r   r   r   r   r   rj   r+  r   c              3   J   K   | ]  }j                  |      xd k7    yw)r   N)rfind).0r   
stop_indexr   s     r0   	<genexpr>z$MLXModel.generate.<locals>.<genexpr>  s$     Ld$**T"22Jr9Ls    #r#  r   r%  r1   )r   r   r   r  r&  r3  r9  r  r   r   r   r   r   rW   rF   rQ   r   )rC   r   r   r   r   r   r   stopsr   
prompt_idsr   responser@  r   s               @@r0   r   zMLXModel.generate  sj    &GHH;D;; 
)1
 	
 %((4!%%fb1!%%gt4mT27T^^77qqQUQpQpq
,,,TZZpPZp^op 	HQMHMM!DLeLLKZ(	 (+:$(5%&&3DE" _+	
 	
r3   )FNNr   )r7   r8   r9   rz   r;   r   rh   r
   r   rq   rW   r   r   r-  r.  s   @r0   r0  r0    s    %T #(-1<@   #s(^d*	
 %)cNT$98 ,01504*
{#*
 S	D(*
 c3h$.	*

 !J-*
 
*
r3   r0  c                   V    e Zd ZdZ	 	 	 	 ddedz  dedz  dedz  def fdZdee   d	d
fdZ	 	 ddee	   dee   dz  dee
   dz  d	eeef   fdZ	 	 	 ddee	   dee   dz  deeef   dz  dee
   dz  d	e	f
dZ	 	 	 ddee	   dee   dz  deeef   dz  dee
   dz  d	ee   f
dZ xZS )TransformersModela=  A class that uses Hugging Face's Transformers library for language model interaction.

    This model allows you to load and use Hugging Face's models locally using the Transformers library. It supports features like stop sequences and grammar customization.

    > [!TIP]
    > You must have `transformers` and `torch` installed on your machine. Please run `pip install smolagents[transformers]` if it's not the case.

    Parameters:
        model_id (`str`):
            The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
            For example, `"Qwen/Qwen2.5-Coder-32B-Instruct"`.
        device_map (`str`, *optional*):
            The device_map to initialize your model with.
        torch_dtype (`str`, *optional*):
            The torch_dtype to initialize your model with.
        trust_remote_code (bool, default `False`):
            Some models on the Hub require running remote code: for this model, you would have to set this flag to True.
        kwargs (dict, *optional*):
            Any additional keyword arguments that you want to use in model.generate(), for instance `max_new_tokens` or `device`.
        **kwargs:
            Additional keyword arguments to pass to `model.generate()`, for instance `max_new_tokens` or `device`.
    Raises:
        ValueError:
            If the model name is not provided.

    Example:
    ```python
    >>> engine = TransformersModel(
    ...     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    ...     device="cuda",
    ...     max_new_tokens=5000,
    ... )
    >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
    >>> response = engine(messages, stop_sequences=["END"])
    >>> print(response)
    "Quantum mechanics is the branch of physics that studies..."
    ```
    Nr   r   r   r1  c                    	 dd l }ddlm}m}m}	m}
m} |st        j                  dt               d}d}|j                  d      xs |j                  d      }|s||d<   t        j                  d	|        ||j                  j                         rd
nd}t        j!                  d|        d| _        	 |j%                  ||||      | _        |	j%                  ||      | _        d| _         || j(                  j*                  dd      | _        t5        | l  d| j"                   |d| y # t        $ r t        d      w xY w# t.        $ rf}dt1        |      v rM|j%                  ||||      | _        |
j%                  ||      | _         || j*                  dd      | _        n|Y d }~d }~wt2        $ r}t/        d|d|       |d }~ww xY w)Nr   )AutoModelForCausalLMAutoModelForImageTextToTextAutoProcessorAutoTokenizerTextIteratorStreamerzhPlease install 'transformers' extra to use 'TransformersModel': `pip install 'smolagents[transformers]'`zThe 'model_id' parameter will be required in version 2.0.0. Please update your code to pass this parameter to avoid future errors. For now, it defaults to 'HuggingFaceTB/SmolLM2-1.7B-Instruct'.z#HuggingFaceTB/SmolLM2-1.7B-Instructi   max_new_tokensr   zN`max_new_tokens` not provided, using this default value for `max_new_tokens`: r  cpuzUsing device: F)r   r   r1  )r1  T)skip_promptskip_special_tokensz Unrecognized configuration classz0Failed to load tokenizer and model for model_id=z: )r   r   r1   )r  transformersrH  rI  rJ  rK  rL  r  r   r   r   rd   loggerwarningr  is_availableinfor  from_pretrainedr  	processorr  streamerr   r;   ru   r
  r   )rC   r   r   r   r1  r   r  rH  rI  rJ  rK  rL  default_max_tokensrM  r   r  s                  r0   r   zTransformersModel.__init__!  s   	  MMQ 	 =H!$45QL9Q'9F#$NN`as`tu #(::#:#:#<%JnZL12	`4DD%'"3	 E DJ +::8Wh:iDNDL01I1IW[quvDM  	`dll2BX`Y_`g # 	%z 	J  	1SV;1AA) +&7	 B 
 "/!>!>x[l!>!m 4T^^QUko p   	`PxkQSTUSVWX^__	`s1   D3 8AE 3E	GAF55GGGr   r@   r   c                 H    ddl m}m}  G d d|      } | |||      g      S )Nr   )StoppingCriteriar   c                   *    e Zd Zdee   fdZd Zd Zy)?TransformersModel.make_stopping_criteria.<locals>.StopOnStringsstop_stringsc                 .    || _         || _        d| _        y Nrj   )r^  r  stream)rC   r^  r  s      r0   r   zHTransformersModel.make_stopping_criteria.<locals>.StopOnStrings.__init__k  s    $0!!* r3   c                     d| _         y r`  )ra  rB   s    r0   resetzETransformersModel.make_stopping_criteria.<locals>.StopOnStrings.resetp  s	     r3   c                     | j                   j                  |d   d   d      }| xj                  |z  c_        t        | j                  D cg c]  }| j                  j                  |       c}      ryyc c}w )Nr   r   TrP  F)r  decodera  r   r^  endswith)rC   	input_idsscoresr   	generatedstop_strings         r0   r   zHTransformersModel.make_stopping_criteria.<locals>.StopOnStrings.__call__s  sg     NN11)A,r2BX\1]	y(TM^M^_k,,[9_` `s   "A7N)r7   r8   r9   rq   r;   r   rc  r   r1   r3   r0   StopOnStringsr]  j  s    !T#Y !
!r3   rl  )rQ  r[  r   )rC   r   r  r[  r   rl  s         r0   make_stopping_criteriaz(TransformersModel.make_stopping_criteriag  s)    G	, 	  $]>9%M$NOOr3   r   r   c                     | j                   d||d|}|j                  d      }|j                  dd       }|j                  dd       }|j                  d      xsO |j                  d      xs< | j                  j                  d      xs | j                  j                  d      xs d}t	        | d      r| j
                  n| j                  j                  ||d	d
d
d
      }|j                  | j                  j                        }t	        |d      r|d   }t	        | d      r| j
                  j                  n| j                  }	|r| j                  ||	      nd }
||d<   t        d|d
|
d|S )N)r   r   r   r   r   rM  r   i   rW  ptT)r   return_tensorsr  r  return_dictrh  )r  )r   	use_cachestopping_criteriar1   )r   r   rd   r   r)   rW  r  r&  tor  devicerm  rh   )rC   r   r   r   r   r   r   rM  prompt_tensormodel_tokenizerrs  s              r0   _prepare_completion_argsz*TransformersModel._prepare_completion_args|  s    <D;; 
)
 
 %((4*..vt<!%%gt4 JJ'( zz,'{{/0 {{|,  	 ,34+E4>>nn"& o 
 &(():):;=+.)+6M6=dK6P$..22VZVdVdVdD''/'Rjn 	 /=*+ 
 /
  	
 	
r3   r   c                 R   |t        d       | j                  d|||d|}|d   j                  d   } | j                  j                  di |}|d|d f   }	t        | d      r| j                  j                  |	d      }
n| j                  j                  |	d      }
|t        |
|      }
|| _
        t        |	      | _        t        t        j                  |
|
|j!                         D ci c]  \  }}|dk7  s|| c}}d	t#        |t        |	      
            S c c}}w )NITransformers does not support structured outputs, use VLLMModel for this.r<  r   r   r   rW  Tre  r#  r   r%  r1   )r   rx  shaper  r   r)   rW  rf  r  r   r   r   r   rW   rF   rQ   r*   r   )rC   r   r   r   r   r   generation_kwargscount_prompt_tokensr$  generated_tokensr,  r   rL   s                r0   r   zTransformersModel.generate  sZ    &hii9D99 
)1
 	
 09??B!djj!! 

 q"5"6674%..//0@VZ/[K..//0@VZ/[K%/^LK':$(+,<(=%&&"CTCZCZC\%pZS%`cgo`oc5j%p #0!"23
 	

 &qs   0D#>D#c           	   +     K   |t        d       | j                  d||||d|}|d   j                  d   }|| _        t	        | j
                  j                  d| j                  i|      }|j                          d}	d}
| j                  D ],  }|
dz  }
|	r|nd}d	}	t        |d t        |d
             d}. |j                          |
| _        y w)Nrz  )r   r   r   r   r   r   rX  )targetr   Tr   Fr   )rY   rZ   r\   r1   )r   rx  r{  r   r   r  r   rX  startr}   r   rm   r   )rC   r   r   r   r   r   r|  r}  threadis_first_tokencount_generated_tokensnew_textr   s                r0   generate_streamz!TransformersModel.generate_stream  s     &hii9D99 
)+1	

 
 09??B':$ tzz22J;kYj;kl !" 
	$H"a'"2@.aL"N( &LPQR 
 #$
	$ 	 )?%s   CC)NNNFrp   r   )r7   r8   r9   rz   r;   r   r   rq   rm  rW   r   rh   r
   rx  r   r   r}   r  r-  r.  s   @r0   rF  rF    s   %R  $!%"&"'Da*Da $JDa 4Z	Da
  DaLPT#Y PNd P0 ,004	.
{#.
 S	D(.
 !J-	.
 
c3h.
f ,01504*
{#*
 S	D(*
 c3h$.	*

 !J-*
 
*
^ ,01504+?{#+? S	D(+? c3h$.	+?
 !J-+? 
)	*+?r3   rF  c            
       d     e Zd ZdZ	 	 	 d
dedeeef   dz  dedz  dedz  f fdZd Z	d	 Z
 xZS )ApiModelao  
    Base class for API-based language models.

    This class serves as a foundation for implementing models that interact with
    external APIs. It handles the common functionality for managing model IDs,
    custom role mappings, and API client connections.

    Parameters:
        model_id (`str`):
            The identifier for the model to be used with the API.
        custom_role_conversions (`dict[str, str`], **optional**):
            Mapping to convert  between internal role names and API-specific role names. Defaults to None.
        client (`Any`, **optional**):
            Pre-configured API client instance. If not provided, a default client will be created. Defaults to None.
        requests_per_minute (`float`, **optional**):
            Rate limit in requests per minute.
        **kwargs: Additional keyword arguments to pass to the parent class.
    Nr   r   clientrequests_per_minutec                     t        |   dd|i| |xs i | _        |xs | j                         | _        t        |      | _        y )Nr   r1   )r
  r   r   create_clientr  r   rate_limiter)rC   r   r   r  r  r   r  s         r0   r   zApiModel.__init__  sJ     	5(5f5'>'D"$4 2 2 4'(;<r3   c                     t        d      )z/Create the API client for the specific service.z8Subclasses must implement this method to create a clientr   rB   s    r0   r  zApiModel.create_client'  s    !"\]]r3   c                 8    | j                   j                          y)z,Apply rate limiting before making API calls.N)r  throttlerB   s    r0   _apply_rate_limitzApiModel._apply_rate_limit+  s    ""$r3   r   )r7   r8   r9   rz   r;   rh   r
   floatr   r  r  r-  r.  s   @r0   r  r    s^    , :>!,0== "&c3h$!6= d
	=
 #T\=^%r3   r  c                       e Zd ZdZ	 	 	 	 	 ddedz  dedz  dedz  deeef   dz  dedz  f
 fdZd	 Z	 	 	 dd
e	e
   de	e   dz  deeef   dz  de	e   dz  de
f
dZ	 	 	 dd
e	e
   de	e   dz  deeef   dz  de	e   dz  dee   f
dZ xZS )LiteLLMModela  Model to use [LiteLLM Python SDK](https://docs.litellm.ai/docs/#litellm-python-sdk) to access hundreds of LLMs.

    Parameters:
        model_id (`str`):
            The model identifier to use on the server (e.g. "gpt-3.5-turbo").
        api_base (`str`, *optional*):
            The base URL of the provider API to call the model.
        api_key (`str`, *optional*):
            The API key to use for authentication.
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
            Defaults to `True` for models that start with "ollama", "groq", "cerebras".
        **kwargs:
            Additional keyword arguments to pass to the OpenAI API.
    Nr   r   r   r   r   c                     |st        j                  dt               d}|| _        || _        ||n|j                  d      }t        |   d|||d| y )NzThe 'model_id' parameter will be required in version 2.0.0. Please update your code to pass this parameter to avoid future errors. For now, it defaults to 'anthropic/claude-3-5-sonnet-20240620'.z$anthropic/claude-3-5-sonnet-20240620)ollamagroqr   r   r   r   r1   )r   r   r   r   r   
startswithr
  r   )rC   r   r   r   r   r   r   r  s          r0   r   zLiteLLMModel.__init__C  s|     MMR 	 >H  (3 %$$%CD 	!
 	 	
$;%=	
 		
r3   c                 J    	 ddl }|S # t        $ r}t        d      |d}~ww xY w)zCreate the LiteLLM client.r   NzWPlease install 'litellm' extra to use LiteLLMModel: `pip install 'smolagents[litellm]'`)litellmr  )rC   r  r   s      r0   r  zLiteLLMModel.create_clientb  s6    	  # 	%i	s    	""r   r   r   r   r@   c                 6    | j                   d||||| j                  | j                  | j                  d| j                  d	|}| j                           | j                  j                  di |}|j                  j                  | _
        |j                  j                  | _        t        j                  |j                  d   j                   j#                  h d      |t%        |j                  j                  |j                  j                              S )	NT)	r   r   r   r   r  r   r   r   r   r      rX   rY   rZ   includer   r[   r\   r1   )r   r   r   r   r   r  r  
completionusageprompt_tokensr   completion_tokensr   rW   rf   choicesr   
model_dumpr   rC   r   r   r   r   r   r   rD  s           r0   r   zLiteLLMModel.generatem  s     <D;; 
)+1--]]LL)-$($@$@
 
 	 )4;;))>,=>'/~~'C'C$(0(H(H%$$Q''22;\2]"%^^99&nn>> % 
 	
r3   c              +     K    | j                   d||||| j                  | j                  | j                  | j                  dd	|}| j                           | j                  j                  di |dddidD ]_  }t        |dd       rx|j                  j                  | _        |j                  j                  | _        t        dt        |j                  j                  |j                  j                               |j                   s|j                   d	   }|j"                  rt        |j"                  j$                  |j"                  j&                  rY|j"                  j&                  D 	cg c]9  }	t)        |	j*                  |	j,                  |	j.                  |	j0                  
      ; c}	nd        Et        |dd       rTt3        d|        y c c}	w w)NT)	r   r   r   r   r  r   r   r   r   include_usagera  stream_optionsr  rj   r   rY   r\   r   ry   r?   r   r>   rY   rZ   finish_reason#No content or tool calls in event: r1   )r   r   r   r   r   r  r  r  r   r  r  r   r  r   r}   r   r  deltarY   rZ   rx   ry   r?   r   r>   r   
rC   r   r   r   r   r   r   eventchoicer  s
             r0   r  zLiteLLMModel.generate_stream  s     <D;; 
)+1--]]LL$($@$@)-
 
 	 +T[[++u.?u^mos]tu 	XEugt,/4{{/H/H,050M0M-, *%*[[%>%>&+kk&C&C!  }}q)<<0 & 4 4 "<<22 *0)@)@$ !& ;&+kk#(88%*ZZ).	$ "  #6?DA(+Nug)VWW=	X $s   DG#A#G#3>G1G#G#NNNNNr   )r7   r8   r9   rz   r;   rh   r   r   r  rq   rW   r   r   r   r}   r  r-  r.  s   @r0   r  r  0  sE   (  $#"9=04
*
 *
 t	

 "&c3h$!6
 #'+
>	 ,01504 
{# 
 S	D( 
 c3h$.	 

 !J- 
 
 
J ,015043X{#3X S	D(3X c3h$.	3X
 !J-3X 
)	*3Xr3   r  c                   |     e Zd ZdZ	 	 	 d
dedeeeef      deeef   dz  deeef   dz  dedz  f
 fdZ	d	 Z
 xZS )LiteLLMRouterModelu"  Router‑based client for interacting with the [LiteLLM Python SDK Router](https://docs.litellm.ai/docs/routing).

    This class provides a high-level interface for distributing requests among multiple language models using
    the LiteLLM SDK's routing capabilities. It is responsible for initializing and configuring the router client,
    applying custom role conversions, and managing message formatting to ensure seamless integration with various LLMs.

    Parameters:
        model_id (`str`):
            Identifier for the model group to use from the model list (e.g., "model-group-1").
        model_list (`list[dict[str, Any]]`):
            Model configurations to be used for routing.
            Each configuration should include the model group name and any necessary parameters.
            For more details, refer to the [LiteLLM Routing](https://docs.litellm.ai/docs/routing#quick-start) documentation.
        client_kwargs (`dict[str, Any]`, *optional*):
            Additional configuration parameters for the Router client. For more details, see the
            [LiteLLM Routing Configurations](https://docs.litellm.ai/docs/routing).
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text.
            Defaults to `True` for models that start with "ollama", "groq", "cerebras".
        **kwargs:
            Additional keyword arguments to pass to the LiteLLM Router completion method.

    Example:
    ```python
    >>> import os
    >>> from smolagents import CodeAgent, WebSearchTool, LiteLLMRouterModel
    >>> os.environ["OPENAI_API_KEY"] = ""
    >>> os.environ["AWS_ACCESS_KEY_ID"] = ""
    >>> os.environ["AWS_SECRET_ACCESS_KEY"] = ""
    >>> os.environ["AWS_REGION"] = ""
    >>> llm_loadbalancer_model_list = [
    ...     {
    ...         "model_name": "model-group-1",
    ...         "litellm_params": {
    ...             "model": "gpt-4o-mini",
    ...             "api_key": os.getenv("OPENAI_API_KEY"),
    ...         },
    ...     },
    ...     {
    ...         "model_name": "model-group-1",
    ...         "litellm_params": {
    ...             "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    ...             "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
    ...             "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
    ...             "aws_region_name": os.getenv("AWS_REGION"),
    ...         },
    ...     },
    >>> ]
    >>> model = LiteLLMRouterModel(
    ...    model_id="model-group-1",
    ...    model_list=llm_loadbalancer_model_list,
    ...    client_kwargs={
    ...        "routing_strategy":"simple-shuffle"
    ...    }
    >>> )
    >>> agent = CodeAgent(tools=[WebSearchTool()], model=model)
    >>> agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
    ```
    Nr   
model_listclient_kwargsr   r   c                 J    d|i|xs i | _         t        |   d|||d| y )Nr  r  r1   r  r
  r   )rC   r   r  r  r   r   r   r  s          r0   r   zLiteLLMRouterModel.__init__  sH     *
"
 	 	
$;%=	
 		
r3   c                 n    	 ddl m}  |di | j                  S # t        $ r}t        d      |d }~ww xY w)Nr   )Routerz]Please install 'litellm' extra to use LiteLLMRouterModel: `pip install 'smolagents[litellm]'`r1   )litellm.routerr  r  r  )rC   r  r   s      r0   r  z LiteLLMRouterModel.create_client  sE    	-
 +**++	 # 	%o	s    	4/4r   )r7   r8   r9   rz   r;   rq   rh   r
   r   r   r  r-  r.  s   @r0   r  r    sy    <D 049=04

 c3h(
 CH~,	

 "&c3h$!6
 #'+
(,r3   r  c                   J    e Zd ZdZ	 	 	 	 	 	 	 	 	 ddededz  dedz  dedeeef   dz  deeef   dz  d	edz  d
edz  dedz  f fdZd Z		 	 	 dde
e   de
e   dz  deeef   dz  de
e   dz  def
dZ	 	 	 dde
e   de
e   dz  deeef   dz  de
e   dz  dee   f
dZ xZS )InferenceClientModela  A class to interact with Hugging Face's Inference Providers for language model interaction.

    This model allows you to communicate with Hugging Face's models using Inference Providers. It can be used in both serverless mode, with a dedicated endpoint, or even with a local URL, supporting features like stop sequences and grammar customization.

    Providers include Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.

    Parameters:
        model_id (`str`, *optional*, default `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
            The Hugging Face model ID to be used for inference.
            This can be a model identifier from the Hugging Face model hub or a URL to a deployed Inference Endpoint.
            Currently, it defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`, but this may change in the future.
        provider (`str`, *optional*):
            Name of the provider to use for inference. A list of supported providers can be found in the [Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
            Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order [here](https://hf.co/settings/inference-providers).
            If `base_url` is passed, then `provider` is not used.
        token (`str`, *optional*):
            Token used by the Hugging Face API for authentication. This token need to be authorized 'Make calls to the serverless Inference Providers'.
            If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
            If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration.
        timeout (`int`, *optional*, defaults to 120):
            Timeout for the API request, in seconds.
        client_kwargs (`dict[str, Any]`, *optional*):
            Additional keyword arguments to pass to the Hugging Face InferenceClient.
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        api_key (`str`, *optional*):
            Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClientModel`]
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
        bill_to (`str`, *optional*):
            The billing account to use for the requests. By default the requests are billed on the user's account. Requests can only be billed to
            an organization the user is a member of, and which has subscribed to Enterprise Hub.
        base_url (`str`, `optional`):
            Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClientModel`]
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
        **kwargs:
            Additional keyword arguments to pass to the Hugging Face InferenceClient.

    Raises:
        ValueError:
            If the model name is not provided.

    Example:
    ```python
    >>> engine = InferenceClientModel(
    ...     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    ...     provider="nebius",
    ...     token="your_hf_token_here",
    ...     max_tokens=5000,
    ... )
    >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
    >>> response = engine(messages, stop_sequences=["END"])
    >>> print(response)
    "Quantum mechanics is the branch of physics that studies..."
    ```
    Nr   r   r   r   r  r   r   bill_tobase_urlc
                     ||t        d      ||n|}|t        j                  d      }i |xs i ||||||	d| _        t	        |   d||d|
 y )NzReceived both `token` and `api_key` arguments. Please provide only one of them. `api_key` is an alias for `token` to make the API compatible with OpenAI's client. It has the exact same behavior as `token`.HF_TOKEN)r  r   r   r   r  r  )r   r   r1   )r   osgetenvr  r
  r   )rC   r   r   r   r   r  r   r   r  r  r   r  s              r0   r   zInferenceClientModel.__init__\  s     !4> 
 *=IIj)E
"
  
 	f(D[f_efr3   c                 2    ddl m}  |di | j                  S )zCreate the Hugging Face client.r   )InferenceClientr1   )huggingface_hubr  r  )rC   r  s     r0   r  z"InferenceClientModel.create_client}  s    34!3!344r3   r   r   r   r   r@   c           	      P   |6| j                   d   t        vr!t        ddj                  t              z          | j                  d	|||d| j
                  d|}| j                           | j                  j                  d	i |}|j                  j                  | _        |j                  j                  | _        t        j                  t!        |j"                  d   j$                        |t'        |j                  j                  |j                  j                              S )
Nr   zKInferenceClientModel only supports structured outputs with these providers:z, T)r   r   r   r   r   r   r   r  r1   )r  STRUCTURED_GENERATION_PROVIDERSr   rm   r   r   r  r  chat_completionr  r  r   r  r   rW   rf   r   r  r   r   r  s           r0   r   zInferenceClientModel.generate  s    &4+=+=j+IQp+p]));<=  <D;; 
)1)-$($@$@
 
 	 .4;;..C1BC'/~~'C'C$(0(H(H%$$8##A&../"%^^99&nn>> % 
 	
r3   c              +     K    | j                   d||||| j                  | j                  dd|}| j                           | j                  j
                  j                  j                  di |dddidD ]_  }t        |dd       rx|j                  j                  | _        |j                  j                  | _        t        dt        |j                  j                  |j                  j                               |j                   s|j                   d	   }|j"                  rt        |j"                  j$                  |j"                  j&                  rY|j"                  j&                  D 	cg c]9  }	t)        |	j*                  |	j,                  |	j.                  |	j0                  
      ; c}	nd        Et        |dd       rTt3        d|        y c c}	w w)NTr   r   r   r   r  r   r   r  r  r  rj   r   r  r   r  r  r  r  r1   )r   r   r   r  r  chatcompletionscreater   r  r  r   r  r   r}   r   r  r  rY   rZ   rx   ry   r?   r   r>   r   r  s
             r0   r  z$InferenceClientModel.generate_stream  s     <D;; 	
)+1--$($@$@)-	
 	
 	 8T[[%%1188 

(,ot=T
  	XE ugt,/4{{/H/H,050M0M-, *%*[[%>%>&+kk&C&C!  }}q)<<0 & 4 4 "<<22 *0)@)@$ !& ;&+kk#(88%*ZZ).	$ "  #6?DA(+Nug)VWWA 	X$$s   DG!A#G!1>G/G!G!)	zQwen/Qwen2.5-Coder-32B-InstructNNx   NNNNNr   )r7   r8   r9   rz   r;   r{   rh   r
   r   r  rq   rW   r   r   r   r}   r  r-  r.  s   @r0   r  r  "  s   7v :# /39=""#gg *g Tz	g
 g CH~,g "&c3h$!6g tg tg *gB5 ,01504"
{#"
 S	D("
 c3h$.	"

 !J-"
 
"
N ,015043X{#3X S	D(3X c3h$.	3X
 !J-3X 
)	*3Xr3   r  c                   <    e Zd ZdZ	 	 	 	 	 	 	 ddededz  dedz  dedz  dedz  deeef   dz  d	eeef   dz  d
ef fdZd Z		 	 	 dde
e   de
e   dz  deeef   dz  de
e   dz  dee   f
dZ	 	 	 dde
e   de
e   dz  deeef   dz  de
e   dz  def
dZ xZS )OpenAIServerModela  This model connects to an OpenAI-compatible API server.

    Parameters:
        model_id (`str`):
            The model identifier to use on the server (e.g. "gpt-3.5-turbo").
        api_base (`str`, *optional*):
            The base URL of the OpenAI-compatible API server.
        api_key (`str`, *optional*):
            The API key to use for authentication.
        organization (`str`, *optional*):
            The organization to use for the API request.
        project (`str`, *optional*):
            The project to use for the API request.
        client_kwargs (`dict[str, Any]`, *optional*):
            Additional keyword arguments to pass to the OpenAI client (like organization, project, max_retries etc.).
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        flatten_messages_as_text (`bool`, default `False`):
            Whether to flatten messages as text.
        **kwargs:
            Additional keyword arguments to pass to the OpenAI API.
    Nr   r   r   r   r   r  r   r   c	                 T    i |xs i ||||d| _         t        
|   d|||d|	 y )N)r   r  r   r   r  r1   r  )rC   r   r   r   r   r   r  r   r   r   r  s             r0   r   zOpenAIServerModel.__init__  sR    
"
 (
 	 	
$;%=	
 		
r3   c                 ~    	 dd l } |j                  di | j                  S # t        $ r}t        d      |d }~ww xY w)Nr   zZPlease install 'openai' extra to use OpenAIServerModel: `pip install 'smolagents[openai]'`r1   )openair  OpenAIr  rC   r  r   s      r0   r  zOpenAIServerModel.create_client  sK    	 v}}2t1122 # 	%l	   " 	<7<r   r   r   r   r@   c              +     K    | j                   d||||| j                  | j                  dd|}| j                           | j                  j
                  j                  j                  di |dddidD ]^  }|j                  rx|j                  j                  | _
        |j                  j                  | _        t        dt        |j                  j                  |j                  j                               |j                  s|j                  d   }|j                   rt        |j                   j"                  |j                   j$                  rY|j                   j$                  D 	cg c]9  }	t'        |	j(                  |	j*                  |	j,                  |	j.                  	      ; c}	nd 
       Dt1        |dd       rSt3        d|        y c c}	w w)NTr  r  r  rj   r   r  r   r  r  r  r  r1   )r   r   r   r  r  r  r  r  r  r  r   r  r   r}   r   r  r  rY   rZ   rx   ry   r?   r   r>   r   r   r  s
             r0   r  z!OpenAIServerModel.generate_stream  s     <D;; 	
)+1--$($@$@)-	
 	
 	 8T[[%%1188 

(,ot=T
  	XE {{/4{{/H/H,050M0M-, *%*[[%>%>&+kk&C&C!  }}q)<<0 & 4 4 "<<22 *0)@)@$ !& ;&+kk#(88%*ZZ).	$ "  #6?DA(+Nug)VWWA 	X$$s   D
G A#G 0>G.G G c                 6    | j                   d
||||| j                  | j                  dd|}| j                           | j                  j
                  j                  j                  d
i |}t        |j                  dd      | _
        t        |j                  dd      | _        t        j                  |j                  d   j                  j!                  h d      |t#        |j                  j$                  |j                  j&                        	      S )NTr  r  r   r  r  r  r   r  r1   )r   r   r   r  r  r  r  r  r   r  r   r   rW   rf   r  r   r  r   r  r  r  s           r0   r   zOpenAIServerModel.generateO  s    <D;; 	
)+1--$($@$@)-	
 	
 	 64;;##//66K9JK (/x~~PQ'R$(/@SUV(W%$$Q''22;\2]"%^^99&nn>> % 
 	
r3   )NNNNNNFr   )r7   r8   r9   rz   r;   rh   r
   r   r   r  rq   rW   r   r   r}   r  r   r-  r.  s   @r0   r  r    sv   6  $"#'"/39=).

 *
 t	

 Dj
 t
 CH~,
 "&c3h$!6
 #'
43 ,015043X{#3X S	D(3X c3h$.	3X
 !J-3X 
)	*3Xp ,01504
{#
 S	D(
 c3h$.	

 !J-
 

r3   r  c                        e Zd ZdZ	 	 	 	 	 ddededz  dedz  dedz  deeef   dz  deeef   dz  f fd	Zd
 Z xZ	S )AzureOpenAIServerModela  This model connects to an Azure OpenAI deployment.

    Parameters:
        model_id (`str`):
            The model deployment name to use when connecting (e.g. "gpt-4o-mini").
        azure_endpoint (`str`, *optional*):
            The Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`. If not provided, it will be inferred from the `AZURE_OPENAI_ENDPOINT` environment variable.
        api_key (`str`, *optional*):
            The API key to use for authentication. If not provided, it will be inferred from the `AZURE_OPENAI_API_KEY` environment variable.
        api_version (`str`, *optional*):
            The API version to use. If not provided, it will be inferred from the `OPENAI_API_VERSION` environment variable.
        client_kwargs (`dict[str, Any]`, *optional*):
            Additional keyword arguments to pass to the AzureOpenAI client (like organization, project, max_retries etc.).
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
        **kwargs:
            Additional keyword arguments to pass to the Azure OpenAI API.
    Nr   r   r   api_versionr  r   c                 b    |xs i }|j                  ||d       t        |   d||||d| y )N)r  r   )r   r   r  r   r1   )r   r
  r   )	rC   r   r   r   r  r  r   r   r  s	           r0   r   zAzureOpenAIServerModel.__init__  sR     &+*"0	
 	 	
'$;		

 	
r3   c                 ~    	 dd l } |j                  di | j                  S # t        $ r}t        d      |d }~ww xY w)Nr   z_Please install 'openai' extra to use AzureOpenAIServerModel: `pip install 'smolagents[openai]'`r1   )r  r  AzureOpenAIr  r  s      r0   r  z$AzureOpenAIServerModel.create_client  sM    	 "v!!7D$6$677 # 	%q	r  r  )
r7   r8   r9   rz   r;   rh   r
   r   r  r-  r.  s   @r0   r  r  t  s    . &*""&/39=

 d

 t	

 4Z
 CH~,
 "&c3h$!6
28r3   r  c                   >    e Zd ZdZ	 	 	 ddedeeef   dz  deeef   dz  f fdZ	 	 	 	 	 	 ddee	   dee   dz  d	eeef   dz  d
ee
   dz  deeef   dz  dedeeeef   z  dz  def fdZd Z	 	 	 ddee	   dee   dz  d	eeef   dz  d
ee
   dz  de	f
dZ xZS )AmazonBedrockServerModela<  
    A model class for interacting with Amazon Bedrock Server models through the Bedrock API.

    This class provides an interface to interact with various Bedrock language models,
    allowing for customized model inference, guardrail configuration, message handling,
    and other parameters allowed by boto3 API.

    Parameters:
        model_id (`str`):
            The model identifier to use on Bedrock (e.g. "us.amazon.nova-pro-v1:0").
        client (`boto3.client`, *optional*):
            A custom boto3 client for AWS interactions. If not provided, a default client will be created.
        client_kwargs (dict[str, Any], *optional*):
            Keyword arguments used to configure the boto3 client if it needs to be created internally.
            Examples include `region_name`, `config`, or `endpoint_url`.
        custom_role_conversions (`dict[str, str]`, *optional*):
            Custom role conversion mapping to convert message roles in others.
            Useful for specific models that do not support specific message roles like "system".
            Defaults to converting all roles to "user" role to enable using all the Bedrock models.
        flatten_messages_as_text (`bool`, default `False`):
            Whether to flatten messages as text.
        **kwargs
            Additional keyword arguments passed directly to the underlying API calls.

    Examples:
        Creating a model instance with default settings:
        ```python
        >>> bedrock_model = AmazonBedrockServerModel(
        ...     model_id='us.amazon.nova-pro-v1:0'
        ... )
        ```

        Creating a model instance with a custom boto3 client:
        ```python
        >>> import boto3
        >>> client = boto3.client('bedrock-runtime', region_name='us-west-2')
        >>> bedrock_model = AmazonBedrockServerModel(
        ...     model_id='us.amazon.nova-pro-v1:0',
        ...     client=client
        ... )
        ```

        Creating a model instance with client_kwargs for internal client creation:
        ```python
        >>> bedrock_model = AmazonBedrockServerModel(
        ...     model_id='us.amazon.nova-pro-v1:0',
        ...     client_kwargs={'region_name': 'us-west-2', 'endpoint_url': 'https://custom-endpoint.com'}
        ... )
        ```

        Creating a model instance with inference and guardrail configurations:
        ```python
        >>> additional_api_config = {
        ...     "inferenceConfig": {
        ...         "maxTokens": 3000
        ...     },
        ...     "guardrailConfig": {
        ...         "guardrailIdentifier": "identify1",
        ...         "guardrailVersion": 'v1'
        ...     },
        ... }
        >>> bedrock_model = AmazonBedrockServerModel(
        ...     model_id='anthropic.claude-3-haiku-20240307-v1:0',
        ...     **additional_api_config
        ... )
        ```
    Nr   r  r   c                 @   |xs i | _         |xsz t        j                  t        j                  t        j                  t        j                  t        j
                  t        j                  t        j                  t        j                  i}t        |    d||d|d| y )NF)r   r   r   r  r1   )	r  rF   rR   rP   rQ   rS   rT   r
  r   )rC   r   r  r  r   r   r  s         r0   r   z!AmazonBedrockServerModel.__init__  s     +0b
 #: #
 0 0!!;#3#3!!;#3#3%%{'7'7	>
 	 	
$;%*		

 	
r3   r   r   r   r   r   r   r@   c           	          t        |   d|d|||d|}	|	j                  dd       |	j                  dg       D ]!  }
|
j                  dg       D ]
  }d|v s|d=  # d| j                  i|	S )	a  
        Overrides the base method to handle Bedrock-specific configurations.

        This implementation adapts the completion keyword arguments to align with
        Bedrock's requirements, ensuring compatibility with its unique setup and
        constraints.
        N)r   r   r   r   r   
toolConfigr   rY   r   modelIdr1   )r
  r   r   rd   r   )rC   r   r   r   r   r   r   r   r   r   r   rY   r  s               r0   r   z3AmazonBedrockServerModel._prepare_completion_kwargs  s    $ "G> 
1$;)E
 
 	lD1 ),,Z< 	(G";;y"5 (W$(	( t}}

 	
r3   c                 ~    	 dd l } |j                  di | j                  S # t        $ r}t        d      |d }~ww xY w)Nr   zcPlease install 'bedrock' extra to use AmazonBedrockServerModel: `pip install 'smolagents[bedrock]'`)zbedrock-runtime)boto3r  r  r  )rC   r  r   s      r0   r  z&AmazonBedrockServerModel.create_client<  sK    	 u||D1C1CDD # 	%u	r  c           	         |t        d       | j                  d||| j                  dd|}| j                           | j                  j
                  di |}|d   d   d   d   d   |d   d   d<   |d	   d
   | _        |d	   d   | _        t        j                  |d   d   |t        |d	   d
   |d	   d               S )Nz/Amazon Bedrock does not support response_formatT)r   r   r   r   outputr   rY   r   r   r  inputTokensoutputTokensr   r  r1   )r   r   r   r  r  converser   r   rW   rf   r   r  s           r0   r   z!AmazonBedrockServerModel.generateF  s    &NOO"A$"A"A #
1$($@$@)-	#

 #
 	 '4;;''<*;< 4<H3Ei3PQZ3[\]3^_e3f9%i0'/'8'G$(0(9.(I%$$Xy)"%g.}=&w/? % 
 	
r3   r   )NNNNFN)r7   r8   r9   rz   r;   rh   r
   r   rq   rW   r   r   r   r  r   r-  r.  s   @r0   r  r    se   BN /39=

 CH~,	

 "&c3h$!6
> ,015049=-237)
{#)
 S	D()
 c3h$.	)

 !J-)
 "&c3h$!6)
 '+)
 4S>)D0)
 
)
VE ,01504!
{#!
 S	D(!
 c3h$.	!

 !J-!
 
!
r3   r  )rF   r   r   r   r0  rF  r  r  r  r  r  OpenAIModelr  r  AzureOpenAIModelr  AmazonBedrockModelrW   rK   )Kr^   loggingr  r   r   r   collections.abcr   copyr   dataclassesr   r   enumr   	threadingr   typingr	   r
   
monitoringr   r   r   utilsr   r   r   r   r   rQ  r   	getLoggerr7   rR  r  CODEAGENT_RESPONSE_FORMATr2   r5   r=   r;   rF   rW   rh   rv   rx   r}   rQ   rq   r   rS   rT   rP   r   r   r   r   r   r   r   r   r  r0  rF  r  r  r  r  r  r  r  r  r  r  __all__r1   r3   r0   <module>r     sW     	 	   %  )   % "  k k 1 
		8	$#-~">  %* $Z&$ $Y#$ #F++!
$ ') 6 # # # t t t	&#t 	& ( ( (VC$J 3:  8 8 8 * * * FQEZEZ;./;7B;;~ ;00{// t  ,3 S	 c  IK).%*	?{#?;34tCH~E? #'? #	?
 
$sCx.?D# c s Wj $-c -d -$gB gBTh
 h
Vk
u k
\J? J?Z'%u '%TRX8 RXjZ, Z,zxX8 xXvQ
 Q
h  68. 68r * w
x w
t . r3   