Improve tool call parsing, allow argument extraction from multiple keys

sysradium · sysradium · commit a7895752afea · 2025-03-23T22:03:41.000+01:00
diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
@@ -602,7 +602,7 @@ def execute_tool_call(self, tool_name: str, arguments: Union[Dict[str, str], str
             elif tool_name in self.managed_agents:
                 error_msg = (
                     f"Error in calling team member: {e}\nYou should only ask this team member with a correct request.\n"
-                    f"As a reminder, this team member's description is the following:\n{available_tools[tool_name]}"
+                    f"As a reminder, this team member's description is the following:\n{available_tools[tool_name].description}"
                 )
                 raise AgentExecutionError(error_msg, self.logger)
 
diff --git a/src/smolagents/models.py b/src/smolagents/models.py
@@ -116,14 +116,17 @@ def dict(self):
         return json.dumps(get_dict_from_nested_dataclasses(self))
 
 
-def parse_json_if_needed(arguments: Union[str, dict]) -> Union[str, dict]:
+def parse_json_if_needed(arguments: Union[str, dict] | None) -> Union[str, dict] | None:
+    if arguments is None:
+        return None
+
     if isinstance(arguments, dict):
         return arguments
-    else:
-        try:
-            return json.loads(arguments)
-        except Exception:
-            return arguments
+
+    try:
+        return json.loads(arguments)
+    except Exception:
+        return arguments
 
 
 class MessageRole(str, Enum):
@@ -228,16 +231,23 @@ def get_clean_message_list(
     return output_message_list
 
 
-def get_tool_call_from_text(text: str, tool_name_key: str, tool_arguments_key: str) -> ChatMessageToolCall:
+def get_tool_call_from_text(
+    text: str, tool_name_key: str, tool_arguments_key: Union[list[str], str]
+) -> ChatMessageToolCall:
+    if isinstance(tool_arguments_key, str):
+        tool_arguments_key = [tool_arguments_key]
+
     tool_call_dictionary, _ = parse_json_blob(text)
     try:
         tool_name = tool_call_dictionary[tool_name_key]
-    except Exception as e:
+    except KeyError as e:
         raise ValueError(
             f"Key {tool_name_key=} not found in the generated tool call. Got keys: {list(tool_call_dictionary.keys())} instead"
         ) from e
-    tool_arguments = tool_call_dictionary.get(tool_arguments_key, None)
+
+    tool_arguments = next((tool_call_dictionary[k] for k in tool_arguments_key if k in tool_call_dictionary), None)
     tool_arguments = parse_json_if_needed(tool_arguments)
+
     return ChatMessageToolCall(
         id=str(uuid.uuid4()),
         type="function",
@@ -250,12 +260,16 @@ def __init__(
         self,
         flatten_messages_as_text: bool = False,
         tool_name_key: str = "name",
-        tool_arguments_key: str = "arguments",
+        tool_arguments_keys: list[str] | None = None,
         **kwargs,
     ):
         self.flatten_messages_as_text = flatten_messages_as_text
         self.tool_name_key = tool_name_key
-        self.tool_arguments_key = tool_arguments_key
+
+        self.tool_arguments_keys = tool_arguments_keys
+        if tool_arguments_keys is None:
+            self.tool_arguments_keys = ["arguments", "parameters"]
+
         self.kwargs = kwargs
         self.last_input_token_count = None
         self.last_output_token_count = None
@@ -488,7 +502,7 @@ def __call__(
         )
         if tools_to_call_from:
             chat_message.tool_calls = [
-                get_tool_call_from_text(output_text, self.tool_name_key, self.tool_arguments_key)
+                get_tool_call_from_text(output_text, self.tool_name_key, self.tool_arguments_keys)
             ]
         return chat_message
 
@@ -586,6 +600,8 @@ def __call__(
         for _ in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs):
             self.last_output_token_count += 1
             text += _.text
+
+            found_stop_sequence = False
             for stop_sequence in prepared_stop_sequences:
                 stop_sequence_start = text.rfind(stop_sequence)
                 if stop_sequence_start != -1:
@@ -804,7 +820,7 @@ def __call__(
         )
         if tools_to_call_from:
             chat_message.tool_calls = [
-                get_tool_call_from_text(output_text, self.tool_name_key, self.tool_arguments_key)
+                get_tool_call_from_text(output_text, self.tool_name_key, self.tool_arguments_keys)
             ]
         return chat_message
 
@@ -818,9 +834,11 @@ def postprocess_message(self, message: ChatMessage, tools_to_call_from) -> ChatM
         message.role = MessageRole.ASSISTANT  # Overwrite role if needed
         if tools_to_call_from:
             if not message.tool_calls:
-                message.tool_calls = [
-                    get_tool_call_from_text(message.content, self.tool_name_key, self.tool_arguments_key)
-                ]
+                message.tool_calls = (
+                    [get_tool_call_from_text(message.content, self.tool_name_key, self.tool_arguments_keys)]
+                    if message.content
+                    else []
+                )
             for tool_call in message.tool_calls:
                 tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
         return message
diff --git a/src/smolagents/utils.py b/src/smolagents/utils.py
@@ -136,25 +136,47 @@ def make_json_serializable(obj: Any) -> Any:
 
 
 def parse_json_blob(json_blob: str) -> Tuple[Dict[str, str], str]:
-    "Extracts the JSON blob from the input and returns the JSON data and the rest of the input."
+    "Extracts the first valid JSON blob from the input and returns the JSON data and the rest of the input."
+    first_accolade_index = json_blob.find("{")
+    if first_accolade_index == -1:
+        raise ValueError("No JSON object found in input.")
+
+    brace_count = 0
+    end_index = None
+
+    # Iterate from first '{' to find balanced closing '}'
+    for i in range(first_accolade_index, len(json_blob)):
+        char = json_blob[i]
+        if char == "{":
+            brace_count += 1
+        elif char == "}":
+            brace_count -= 1
+            if brace_count == 0:
+                end_index = i
+                break
+
+    if end_index is None:
+        raise ValueError("No complete JSON object found: braces are not balanced.")
+
+    json_str = json_blob[first_accolade_index : end_index + 1]
     try:
-        first_accolade_index = json_blob.find("{")
-        last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1]
-        json_data = json_blob[first_accolade_index : last_accolade_index + 1]
-        json_data = json.loads(json_data, strict=False)
-        return json_data, json_blob[:first_accolade_index]
+        json_data = json.loads(json_str)
     except json.JSONDecodeError as e:
         place = e.pos
-        if json_blob[place - 1 : place + 2] == "},\n":
+        if json_str[place - 1 : place + 2] == "},\n":
             raise ValueError(
                 "JSON is invalid: you probably tried to provide multiple tool calls in one action. PROVIDE ONLY ONE TOOL CALL."
             )
         raise ValueError(
             f"The JSON blob you used is invalid due to the following error: {e}.\n"
-            f"JSON blob was: {json_blob}, decoding failed on that specific part of the blob:\n"
-            f"'{json_blob[place - 4 : place + 5]}'."
+            f"JSON blob was: {json_str}, decoding failed on that specific part of the blob:\n"
+            f"'{json_str[place - 4 : place + 5]}'."
         )
 
+    # The prefix BEFORE JSON (if needed)
+    prefix = json_blob[:first_accolade_index]
+    return json_data, prefix
+
 
 def parse_code_blobs(text: str) -> str:
     """Extract code blocs from the LLM's output.
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -457,3 +457,9 @@ def test_get_tool_call_from_text_numeric_args(self):
         result = get_tool_call_from_text(text, "name", "arguments")
         assert result.function.name == "calculator"
         assert result.function.arguments == 42
+
+    def test_get_tool_call_from_text_with_mutltiple_argument_keys(self):
+        text = '{"name": "calculator", "parameters": 42}'
+        result = get_tool_call_from_text(text, "name", ["arguments", "parameters"])
+        assert result.function.name == "calculator"
+        assert result.function.arguments == 42

Original file line number	Diff line number	Diff line change
`@@ -602,7 +602,7 @@ def execute_tool_call(self, tool_name: str, arguments: Union[Dict[str, str], str`
`602`	`602`	`elif tool_name in self.managed_agents:`
`603`	`603`	`error_msg = (`
`604`	`604`	`f"Error in calling team member: {e}\nYou should only ask this team member with a correct request.\n"`
`605`		`- f"As a reminder, this team member's description is the following:\n{available_tools[tool_name]}"`
	`605`	`+ f"As a reminder, this team member's description is the following:\n{available_tools[tool_name].description}"`
`606`	`606`	`)`
`607`	`607`	`raise AgentExecutionError(error_msg, self.logger)`
`608`	`608`