Skip to content

Commit 0ad6abe

Browse files
authored
Merge pull request #957 from ScrapeGraphAI/pre/beta
Pre/beta
2 parents 47851c3 + bdf813e commit 0ad6abe

File tree

5 files changed

+66
-30
lines changed

5 files changed

+66
-30
lines changed

Diff for: CHANGELOG.md

+7
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
1+
## [1.43.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.43.0...v1.43.1-beta.1) (2025-03-21)
2+
3+
4+
### Bug Fixes
5+
6+
* Fixes schema option not working ([df1645c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/df1645c5ebc6bc2362992fec3887dcbedf519ba9))
7+
1 8
## [1.43.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.42.1...v1.43.0) (2025-03-13)
2 9

3 10

Diff for: pyproject.toml

+3-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "scrapegraphai"
33

4-
version = "1.43.0"
4+
version = "1.43.1b1"
55

66

77

@@ -31,7 +31,8 @@ dependencies = [
3131
"async-timeout>=4.0.3",
3232
"simpleeval>=1.0.0",
3333
"jsonschema>=4.23.0",
34-
"duckduckgo-search>=7.2.1"
34+
"duckduckgo-search>=7.2.1",
35+
"pydantic>=2.10.2",
3536
]
3637

3738
readme = "README.md"

Diff for: scrapegraphai/nodes/generate_answer_node.py

+26-27
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
GenerateAnswerNode Module
33
"""
44

5-
import time
65
import json
6+
import time
77
from typing import List, Optional
88

99
from langchain.prompts import PromptTemplate
@@ -105,10 +105,7 @@ def process(self, state: dict) -> dict:
105105
raise ValueError("No user prompt found in state")
106106

107107
# Create the chain input with both content and question keys
108-
chain_input = {
109-
"content": content,
110-
"question": user_prompt
111-
}
108+
chain_input = {"content": content, "question": user_prompt}
112109

113110
try:
114111
response = self.invoke_with_timeout(self.chain, chain_input, self.timeout)
@@ -167,25 +164,13 @@ def execute(self, state: dict) -> dict:
167164
and not self.script_creator
168165
or self.is_md_scraper
169166
):
170-
template_no_chunks_prompt = (
171-
TEMPLATE_NO_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
172-
)
173-
template_chunks_prompt = (
174-
TEMPLATE_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
175-
)
176-
template_merge_prompt = (
177-
TEMPLATE_MERGE_MD + "\n\nIMPORTANT: " + format_instructions
178-
)
167+
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS_MD
168+
template_chunks_prompt = TEMPLATE_CHUNKS_MD
169+
template_merge_prompt = TEMPLATE_MERGE_MD
179170
else:
180-
template_no_chunks_prompt = (
181-
TEMPLATE_NO_CHUNKS + "\n\nIMPORTANT: " + format_instructions
182-
)
183-
template_chunks_prompt = (
184-
TEMPLATE_CHUNKS + "\n\nIMPORTANT: " + format_instructions
185-
)
186-
template_merge_prompt = (
187-
TEMPLATE_MERGE + "\n\nIMPORTANT: " + format_instructions
188-
)
171+
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS
172+
template_chunks_prompt = TEMPLATE_CHUNKS
173+
template_merge_prompt = TEMPLATE_MERGE
189174

190175
if self.additional_info is not None:
191176
template_no_chunks_prompt = self.additional_info + template_no_chunks_prompt
@@ -210,8 +195,14 @@ def execute(self, state: dict) -> dict:
210195
chain, {"question": user_prompt}, self.timeout
211196
)
212197
except (Timeout, json.JSONDecodeError) as e:
213-
error_msg = "Response timeout exceeded" if isinstance(e, Timeout) else "Invalid JSON response format"
214-
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
198+
error_msg = (
199+
"Response timeout exceeded"
200+
if isinstance(e, Timeout)
201+
else "Invalid JSON response format"
202+
)
203+
state.update(
204+
{self.output[0]: {"error": error_msg, "raw_response": str(e)}}
205+
)
215206
return state
216207

217208
state.update({self.output[0]: answer})
@@ -241,7 +232,11 @@ def execute(self, state: dict) -> dict:
241232
async_runner, {"question": user_prompt}, self.timeout
242233
)
243234
except (Timeout, json.JSONDecodeError) as e:
244-
error_msg = "Response timeout exceeded during chunk processing" if isinstance(e, Timeout) else "Invalid JSON response format in chunk processing"
235+
error_msg = (
236+
"Response timeout exceeded during chunk processing"
237+
if isinstance(e, Timeout)
238+
else "Invalid JSON response format in chunk processing"
239+
)
245240
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
246241
return state
247242

@@ -261,7 +256,11 @@ def execute(self, state: dict) -> dict:
261256
self.timeout,
262257
)
263258
except (Timeout, json.JSONDecodeError) as e:
264-
error_msg = "Response timeout exceeded during merge" if isinstance(e, Timeout) else "Invalid JSON response format during merge"
259+
error_msg = (
260+
"Response timeout exceeded during merge"
261+
if isinstance(e, Timeout)
262+
else "Invalid JSON response format during merge"
263+
)
265264
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
266265
return state
267266

Diff for: tests/graphs/smart_scraper_openai_test.py

+25
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import pytest
88
from dotenv import load_dotenv
9+
from pydantic import BaseModel
910

1011
from scrapegraphai.graphs import SmartScraperGraph
1112

@@ -53,3 +54,27 @@ def test_get_execution_info(graph_config):
5354
graph_exec_info = smart_scraper_graph.get_execution_info()
5455

5556
assert graph_exec_info is not None
57+
58+
59+
def test_get_execution_info_with_schema(graph_config):
60+
"""Get the execution info with schema"""
61+
62+
class ProjectSchema(BaseModel):
63+
title: str
64+
description: str
65+
66+
class ProjectListSchema(BaseModel):
67+
projects: list[ProjectSchema]
68+
69+
smart_scraper_graph = SmartScraperGraph(
70+
prompt="List me all the projects with their description.",
71+
source="https://perinim.github.io/projects/",
72+
config=graph_config,
73+
schema=ProjectListSchema,
74+
)
75+
76+
smart_scraper_graph.run()
77+
78+
graph_exec_info = smart_scraper_graph.get_execution_info()
79+
80+
assert graph_exec_info is not None

Diff for: uv.lock

+5-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)