-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchat_handler.py
64 lines (53 loc) · 2.09 KB
/
chat_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import asyncio
from openai import AsyncOpenAI
import os
import nltk
from nltk.corpus import wordnet
from rag_engine import RAGEngine # Import the class, not an instance
from dotenv import load_dotenv
import tiktoken
# Load environment variables from a local .env file. Order matters: this must
# run before the os.getenv() calls below so values from .env are visible.
load_dotenv()
# Set up your OpenAI API configuration.
# Module-level async client shared by every request handled by this module.
# NOTE(review): base_url comes from OPENAI_API_BASE — if unset this is None
# and the SDK falls back to its default endpoint; presumably used to point at
# a proxy or alternate deployment — confirm against the deployment config.
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE")
)
# Download NLTK data needed by expand_query() (WordNet + multilingual
# extension). Runs at import time; nltk.download() is a no-op once the
# corpora are present but may hit the network on first run.
nltk.download('wordnet')
nltk.download('omw-1.4')
def expand_query(query: str) -> str:
    """Expand *query* with WordNet synonyms to improve retrieval recall.

    For each whitespace-separated word, the original term is always kept;
    when WordNet knows the word, the first lemma of its first (most common)
    synset is appended as an additional search term. Underscores in
    multi-word lemmas (e.g. "take_away") are replaced with spaces so they
    remain usable as query text. Words unknown to WordNet pass through
    unchanged.

    Fix over the previous version: it *replaced* each word with its first
    synonym, which could drop the user's exact wording from the query and
    hurt retrieval; both terms are now retained.
    """
    expanded_terms = []
    for word in query.split():
        # Always retain the user's own term.
        expanded_terms.append(word)
        synsets = wordnet.synsets(word)
        if synsets:
            synonym = synsets[0].lemmas()[0].name().replace("_", " ")
            # Skip the synonym when it is just the word itself.
            if synonym.lower() != word.lower():
                expanded_terms.append(synonym)
    return " ".join(expanded_terms)
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens *string* encodes to under the tiktoken
    encoding named *encoding_name* (e.g. "cl100k_base")."""
    return len(tiktoken.get_encoding(encoding_name).encode(string))
async def handle_chat_message(message: str, rag_engine: RAGEngine):
    """Answer *message* with RAG context, streamed as Server-Sent Events.

    The query is synonym-expanded, relevant documents are fetched from the
    provided ``rag_engine``, and their concatenated text is supplied as
    grounding context to a streamed gpt-4o-mini completion.

    Yields:
        SSE-formatted strings (``data: ...`` lines terminated by a blank
        line), one event per streamed model chunk.
    """
    # Use the provided rag_engine instance.
    expanded_message = expand_query(message)
    relevant_docs = await rag_engine.query(expanded_message)
    context = " ".join(doc['content'] for doc in relevant_docs)

    # Cap the context at 6000 tokens, leaving room in the model window for
    # the system message, the question and the reply. Truncate once at an
    # exact token boundary — the previous loop re-encoded the whole context
    # on every pass while shaving 10% of its characters, which was
    # quadratic and could cut at an arbitrary character position.
    encoding = tiktoken.get_encoding("cl100k_base")
    context_tokens = encoding.encode(context)
    if len(context_tokens) > 6000:
        context = encoding.decode(context_tokens[:6000])

    messages = [
        {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer questions."},
        {"role": "user", "content": f"Context: {context}\n\nQuestion: {message}"}
    ]
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        stream=True,
        max_tokens=1000  # Limit the response length
    )
    # NOTE: the previous fixed asyncio.sleep(0.1) per chunk added artificial
    # latency on every token batch and is removed; pacing is left to the
    # model stream and the consumer.
    async for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            # SSE framing: a payload containing "\n" must be sent as
            # multiple "data:" lines within a single event — a raw newline
            # inside one "data:" line would prematurely end the event on
            # the client.
            event = "".join(f"data: {line}\n" for line in delta.split("\n"))
            yield event + "\n"