Skip to content

Commit 4e86d75

Browse files
authored
Merge pull request #165 from rohrao/patch-1
Update main.py for new API Catalog models
2 parents 0be681f + c484e5a commit 4e86d75

File tree

3 files changed

+79
-53
lines changed

3 files changed

+79
-53
lines changed

community/5_mins_rag_no_gpu/README.md

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# RAG in 5 Minutes
2+
3+
This implementation is tied to the [YouTube video on NVIDIA Developer](https://youtu.be/N_OOfkEWcOk).
4+
5+
This is a simple standalone implementation showing a minimal RAG pipeline that uses models available from [NVIDIA API Catalog](https://catalog.ngc.nvidia.com/ai-foundation-models).
6+
The catalog enables you to experience state-of-the-art LLMs accelerated by NVIDIA.
7+
Developers get free credits for 10K requests to any of the models.
8+
9+
The example uses an [integration package to LangChain](https://python.langchain.com/docs/integrations/providers/nvidia) to access the models.
10+
NVIDIA engineers develop, test, and maintain the open source integration.
11+
This example uses a simple [Streamlit](https://streamlit.io/) based user interface and has a one-file implementation.
12+
Because the example uses the models from the NVIDIA API Catalog, you do not need a GPU to run the example.
13+
14+
### Steps
15+
16+
1. Create a Python virtual environment and activate it:
17+
18+
```console
19+
python3 -m virtualenv genai
20+
source genai/bin/activate
21+
```
22+
23+
1. From the root of this repository, `GenerativeAIExamples`, install the requirements:
24+
25+
```console
26+
pip install -r community/5_mins_rag_no_gpu/requirements.txt
27+
```
28+
29+
1. Add your NVIDIA API key as an environment variable:
30+
31+
```console
32+
export NVIDIA_API_KEY="nvapi-*"
33+
```
34+
35+
If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select any model, then click `Get API Key`.
36+
37+
1. Run the example using Streamlit:
38+
39+
```console
40+
streamlit run community/5_mins_rag_no_gpu/main.py
41+
```
42+
43+
1. Test the deployed example by going to `http://<host_ip>:8501` in a web browser.
44+
45+
Click **Browse Files** and select your knowledge source.
46+
After selecting, click **Upload!** to complete the ingestion process.
47+
48+
You are all set now! Try out queries related to the knowledge base using text from the user interface.

community/5_mins_rag_no_gpu/main.py

+30-52
Original file line numberDiff line numberDiff line change
@@ -16,62 +16,45 @@
1616
# This is a simple standalone implementation showing a RAG pipeline using NVIDIA AI Foundation models.
1717
# It uses a simple Streamlit UI and one file implementation of a minimalistic RAG pipeline.
1818

19-
############################################
20-
# Component #1 - Document Loader
21-
############################################
22-
2319
import streamlit as st
2420
import os
21+
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
22+
from langchain.text_splitter import CharacterTextSplitter
23+
from langchain_community.document_loaders import DirectoryLoader
24+
from langchain_community.vectorstores import FAISS
25+
import pickle
26+
from langchain_core.output_parsers import StrOutputParser
27+
from langchain_core.prompts import ChatPromptTemplate
2528

26-
st.set_page_config(layout = "wide")
29+
st.set_page_config(layout="wide")
2730

31+
# Component #1 - Document Upload
2832
with st.sidebar:
2933
DOCS_DIR = os.path.abspath("./uploaded_docs")
3034
if not os.path.exists(DOCS_DIR):
3135
os.makedirs(DOCS_DIR)
3236
st.subheader("Add to the Knowledge Base")
3337
with st.form("my-form", clear_on_submit=True):
34-
uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files = True)
38+
uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files=True)
3539
submitted = st.form_submit_button("Upload!")
3640

3741
if uploaded_files and submitted:
3842
for uploaded_file in uploaded_files:
3943
st.success(f"File {uploaded_file.name} uploaded successfully!")
40-
with open(os.path.join(DOCS_DIR, uploaded_file.name),"wb") as f:
44+
with open(os.path.join(DOCS_DIR, uploaded_file.name), "wb") as f:
4145
f.write(uploaded_file.read())
4246

43-
############################################
4447
# Component #2 - Embedding Model and LLM
45-
############################################
48+
llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
49+
document_embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage")
4650

47-
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
48-
49-
# make sure to export your NVIDIA AI Playground key as NVIDIA_API_KEY!
50-
llm = ChatNVIDIA(model="ai-llama3-70b")
51-
document_embedder = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="passage")
52-
query_embedder = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="query")
53-
54-
############################################
5551
# Component #3 - Vector Database Store
56-
############################################
57-
58-
from langchain.text_splitter import CharacterTextSplitter
59-
from langchain_community.document_loaders import DirectoryLoader
60-
from langchain_community.vectorstores import FAISS
61-
import pickle
62-
6352
with st.sidebar:
64-
# Option for using an existing vector store
6553
use_existing_vector_store = st.radio("Use existing vector store if available", ["Yes", "No"], horizontal=True)
6654

67-
# Path to the vector store file
6855
vector_store_path = "vectorstore.pkl"
69-
70-
# Load raw documents from the directory
7156
raw_documents = DirectoryLoader(DOCS_DIR).load()
7257

73-
74-
# Check for existing vector store file
7558
vector_store_exists = os.path.exists(vector_store_path)
7659
vectorstore = None
7760
if use_existing_vector_store == "Yes" and vector_store_exists:
@@ -81,9 +64,9 @@
8164
st.success("Existing vector store loaded successfully.")
8265
else:
8366
with st.sidebar:
84-
if raw_documents:
67+
if raw_documents and use_existing_vector_store == "Yes":
8568
with st.spinner("Splitting documents into chunks..."):
86-
text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
69+
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=200)
8770
documents = text_splitter.split_documents(raw_documents)
8871

8972
with st.spinner("Adding document chunks to vector database..."):
@@ -96,10 +79,7 @@
9679
else:
9780
st.warning("No documents available to process!", icon="⚠️")
9881

99-
############################################
10082
# Component #4 - LLM Response Generation and Chat
101-
############################################
102-
10383
st.subheader("Chat with your AI Assistant, Envie!")
10484

10585
if "messages" not in st.session_state:
@@ -109,34 +89,32 @@
10989
with st.chat_message(message["role"]):
11090
st.markdown(message["content"])
11191

112-
from langchain_core.output_parsers import StrOutputParser
113-
from langchain_core.prompts import ChatPromptTemplate
114-
115-
prompt_template = ChatPromptTemplate.from_messages(
116-
[("system", "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."), ("user", "{input}")]
117-
)
118-
user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
119-
llm = ChatNVIDIA(model="ai-llama3-70b")
92+
prompt_template = ChatPromptTemplate.from_messages([
93+
("system", "You are a helpful AI assistant named Envie. If provided with context, use it to inform your responses. If no context is available, use your general knowledge to provide a helpful response."),
94+
("human", "{input}")
95+
])
12096

12197
chain = prompt_template | llm | StrOutputParser()
12298

123-
if user_input and vectorstore!=None:
99+
user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
100+
101+
if user_input:
124102
st.session_state.messages.append({"role": "user", "content": user_input})
125-
retriever = vectorstore.as_retriever()
126-
docs = retriever.invoke(user_input)
127103
with st.chat_message("user"):
128104
st.markdown(user_input)
129105

130-
context = ""
131-
for doc in docs:
132-
context += doc.page_content + "\n\n"
133-
134-
augmented_user_input = "Context: " + context + "\n\nQuestion: " + user_input + "\n"
135-
136106
with st.chat_message("assistant"):
137107
message_placeholder = st.empty()
138108
full_response = ""
139109

110+
if vectorstore is not None and use_existing_vector_store == "Yes":
111+
retriever = vectorstore.as_retriever()
112+
docs = retriever.invoke(user_input)
113+
context = "\n\n".join([doc.page_content for doc in docs])
114+
augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}\n"
115+
else:
116+
augmented_user_input = f"Question: {user_input}\n"
117+
140118
for response in chain.stream({"input": augmented_user_input}):
141119
full_response += response
142120
message_placeholder.markdown(full_response + "▌")

community/5_mins_rag_no_gpu/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ streamlit==1.30.0
22
faiss-cpu==1.7.4
33
langchain==0.1.20
44
unstructured[all-docs]==0.11.2
5-
langchain-nvidia-ai-endpoints==0.0.19
5+
langchain-nvidia-ai-endpoints

0 commit comments

Comments
 (0)