-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample_rag_application.py
51 lines (42 loc) · 2.23 KB
/
sample_rag_application.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Prompt Used:
###
#"I want to build a Retrieval-Augmented Generation (RAG) chatbot using LangChain to process a PDF document. The chatbot should:
#Load the PDF file and extract its content using LangChain's PyPDFLoader.
#Split the text into chunks of 1000 characters with an overlap of 200 characters using RecursiveCharacterTextSplitter.
#Create embeddings for the text using OpenAIEmbeddings and store them in a FAISS vector store.
#Set up a retrieval-based pipeline using LangChain's RetrievalQA with ChatOpenAI as the LLM.
#Allow me to query the chatbot, and it should provide answers based on the document content.
#Here's the file name: 'Cape Coral, FL Resiliency Plan 2017.pdf'.
#Please also include secure handling for the OpenAI API key using Python's getpass and os modules. Write this as complete Python code."
###
#Note: Due to rapid development of LangChain, the import statements generated by LLMs are often outdated. This code has been updated to use the latest LangChain imports.
import os
import getpass
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# NOTE(review): `OpenAIEmbeddings` and `ChatOpenAI` have moved to the
# `langchain_openai` package; the `langchain_community` variants imported above
# emit deprecation warnings. Left as-is here to avoid introducing a new
# dependency — migrate the imports when `langchain_openai` is available.

# Document to index; parameterized so the script is easy to point elsewhere.
PDF_PATH = "Cape Coral, FL Resiliency Plan 2017.pdf"


def _build_qa_chain(pdf_path):
    """Load a PDF, chunk it, embed it into FAISS, and return a RetrievalQA chain.

    Args:
        pdf_path: Path to the PDF document to index.

    Returns:
        A RetrievalQA chain whose retriever is backed by an in-memory FAISS
        index of the document's 1000-character / 200-overlap chunks.
    """
    # Load the PDF and extract page-level documents.
    documents = PyPDFLoader(pdf_path).load()
    # Overlapping chunks so retrieved context survives page/paragraph breaks.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)
    # Embed the chunks and store them in a FAISS vector store.
    vector_store = FAISS.from_documents(chunks, OpenAIEmbeddings())
    # temperature=0.7 preserved from the original; consider 0 for strictly
    # factual document QA.
    llm = ChatOpenAI(temperature=0.7)
    return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())


def main():
    """Obtain the API key, build the RAG chain, and run an interactive QA loop."""
    # Securely obtain the OpenAI API key. An already-set environment variable
    # takes precedence so the script can also run non-interactively.
    api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass(
        "Enter your OpenAI API key: "
    )
    os.environ["OPENAI_API_KEY"] = api_key

    retrieval_chain = _build_qa_chain(PDF_PATH)

    # Query loop: 'exit' quits; blank lines are ignored rather than sent.
    while True:
        query = input("Enter your query (or 'exit' to quit): ").strip()
        if query.lower() == "exit":
            break
        if not query:
            continue
        # RetrievalQA's input key is "query"; pass it explicitly. The returned
        # dict echoes the question back under "query", so print only the
        # answer text under "result" (fall back to the raw dict defensively).
        response = retrieval_chain.invoke({"query": query})
        print("Response:", response.get("result", response))


if __name__ == "__main__":
    main()