diff --git a/cookbook/rag_with_faiss.ipynb b/cookbook/rag_with_faiss.ipynb index e14365a3af63a..ef48b0e558710 100644 --- a/cookbook/rag_with_faiss.ipynb +++ b/cookbook/rag_with_faiss.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "c96cd71a-510d-46a2-a06b-8839818e2196", "metadata": {}, "outputs": [ @@ -22,78 +22,77 @@ "text": [ "Requirement already satisfied: langchain in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.14)\n", "Requirement already satisfied: langchain-community in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.14)\n", + "Requirement already satisfied: openai in /home/dheerajreddy/lctest/lib/python3.10/site-packages (1.59.8)\n", "Requirement already satisfied: langchain_openai in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.0)\n", "Requirement already satisfied: faiss-cpu in /home/dheerajreddy/lctest/lib/python3.10/site-packages (1.9.0.post1)\n", "Requirement already satisfied: pypdf in /home/dheerajreddy/lctest/lib/python3.10/site-packages (5.1.0)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.0.37)\n", "Requirement already satisfied: langsmith<0.3,>=0.1.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.2.11)\n", - "Requirement already satisfied: requests<3,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.32.3)\n", "Requirement already satisfied: numpy<2,>=1.22.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (1.26.4)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (3.11.11)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.10.5)\n", - "Requirement already satisfied: langchain-core<0.4.0,>=0.3.29 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.30)\n", "Requirement already satisfied: PyYAML>=5.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (6.0.2)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.5)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (9.0.0)\n", + "Requirement already satisfied: requests<3,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.32.3)\n", "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (4.0.3)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.10.5)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.0.37)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (9.0.0)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.5)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.29 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.30)\n", + "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.4.0)\n", "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.6.7)\n", "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (2.7.1)\n", - "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.4.0)\n", + "Requirement already satisfied: tqdm>4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.11 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.12.2)\n", + "Requirement already satisfied: sniffio in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (1.3.1)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (0.8.2)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (0.28.1)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.8.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (1.9.0)\n", "Requirement already satisfied: tiktoken<1,>=0.7 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain_openai) (0.8.0)\n", - "Requirement already satisfied: openai<2.0.0,>=1.58.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain_openai) (1.59.8)\n", "Requirement already satisfied: packaging in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from faiss-cpu) (24.2)\n", - "Requirement already satisfied: typing_extensions>=4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pypdf) (4.12.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n", "Requirement already satisfied: propcache>=0.2.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", + "Requirement already satisfied: idna>=2.8 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n", "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.25.1)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n", + "Requirement already satisfied: httpcore==1.* in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n", + "Requirement already satisfied: certifi in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (2024.12.14)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.29->langchain) (1.33)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (1.0.0)\n", "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (3.10.14)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (0.28.1)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.8.0)\n", - "Requirement already satisfied: sniffio in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.67.1)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (0.8.2)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.9.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (1.0.0)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n", "Requirement already satisfied: python-dotenv>=0.21.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.0.1)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2024.12.14)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.10)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.4.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.3.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n", "Requirement already satisfied: regex>=2022.1.18 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2024.11.6)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.58.1->langchain_openai) (1.2.2)\n", - "Requirement already satisfied: httpcore==1.* in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (0.14.0)\n", "Requirement already satisfied: jsonpointer>=1.9 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.29->langchain) (3.0.0)\n", "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n" ] } ], "source": [ - "! pip install -U langchain langchain-community langchain_openai faiss-cpu pypdf # (newest versions required for multi-modal)" + "! pip install -U langchain langchain-community openai langchain_openai faiss-cpu pypdf # (newest versions required for multi-modal)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "d033c505-c805-49cd-905d-97edf606113d", "metadata": {}, "outputs": [], "source": [ "#Import all necessary libraries\n", - "# from langchain_community.chat_models import ChatOpenAI\n", + "import requests\n", "from langchain_openai import ChatOpenAI\n", "from langchain_community.vectorstores import FAISS\n", - "# from langchain_community.embeddings import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n", "from langchain_core.prompts import PromptTemplate\n", "from langchain_community.document_loaders import PyPDFLoader,TextLoader\n", @@ -105,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "4196d408-efe1-442c-8d8a-856a002ab8ac", "metadata": { "scrolled": true @@ -129,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "c4eb93a9-6e6a-4ab9-822b-1e5106e30258", "metadata": {}, "outputs": [], @@ -154,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "9d8e77bc-0e5e-4d8f-8475-d9248b048817", "metadata": {}, "outputs": [], @@ -212,19 +211,42 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "c25b33fe-8039-4bbd-a3af-c829179325b5", + "execution_count": 7, + "id": "57fb42dd-684a-4c80-917d-37feaa41a6ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PDF downloaded successfully as Nike_q4_report.pdf\n", + "PDF downloaded successfully as puma_q4_report.pdf\n" + ] + } + ], "source": [ - "#add all the file paths, you want to create a retrieval chatbot for\n", - "file_paths = ['../docs/docs/example_data/nike-q3-2024-earnings.pdf',\n", - " '../docs/docs/example_data/puma-q3-2024-earnings.pdf']" + "# Download sample files for RAG\n", + "def download_pdf(url, filename):\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " with open(filename, 'wb') as file:\n", + " file.write(response.content)\n", + " print(f\"PDF downloaded successfully as {filename}\")\n", + " else:\n", + " print(f\"Failed to download PDF. Status code: {response.status_code}\")\n", + "\n", + "download_urls = ['https://s1.q4cdn.com/806093406/files/doc_financials/2024/q3/FY24-Q3-Combined-NIKE-Press-Release-Schedules-FINAL.pdf',\n", + " 'https://about.puma.com/sites/default/files/financial-report/2024/puma-q3-2024-release-english-final.pdf']\n", + "\n", + "filepaths = ['Nike_q4_report.pdf','puma_q4_report.pdf']\n", + "\n", + "for i in range(len(filepaths)):\n", + " download_pdf(download_urls[i], filepaths[i])\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "3b0ced3b-890d-4fd4-9b3e-849e73451210", "metadata": {}, "outputs": [ @@ -232,21 +254,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "['../docs/docs/example_data/nike-q3-2024-earnings.pdf', '../docs/docs/example_data/puma-q3-2024-earnings.pdf']\n", - "Processing file: ../docs/docs/example_data/nike-q3-2024-earnings.pdf\n", - "Processing file: ../docs/docs/example_data/puma-q3-2024-earnings.pdf\n", + "['Nike_q4_report.pdf', 'puma_q4_report.pdf']\n", + "Processing file: Nike_q4_report.pdf\n", + "Processing file: puma_q4_report.pdf\n", "49\n" ] } ], "source": [ "#Create Vector embeddings\n", - "process_documents(file_paths)" + "process_documents(filepaths)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "e261b2c3-d62e-428f-a615-2643e2c3f79d", "metadata": {}, "outputs": [ @@ -266,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "id": "723f652b-ca89-4f9e-9b39-60c451336d2e", "metadata": {}, "outputs": [ @@ -274,13 +296,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "It is difficult to determine which company's financials are better based on the given context as both Nike and Puma present different aspects of their financial performance. Nike reported revenues of $12.4 billion in the third quarter of fiscal year 2024, while Puma emphasized its focus on managing short-term challenges without compromising long-term momentum and positive feedback on upcoming product releases. Additional analysis and comparison of financial statements would be needed to determine which company's financial condition is better.\n" + "Based on the provided context, PUMA appears to be performing better than Nike. PUMA has achieved various successes in athletics and sports partnerships, as well as positive feedback from consumers and retail partners. The brand's momentum and growth are highlighted, indicating a strong performance compared to Nike.\n" ] } ], "source": [ "#query the documents\n", - "question = 'Among Nike and Puma whose financials are better'\n", + "question = 'Which company is performing better in between Nike and Puma'\n", "response = qa_chain.invoke(question)\n", "print(response)" ] @@ -288,7 +310,7 @@ { "cell_type": "code", "execution_count": null, - "id": "57fb42dd-684a-4c80-917d-37feaa41a6ea", + "id": "5f5f6a9f-02fb-42e1-8268-bfaef2956a3d", "metadata": {}, "outputs": [], "source": [] diff --git a/docs/docs/example_data/nike-q3-2024-earnings.pdf b/docs/docs/example_data/nike-q3-2024-earnings.pdf deleted file mode 100644 index 78f41618697be..0000000000000 Binary files a/docs/docs/example_data/nike-q3-2024-earnings.pdf and /dev/null differ diff --git a/docs/docs/example_data/puma-q3-2024-earnings.pdf b/docs/docs/example_data/puma-q3-2024-earnings.pdf deleted file mode 100644 index 350cbf5722f1f..0000000000000 Binary files a/docs/docs/example_data/puma-q3-2024-earnings.pdf and /dev/null differ