From e0217b008587a5913015ddc3044f9e77e59889fa Mon Sep 17 00:00:00 2001 From: Kaustav Ghosh Date: Thu, 30 Jan 2025 01:57:56 +0530 Subject: [PATCH 1/4] Update Azure RAG notebook with environment variable support and improved error handling - Added support for loading credentials from .env file - Improved Azure OpenAI configuration with optional environment variables - Enhanced error handling for OpenAI API calls - Updated notebook to use python-dotenv for credential management - Increased embedding dimensions from 1536 to 3072 in index configuration - Minor text corrections in markdown cells --- azure/.env.sample | 15 + .../RAG_with_Couchbase_and_AzureOpenAI.ipynb | 2824 +++++++++++++---- azure/azure_index.json | 2 +- 3 files changed, 2147 insertions(+), 694 deletions(-) create mode 100644 azure/.env.sample diff --git a/azure/.env.sample b/azure/.env.sample new file mode 100644 index 00000000..0ce62469 --- /dev/null +++ b/azure/.env.sample @@ -0,0 +1,15 @@ +AZURE_OPENAI_KEY = "" +AZURE_OPENAI_ENDPOINT = "" +AZURE_OPENAI_EMBEDDING_DEPLOYMENT = "" +AZURE_OPENAI_CHAT_DEPLOYMENT = "" +AZURE_OPENAI_API_VERSION = "" + +CB_HOST="" +CB_USERNAME="" +CB_PASSWORD="" +CB_BUCKET_NAME="" + +INDEX_NAME="" +SCOPE_NAME="" +COLLECTION_NAME="" +CACHE_COLLECTION="" \ No newline at end of file diff --git a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb index ef7391a3..26fab0a8 100644 --- a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb +++ b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb @@ -33,7 +33,7 @@ "\n", "## Create and Deploy Your Free Tier Operational cluster on Capella\n", "\n", - "To get started with Couchbase Capella, create an account and use it to deploy a forever free tier operational cluster. 
This account provides you with a environment where you can explore and learn about Capella with no time constraint.\n", + "To get started with Couchbase Capella, create an account and use it to deploy a forever free tier operational cluster. This account provides you with an environment where you can explore and learn about Capella with no time constraint.\n", "\n", "To know more, please follow the [instructions](https://docs.couchbase.com/cloud/get-started/create-account.html).\n", "\n", @@ -70,135 +70,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Collecting datasets\n", - " Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)\n", - "Collecting langchain-couchbase\n", - " Downloading langchain_couchbase-0.1.1-py3-none-any.whl.metadata (1.9 kB)\n", - "Collecting langchain-openai\n", - " Downloading langchain_openai-0.1.23-py3-none-any.whl.metadata (2.6 kB)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.15.4)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n", - "Collecting pyarrow>=15.0.0 (from datasets)\n", - " Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", - " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.1.4)\n", - "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.5)\n", - "Collecting xxhash (from datasets)\n", - " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess (from datasets)\n", - " Downloading 
multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", - "Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets) (2024.6.1)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.5)\n", - "Requirement already satisfied: huggingface-hub>=0.21.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.24.6)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n", - "Collecting couchbase<5.0.0,>=4.2.1 (from langchain-couchbase)\n", - " Downloading couchbase-4.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (23 kB)\n", - "Collecting langchain-core<0.3,>=0.2.0 (from langchain-couchbase)\n", - " Downloading langchain_core-0.2.38-py3-none-any.whl.metadata (6.2 kB)\n", - "Collecting openai<2.0.0,>=1.40.0 (from langchain-openai)\n", - " Downloading openai-1.43.1-py3-none-any.whl.metadata (22 kB)\n", - "Collecting tiktoken<1,>=0.7 (from langchain-openai)\n", - " Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) 
(6.0.5)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.2->datasets) (4.12.2)\n", - "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3,>=0.2.0->langchain-couchbase)\n", - " Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting langsmith<0.2.0,>=0.1.75 (from langchain-core<0.3,>=0.2.0->langchain-couchbase)\n", - " Downloading langsmith-0.1.115-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.2.0->langchain-couchbase) (2.8.2)\n", - "Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain-core<0.3,>=0.2.0->langchain-couchbase)\n", - " Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (1.7.0)\n", - "Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.40.0->langchain-openai)\n", - " Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n", - "Collecting jiter<1,>=0.4.0 (from openai<2.0.0,>=1.40.0->langchain-openai)\n", - " Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (1.3.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from 
requests>=2.32.2->datasets) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.8)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.8.30)\n", - "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<1,>=0.7->langchain-openai) (2024.5.15)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.1)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.40.0->langchain-openai) (1.2.2)\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai<2.0.0,>=1.40.0->langchain-openai)\n", - " Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n", - "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.40.0->langchain-openai)\n", - " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", - "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3,>=0.2.0->langchain-couchbase)\n", - " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl.metadata (2.3 kB)\n", - "Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3,>=0.2.0->langchain-couchbase)\n", - " Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.2.0->langchain-couchbase) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.2.0->langchain-couchbase) (2.20.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", - "Downloading datasets-2.21.0-py3-none-any.whl (527 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m527.3/527.3 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading langchain_couchbase-0.1.1-py3-none-any.whl (13 kB)\n", - "Downloading langchain_openai-0.1.23-py3-none-any.whl (51 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.0/52.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading couchbase-4.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.1/5.1 MB\u001b[0m \u001b[31m59.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading langchain_core-0.2.38-py3-none-any.whl (396 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m396.4/396.4 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hDownloading openai-1.43.1-py3-none-any.whl (365 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m365.7/365.7 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.9/318.9 
kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", - "Downloading langsmith-0.1.115-py3-none-any.whl (290 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.2/290.2 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tenacity-8.5.0-py3-none-any.whl (28 kB)\n", - "Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n", - "Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: xxhash, tenacity, pyarrow, orjson, jsonpointer, jiter, h11, dill, couchbase, tiktoken, multiprocess, jsonpatch, httpcore, httpx, openai, langsmith, datasets, langchain-core, langchain-openai, langchain-couchbase\n", - " Attempting uninstall: tenacity\n", - " Found existing installation: tenacity 9.0.0\n", - " Uninstalling tenacity-9.0.0:\n", - " Successfully uninstalled tenacity-9.0.0\n", - " Attempting uninstall: pyarrow\n", - " Found existing installation: pyarrow 14.0.2\n", - " Uninstalling pyarrow-14.0.2:\n", - " Successfully uninstalled pyarrow-14.0.2\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", - "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed couchbase-4.3.1 datasets-2.21.0 dill-0.3.8 h11-0.14.0 httpcore-1.0.5 httpx-0.27.2 jiter-0.5.0 jsonpatch-1.33 jsonpointer-3.0.0 langchain-core-0.2.38 langchain-couchbase-0.1.1 langchain-openai-0.1.23 langsmith-0.1.115 multiprocess-0.70.16 openai-1.43.1 orjson-3.10.7 pyarrow-17.0.0 tenacity-8.5.0 tiktoken-0.7.0 xxhash-3.5.0\n" + "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "!pip install datasets langchain-couchbase langchain-openai" + "%pip install --quiet datasets langchain-couchbase langchain-openai python-dotenv" ] }, { @@ -222,21 +99,21 @@ "import getpass\n", "import json\n", "import logging\n", + "import os\n", "import sys\n", "import time\n", - "from datetime import timedelta\n", "from uuid import uuid4\n", + "from datetime import timedelta\n", "\n", "from couchbase.auth import PasswordAuthenticator\n", "from couchbase.cluster import Cluster\n", - "from couchbase.exceptions import (\n", - " CouchbaseException,\n", - " InternalServerFailureException,\n", - " QueryIndexAlreadyExistsException,\n", - ")\n", + "from couchbase.exceptions import (CouchbaseException,\n", + " InternalServerFailureException,\n", + " QueryIndexAlreadyExistsException)\n", "from couchbase.management.search import SearchIndex\n", "from couchbase.options import ClusterOptions\n", "from datasets import load_dataset\n", + "from dotenv import load_dotenv\n", "from langchain_core.documents import Document\n", "from langchain_core.globals import set_llm_cache\n", "from langchain_core.output_parsers import StrOutputParser\n", @@ -291,40 +168,24 @@ "id": "PFGyHll18mSe", "outputId": "50d09055-cf2e-4d8a-d025-cc1a6a2e3193" 
}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Enter your Azure OpenAI Key: ··········\n", - "Enter your Azure OpenAI Endpoint: https://first-couchbase-instance.openai.azure.com/\n", - "Enter your Azure OpenAI Embedding Deployment: text-embedding-ada-002\n", - "Enter your Azure OpenAI Chat Deployment: gpt-4o\n", - "Enter your Couchbase host (default: couchbase://localhost): couchbases://cb.hlcup4o4jmjr55yf.cloud.couchbase.com\n", - "Enter your Couchbase username (default: Administrator): vector-search-rag-demos\n", - "Enter your Couchbase password (default: password): ··········\n", - "Enter your Couchbase bucket name (default: vector-search-testing): \n", - "Enter your index name (default: vector_search_azure): \n", - "Enter your scope name (default: shared): \n", - "Enter your collection name (default: azure): \n", - "Enter your cache collection name (default: cache): \n" - ] - } - ], + "outputs": [], "source": [ - "AZURE_OPENAI_KEY = getpass.getpass('Enter your Azure OpenAI Key: ')\n", - "AZURE_OPENAI_ENDPOINT = input('Enter your Azure OpenAI Endpoint: ')\n", - "AZURE_OPENAI_EMBEDDING_DEPLOYMENT = input('Enter your Azure OpenAI Embedding Deployment: ')\n", - "AZURE_OPENAI_CHAT_DEPLOYMENT = input('Enter your Azure OpenAI Chat Deployment: ')\n", + "load_dotenv()\n", + "\n", + "AZURE_OPENAI_KEY = os.getenv('AZURE_OPENAI_KEY') or getpass.getpass('Enter your Azure OpenAI Key: ')\n", + "AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT') or input('Enter your Azure OpenAI Endpoint: ')\n", + "AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv('AZURE_OPENAI_EMBEDDING_DEPLOYMENT') or input('Enter your Azure OpenAI Embedding Deployment: ')\n", + "AZURE_OPENAI_CHAT_DEPLOYMENT = os.getenv('AZURE_OPENAI_CHAT_DEPLOYMENT') or input('Enter your Azure OpenAI Chat Deployment: ')\n", + "AZURE_OPENAI_API_VERSION = os.getenv('AZURE_OPENAI_API_VERSION') or input('Enter your Azure OpenAI API Version/Model Version: (for example: 2024-11-20 for 
gpt-4o) ') or '2024-11-20'\n", "\n", - "CB_HOST = input('Enter your Couchbase host (default: couchbase://localhost): ') or 'couchbase://localhost'\n", - "CB_USERNAME = input('Enter your Couchbase username (default: Administrator): ') or 'Administrator'\n", - "CB_PASSWORD = getpass.getpass('Enter your Couchbase password (default: password): ') or 'password'\n", - "CB_BUCKET_NAME = input('Enter your Couchbase bucket name (default: vector-search-testing): ') or 'vector-search-testing'\n", - "INDEX_NAME = input('Enter your index name (default: vector_search_azure): ') or 'vector_search_azure'\n", - "SCOPE_NAME = input('Enter your scope name (default: shared): ') or 'shared'\n", - "COLLECTION_NAME = input('Enter your collection name (default: azure): ') or 'azure'\n", - "CACHE_COLLECTION = input('Enter your cache collection name (default: cache): ') or 'cache'\n", + "CB_HOST = os.getenv('CB_HOST') or input('Enter your Couchbase host (default: couchbase://localhost): ') or 'couchbase://localhost'\n", + "CB_USERNAME = os.getenv('CB_USERNAME') or input('Enter your Couchbase username (default: Administrator): ') or 'Administrator'\n", + "CB_PASSWORD = os.getenv('CB_PASSWORD') or getpass.getpass('Enter your Couchbase password (default: password): ') or 'password'\n", + "CB_BUCKET_NAME = os.getenv('CB_BUCKET_NAME') or input('Enter your Couchbase bucket name (default: vector-search-testing): ') or 'vector-search-testing'\n", + "INDEX_NAME = os.getenv('INDEX_NAME') or input('Enter your index name (default: vector_search_azure): ') or 'vector_search_azure'\n", + "SCOPE_NAME = os.getenv('SCOPE_NAME') or input('Enter your scope name (default: shared): ') or 'shared'\n", + "COLLECTION_NAME = os.getenv('COLLECTION_NAME') or input('Enter your collection name (default: azure): ') or 'azure'\n", + "CACHE_COLLECTION = os.getenv('CACHE_COLLECTION') or input('Enter your cache collection name (default: cache): ') or 'cache'\n", "\n", "# Check if the variables are correctly loaded\n", "if 
not all([AZURE_OPENAI_KEY, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBEDDING_DEPLOYMENT, AZURE_OPENAI_CHAT_DEPLOYMENT]):\n", @@ -357,7 +218,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:29:16,632 - INFO - Successfully connected to Couchbase\n" + "2025-01-30 01:19:35,508 - INFO - Successfully connected to Couchbase\n" ] } ], @@ -399,18 +260,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:29:17,029 - INFO - Collection 'azure' already exists.Skipping creation.\n", - "2024-09-06 07:29:17,095 - INFO - Primary index present or created successfully.\n", - "2024-09-06 07:29:17,775 - INFO - All documents cleared from the collection.\n", - "2024-09-06 07:29:17,841 - INFO - Collection 'cache' already exists.Skipping creation.\n", - "2024-09-06 07:29:17,907 - INFO - Primary index present or created successfully.\n", - "2024-09-06 07:29:17,973 - INFO - All documents cleared from the collection.\n" + "2025-01-30 01:19:37,927 - INFO - Collection 'azure' already exists.Skipping creation.\n", + "2025-01-30 01:19:39,332 - INFO - Primary index present or created successfully.\n", + "2025-01-30 01:19:42,061 - INFO - All documents cleared from the collection.\n", + "2025-01-30 01:19:43,393 - INFO - Collection 'cache' already exists.Skipping creation.\n", + "2025-01-30 01:19:44,621 - INFO - Primary index present or created successfully.\n", + "2025-01-30 01:19:44,865 - INFO - All documents cleared from the collection.\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -473,6 +334,8 @@ "\n", "Semantic search requires an efficient way to retrieve relevant documents based on a user's query. This is where the Couchbase **Vector Search Index** comes into play. In this step, we load the Vector Search Index definition from a JSON file, which specifies how the index should be structured. 
This includes the fields to be indexed, the dimensions of the vectors, and other parameters that determine how the search engine processes queries based on vector similarity.\n", "\n", + "This Azure vector search index configuration requires specific default settings to function properly. This tutorial uses the bucket named `vector-search-testing` with the scope `shared` and collection `azure`. The configuration is set up for vectors with exactly `3072 dimensions`, using dot product similarity and optimized for recall. If you want to use a different bucket, scope, or collection, you will need to modify the index configuration accordingly.\n", + "\n", "For more information on creating a vector search index, please follow the [instructions](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html).\n" ] }, @@ -487,233 +350,40 @@ "id": "y7xiCrOc8mmj", "outputId": "833d3fd1-f4e8-4869-f1e8-b4848136cd71" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Upload your index definition file\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. 
Please rerun this cell to enable.\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Saving azure_index.json to azure_index.json\n" - ] - } - ], + "outputs": [], "source": [ "# If you are running this script locally (not in Google Colab), uncomment the following line\n", "# and provide the path to your index definition file.\n", "\n", "# index_definition_path = '/path_to_your_index_file/azure_index.json' # Local setup: specify your file path here\n", "\n", - "# If you are running in Google Colab, use the following code to upload the index definition file\n", - "from google.colab import files\n", - "print(\"Upload your index definition file\")\n", - "uploaded = files.upload()\n", - "index_definition_path = list(uploaded.keys())[0]\n", + "# # Version for Google Colab\n", + "# def load_index_definition_colab():\n", + "# from google.colab import files\n", + "# print(\"Upload your index definition file\")\n", + "# uploaded = files.upload()\n", + "# index_definition_path = list(uploaded.keys())[0]\n", "\n", - "try:\n", - " with open(index_definition_path, 'r') as file:\n", - " index_definition = json.load(file)\n", - "except Exception as e:\n", - " raise ValueError(f\"Error loading index definition from {index_definition_path}: {str(e)}\")" + "# try:\n", + "# with open(index_definition_path, 'r') as file:\n", + "# index_definition = json.load(file)\n", + "# return index_definition\n", + "# except Exception as e:\n", + "# raise ValueError(f\"Error loading index definition from {index_definition_path}: {str(e)}\")\n", + "\n", + "# Version for Local Environment\n", + "def load_index_definition_local(index_definition_path):\n", + " try:\n", + " with open(index_definition_path, 'r') as file:\n", + " index_definition = json.load(file)\n", + " return index_definition\n", + " except Exception as e:\n", + " raise ValueError(f\"Error loading index 
definition from {index_definition_path}: {str(e)}\")\n", + "\n", + "# Usage\n", + "# Uncomment the appropriate line based on your environment\n", + "# index_definition = load_index_definition_colab()\n", + "index_definition = load_index_definition_local('azure_index.json')" ] }, { @@ -742,8 +412,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:30:01,070 - INFO - Index 'vector_search_azure' found\n", - "2024-09-06 07:30:01,373 - INFO - Index 'vector_search_azure' already exists. Skipping creation/update.\n" + "2025-01-30 01:19:46,460 - INFO - Index 'vector_search_azure' found\n", + "2025-01-30 01:19:47,594 - INFO - Index 'vector_search_azure' already exists. Skipping creation/update.\n" ] } ], @@ -794,220 +464,6 @@ " raise RuntimeError(f\"Internal server error while creating/updating search index: {error_message}\")" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "QRV4k06L8mwS" - }, - "source": [ - "# Load the TREC Dataset\n", - "To build a search engine, we need data to search through. We use the TREC dataset, a well-known benchmark in the field of information retrieval. This dataset contains a wide variety of text data that we'll use to train our search engine. Loading the dataset is a crucial step because it provides the raw material that our search engine will work with. The quality and diversity of the data in the TREC dataset make it an excellent choice for testing and refining our search engine, ensuring that it can handle a wide range of queries effectively.\n", - "\n", - "The TREC dataset's rich content allows us to simulate real-world scenarios where users ask complex questions, enabling us to fine-tune our search engine's ability to understand and respond to various types of queries." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 420, - "referenced_widgets": [ - "8a564229df744b46954b2071c6a6675e", - "2bcc0c8ac8cf450786af9c143aa0c45e", - "9c4057c5ed274735b9e2a4a68a75850f", - "851647814d2144b1a5d45d089b7e550d", - "4258be805cd640da80eb31d7bcd57224", - "e44357c05e6340c2a9d62e03dbae75c7", - "7e9eacd828ad4fc8a91e0e709a6ae894", - "adf70ce39af24961bf4813b33dfa9a6d", - "8223fee9a1014359a08574cd33ed9be8", - "1bb005b776a54fa3b4540c0b0e54f9ad", - "5a09109b9cca4f51ae6b0f27d6beaff5", - "e6d34dac7c1347088812a24dd5eaa9b4", - "d0d85c5e388f4d98bf4ebe41dc735c59", - "1cfb972e32b94a61b90c1cd6e30c0168", - "de7b327febff4ce5aeace4cf5e7d8425", - "5506530162c44ffc8d2fa499bdd898e5", - "f4a92688a4ac4fbe8cdf0cf0c2adf92e", - "7f8b7a00ef79456b9c1be68d7c80acbd", - "846d0a68db8649a4b6a449e33e456557", - "48c2da481c184d12a1bc7f629f797393", - "313d80ed87f045da841b18d3ad40aaea", - "be3342a74451480f9be8660631fd716a", - "966f78ebd6fe451ba94f225c74822b45", - "f741064b353e49849872c7cf972de3c3", - "bb5c9bbe1f734aa28681f84d43fcf11b", - "fdf561800c7d4080a3f79da7acac0ade", - "c51ec2e3b7a942818a93aa41ba102d64", - "5ff28aed5de546f9871d3db0447eb66f", - "0a72183bcd8f49639ff7e704f1f82d37", - "42712160f4b44a87aa9e31c9df4a3feb", - "108bb0c972fa4fee8fca2bd2e632b47e", - "f862f2cc0ee74a25a41201e1c0d9ee22", - "ddf1806edafa473182ea94144930221a", - "3c2eeaa8cf524d0e9b36e97a8b87a2af", - "e7e7b99a739f4a759dcc5cb1c6c8f506", - "5004572235214e4db3b9318711cc08b4", - "570d400cfb394b0898e8b6f09521993e", - "ad752ded95884bcba2c01eac5ef3fdfe", - "d69ec185180d448893a6a01c3d64985b", - "5c83ccc140a84a1db041b7e1969041e2", - "b6c9ccf11a6b43b89e3f47664899026a", - "c393cea168ca4ddbb843ea6fc94759a9", - "3dabbf584cae4640949d1b57567e79ad", - "eabfed3f083c44f2b925defbb1c32cdc", - "71d2eeadaf064a9786c99a0c3100ab12", - "55459ee4f481430a8211e95f80d1ebd2", - "a070fe396d44404990fd382d14d58e6b", - "2497fec84589445381608179f088ec2f", - 
"2e0d06bf3b2e499b9649f801048b6202", - "c4d9f620c732417d97167bd97847e75f", - "da0c03f23da94e4bada122a084255fdc", - "57b3d95153164e74872d5b68e4c388f4", - "c911cc70bd394b8abedce89618dfb5b9", - "be421d34915d453ba49b3939be725cac", - "40646a88c38c4fca8068165fa5409c79", - "830bdf8312e64f9694cc9bb2f77da4f7", - "f1249595418f4481aad0f82f33fd1d96", - "b831670efec14fd08993cba39d1245bc", - "cabeb6f338ad4535b88491fd70416e3c", - "357cbf7417524b1facf5b953fab75c1a", - "46dc4caa549c48c897519ed4cd781e17", - "3c30fa950dac4aea9b2e5be54173ddf6", - "526c22f699ea4ab8afd347b8d5924c47", - "c46366a97dd6455486a5ab616d1a7dc4", - "7189e1ec10ab4f2f84daf9b2d5e0af4d", - "c1e113316431460e8f868a8d914839d9" - ] - }, - "id": "TRfRslF_8mzo", - "outputId": "b2428026-a686-4fb3-bdbf-06c747113280" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8a564229df744b46954b2071c6a6675e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading builder script: 0%| | 0.00/5.09k [00:00 50000:\n", + " print(f\"Skipping article with length {len(article)} - exceeds 50,000 character limit\")\n", + " continue\n", + " \n", + " # Convert article into the format needed for add_texts\n", + " texts = [article] # Single article as text\n", + " metadatas = [{}] # Empty metadata dictionary for each article\n", + " uuids = [str(uuid4())] # 
Generate UUID for the article\n", + " \n", + " # Use add_texts instead of add_documents\n", + " vector_store.add_texts(\n", + " texts=texts,\n", + " metadatas=metadatas,\n", + " ids=uuids\n", + " )\n", + " except Exception as e:\n", + " print(f\"Failed to save documents to vector store: {str(e)}\")\n", + " continue" ] }, { @@ -1153,7 +2445,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1166,7 +2458,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:30:52,165 - INFO - Successfully created cache\n" + "2025-01-30 01:53:39,495 - INFO - Successfully created cache\n" ] } ], @@ -1199,7 +2491,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1212,7 +2504,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:30:52,298 - INFO - Successfully created Azure OpenAI Chat model\n" + "2025-01-30 01:53:39,597 - INFO - Successfully created Azure OpenAI Chat model\n" ] } ], @@ -1222,7 +2514,7 @@ " deployment_name=AZURE_OPENAI_CHAT_DEPLOYMENT,\n", " openai_api_key=AZURE_OPENAI_KEY,\n", " azure_endpoint=AZURE_OPENAI_ENDPOINT,\n", - " openai_api_version=\"2024-07-01-preview\"\n", + " openai_api_version=AZURE_OPENAI_API_VERSION\n", " )\n", " logging.info(\"Successfully created Azure OpenAI Chat model\")\n", "except Exception as e:\n", @@ -1243,7 +2535,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1256,8 +2548,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:30:52,532 - INFO - HTTP Request: POST https://first-couchbase-instance.openai.azure.com//openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15 \"HTTP/1.1 200 OK\"\n", - "2024-09-06 07:30:52,839 - INFO - Semantic search completed in 0.53 seconds\n" + 
"2025-01-30 01:53:42,308 - INFO - HTTP Request: POST https://vector-search-demos-instance.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15 \"HTTP/1.1 200 OK\"\n", + "2025-01-30 01:53:44,514 - INFO - Semantic search completed in 4.91 seconds\n" ] }, { @@ -1265,22 +2557,129 @@ "output_type": "stream", "text": [ "\n", - "Semantic Search Results (completed in 0.53 seconds):\n", - "Distance: 0.9178, Text: Why did the world enter a global depression in 1929 ?\n", - "Distance: 0.8714, Text: When was `` the Great Depression '' ?\n", - "Distance: 0.8113, Text: What crop failure caused the Irish Famine ?\n", - "Distance: 0.7984, Text: What historical event happened in Dogtown in 1899 ?\n", - "Distance: 0.7917, Text: What caused the Lynmouth floods ?\n", - "Distance: 0.7915, Text: When was the first Wall Street Journal published ?\n", - "Distance: 0.7911, Text: When did the Dow first reach ?\n", - "Distance: 0.7885, Text: What were popular songs and types of songs in the 1920s ?\n", - "Distance: 0.7857, Text: When did World War I start ?\n", - "Distance: 0.7842, Text: What caused Harry Houdini 's death ?\n" + "Semantic Search Results (completed in 4.91 seconds):\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6858, Text: Manchester City boss Pep Guardiola has won 18 trophies since he arrived at the club in 2016\n", + "\n", + "Manchester City boss Pep Guardiola says he is \"fine\" despite admitting his sleep and diet are being affected by the worst run of results in his entire managerial career. In an interview with former Italy international Luca Toni for Amazon Prime Sport before Wednesday's Champions League defeat by Juventus, Guardiola touched on the personal impact City's sudden downturn in form has had. Guardiola said his state of mind was \"ugly\", that his sleep was \"worse\" and he was eating lighter as his digestion had suffered. 
City go into Sunday's derby against Manchester United at Etihad Stadium having won just one of their past 10 games. The Juventus loss means there is a chance they may not even secure a play-off spot in the Champions League. Asked to elaborate on his comments to Toni, Guardiola said: \"I'm fine. \"In our jobs we always want to do our best or the best as possible. When that doesn't happen you are more uncomfortable than when the situation is going well, always that happened. \"In good moments I am happier but when I get to the next game I am still concerned about what I have to do. There is no human being that makes an activity and it doesn't matter how they do.\" Guardiola said City have to defend better and \"avoid making mistakes at both ends\". To emphasise his point, Guardiola referred back to the third game of City's current run, against a Sporting side managed by Ruben Amorim, who will be in the United dugout at the weekend. City dominated the first half in Lisbon, led thanks to Phil Foden's early effort and looked to be cruising. Instead, they conceded three times in 11 minutes either side of half-time as Sporting eventually ran out 4-1 winners. \"I would like to play the game like we played in Lisbon on Sunday, believe me,\" said Guardiola, who is facing the prospect of only having three fit defenders for the derby as Nathan Ake and Manuel Akanji try to overcome injury concerns. If there is solace for City, it comes from the knowledge United are not exactly flying. Their comeback Europa League victory against Viktoria Plzen on Thursday was their third win of Amorim's short reign so far but only one of those successes has come in the Premier League, where United have lost their past two games against Arsenal and Nottingham Forest. Nevertheless, Guardiola can see improvements already on the red side of the city. \"It's already there,\" he said. \"You see all the patterns, the movements, the runners and the pace. 
He will do a good job at United, I'm pretty sure of that.\"\n", + "\n", + "Guardiola says skipper Kyle Walker has been offered support by the club after the City defender highlighted the racial abuse he had received on social media in the wake of the Juventus trip. \"It's unacceptable,\" he said. \"Not because it's Kyle - for any human being. \"Unfortunately it happens many times in the real world. It is not necessary to say he has the support of the entire club. It is completely unacceptable and we give our support to him.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6338, Text: 'We have to find a way' - Guardiola vows to end relegation form\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 'Worrying' and 'staggering' - Why do Manchester City keep conceding?\n", + "\n", + "Manchester City are currently in relegation form and there is little sign of it ending. Saturday's 2-1 defeat at Aston Villa left them joint bottom of the form table over the past eight games with just Southampton for company. Saints, at the foot of the Premier League, have the same number of points, four, as City over their past eight matches having won one, drawn one and lost six - the same record as the floundering champions. And if Southampton - who appointed Ivan Juric as their new manager on Saturday - get at least a point at Fulham on Sunday, City will be on the worst run in the division. Even Wolves, who sacked boss Gary O'Neil last Sunday and replaced him with Vitor Pereira, have earned double the number of points during the same period having played a game fewer. They are damning statistics for Pep Guardiola, even if he does have some mitigating circumstances with injuries to Ederson, Nathan Ake and Ruben Dias - who all missed the loss at Villa Park - and the long-term loss of midfield powerhouse Rodri. 
Guardiola was happy with Saturday's performance, despite defeat in Birmingham, but there is little solace to take at slipping further out of the title race. He may have needed to field a half-fit Manuel Akanji and John Stones at Villa Park but that does not account for City looking a shadow of their former selves. That does not justify the error Josko Gvardiol made to gift Jhon Duran a golden chance inside the first 20 seconds, or £100m man Jack Grealish again failing to have an impact on a game. There may be legitimate reasons for City's drop off, whether that be injuries, mental fatigue or just simply a team coming to the end of its lifecycle, but their form, which has plunged off a cliff edge, would have been unthinkable as they strolled to a fourth straight title last season. \"The worrying thing is the number of goals conceded,\" said ex-England captain Alan Shearer on BBC Match of the Day. \"The number of times they were opened up because of the lack of protection and legs in midfield was staggering. There are so many things that are wrong at this moment in time.\"\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. Man City 'have to find a way' to return to form - Guardiola\n", + "\n", + "Afterwards Guardiola was calm, so much so it was difficult to hear him in the news conference, a contrast to the frustrated figure he cut on the touchline. He said: \"It depends on us. The solution is bring the players back. We have just one central defender fit, that is difficult. We are going to try next game - another opportunity and we don't think much further than that. \"Of course there are more reasons. We concede the goals we don't concede in the past, we [don't] score the goals we score in the past. Football is not just one reason. There are a lot of little factors. \"Last season we won the Premier League, but we came here and lost. We have to think positive and I have incredible trust in the guys. 
Some of them have incredible pride and desire to do it. We have to find a way, step by step, sooner or later to find a way back.\" Villa boss Unai Emery highlighted City's frailties, saying he felt Villa could seize on the visitors' lack of belief. \"Manchester City are a little bit under the confidence they have normally,\" he said. \"The second half was different, we dominated and we scored. Through those circumstances they were feeling worse than even in the first half.\"\n", + "\n", + "Erling Haaland had one touch in the Villa box\n", + "\n", + "There are chinks in the armour never seen before at City under Guardiola and Erling Haaland conceded belief within the squad is low. He told TNT after the game: \"Of course, [confidence levels are] not the best. We know how important confidence is and you can see that it affects every human being. That is how it is, we have to continue and stay positive even though it is difficult.\" Haaland, with 76 goals in 83 Premier League appearances since joining City from Borussia Dortmund in 2022, had one shot and one touch in the Villa box. His 18 touches in the whole game were the lowest of all starting players and he has been self critical, despite scoring 13 goals in the top flight this season. Over City's last eight games he has netted just twice though, but Guardiola refused to criticise his star striker. He said: \"Without him we will be even worse but I like the players feeling that way. I don't agree with Erling. He needs to have the balls delivered in the right spots but he will fight for the next one.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6276, Text: 'I am not good enough' - Guardiola faces daunting and major rebuild\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 
'I am not good enough' - Guardiola says he must find a 'solution' after derby loss\n", + "\n", + "Pep Guardiola says his sleep has suffered during Manchester City's deepening crisis, so he will not be helped by a nightmarish conclusion to one of the most stunning defeats of his long reign. Guardiola looked agitated, animated and on edge even after City led the Manchester derby through Josko Gvardiol's 36th-minute header, his reaction to the goal one of almost disdain that it came via a deflected cross as opposed to in his purist style. He sat alone with his eyes closed sipping from a water bottle before the resumption of the second half, then was denied even the respite of victory when Manchester United gave this largely dismal derby a dramatic conclusion it barely deserved with a remarkable late comeback. First, with 88 minutes on the clock, Matheus Nunes presented Amad Diallo with the ball before compounding his error by flattening the forward as he made an attempt to recover his mistake. Bruno Fernandes completed the formalities from the penalty spot. Worse was to come two minutes later when Lisandro Martinez's routine long ball caught City's defence inexplicably statuesque. Goalkeeper Ederson's positioning was awry, allowing the lively Diallo to pounce from an acute angle to leave Guardiola and his players stunned. It was the latest into any game, 88 minutes, that reigning Premier League champions had led then lost. It was also the first time City had lost a game they were leading so late on. And in a sign of City's previous excellence that is now being challenged, they have only lost four of 105 Premier League home games under Guardiola in which they have been ahead at half-time, winning 94 and drawing seven. Guardiola delivered a brutal self-analysis as he told Match of the Day: \"I am not good enough. I am the boss. I am the manager. I have to find solutions and so far I haven't. That's the reality. \"Not much else to say. No defence. 
Manchester United were incredibly persistent. We have not lost eight games in two seasons. We can't defend that.\"\n", + "\n", + "Manchester City manager Pep Guardiola in despair during the derby defeat to Manchester United\n", + "\n", + "Guardiola suggested the serious renewal will wait until the summer but the red flags have been appearing for weeks in the sudden and shocking decline of a team that has lost the aura of invincibility that left many opponents beaten before kick-off in previous years. He has had stated City must \"survive\" this season - whatever qualifies as survival for a club of such rich ambition - but the quest for a record fifth successive Premier League title is surely over as they lie nine points behind leaders Liverpool having played a game more. Their Champions League aspirations are also in jeopardy after another loss, this time against Juventus in Turin. City's squad has been allowed to grow too old together. The insatiable thirst for success seems to have gone, the scales of superiority have fallen away and opponents now sense vulnerability right until the final whistle, as United did here. The manner in which United were able, and felt able, to snatch this victory drove right to the heart of how City, and Guardiola, are allowing opponents to prey on their downfall. Guardiola has every reason to cite injuries, most significantly to Rodri and also John Stones as well as others, but this cannot be used an excuse for such a dramatic decline in standards, allied to the appearance of a soft underbelly that is so easily exploited. And City's rebuild will not be a quick fix. With every performance, every defeat, the scale of what lies in front of Guardiola becomes more obvious - and daunting. Manchester City's fans did their best to reassure Guardiola of their faith in him with a giant Barcelona-inspired banner draped from the stands before kick-off emblazoned with his image reading \"Més que un entrenador\" - \"More Than A Coach\". 
And Guardiola will now need to be more than a coach than at any time in his career. He will have the finances but it will be done with City's challengers also strengthening. Kevin de Bruyne, 34 in June, lasted 68 minutes here before he was substituted. Age and injuries are catching up with one of the greatest players of the Premier League era and he is unlikely to be at City next season. Mateo Kovacic, who replaced De Bruyne, is also 31 in May. Kyle Walker, 34, is being increasingly exposed. His most notable contribution here was an embarrassing collapse to the ground after the mildest head-to-head collision with Rasmus Hojlund. Ilkay Gundogan, another 34-year-old and a previous pillar of Guardiola's great successes, no longer has the legs or energy to exert influence. This looks increasingly like a season too far following his return from Barcelona. Flaws are also being exposed elsewhere, with previously reliable performers failing to hit previous standards. Phil Foden scored 27 goals and had 12 assists when he was Premier League Player of the Season last term. This year he has just three goals and two assists in 18 appearances in all competitions. He has no goals and just one assist in 11 Premier League games. Jack Grealish, who came on after 77 minutes against United, has not scored in a year for Manchester City, his last goal coming in a 2-2 draw against Crystal Palace on 16 December last year. He has, in the meantime, scored twice for England. Erling Haaland is also struggling as City lack creativity and cutting edge. He has three goals in his past 11 Premier League games after scoring 10 in his first five. And in another indication of City's impotence, and their reliance on Haaland, defender Gvardiol's goal against United was his fourth this season, making him their second highest scorer in all competitions behind the Norwegian striker, who has 18. 
Goalkeeper Ederson, so reliable for so long, has already been dropped once this season and did not cover himself in glory for United's winner. Guardiola, with that freshly signed two-year contract, insists he \"wants it\" as he treads on this alien territory of failure. He will be under no illusions about the size of the job in front of him as he placed his head in his hands in anguish after yet another damaging and deeply revealing defeat. City and Guardiola are in new, unforgiving territory.\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6066, Text: Pep Guardiola has said Manchester City will be his final managerial job in club football before he \"maybe\" coaches a national team.\n", + "\n", + "The former Barcelona and Bayern Munich boss has won 15 major trophies since taking charge of City in 2016.\n", + "\n", + "The 53-year-old Spaniard was approached in the summer about the possibility of becoming England manager, but last month signed a two-year contract extension with City until 2027.\n", + "\n", + "Speaking to celebrity chef Dani Garcia on YouTube, Guardiola did not indicate when he intends to step down at City but said he would not return to club football - in the Premier League or overseas.\n", + "\n", + "\"I'm not going to manage another team,\" he said.\n", + "\n", + "\"I'm not talking about the long-term future, but what I'm not going to do is leave Manchester City, go to another country, and do the same thing as now.\n", + "\n", + "\"I wouldn't have the energy. The thought of starting somewhere else, all the process of training and so on. No, no, no. Maybe a national team, but that's different.\n", + "\n", + "\"I want to leave it and go and play golf, but I can't [if he takes a club job]. 
I think stopping would do me good.\"\n", + "\n", + "City have won just once since Guardiola extended his contract - and once in nine games since beating Southampton on 26 October.\n", + "\n", + "That victory came at home to Nottingham Forest last Wednesday, but was followed by a 2-2 draw at Crystal Palace at the weekend.\n", + "\n", + "The Blues visit Juventus next in the Champions League on Wednesday (20:00 GMT), before hosting Manchester United in the Premier League on Sunday (16:30).\n", + "\n", + "\"Right now we are not in the position - when we have had the results of the last seven, eight games - to talk about winning games in plural,\" said Guardiola at his pre-match news conference.\n", + "\n", + "\"We have to win the game and not look at what happens in the next one yet.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.5761, Text: 'Self-doubt, errors & big changes' - inside the crisis at Man City\n", + "\n", + "Pep Guardiola has not been through a moment like this in his managerial career. Manchester City have lost nine matches in their past 12 - as many defeats as they had suffered in their previous 106 fixtures. At the end of October, City were still unbeaten at the top of the Premier League and favourites to win a fifth successive title. Now they are seventh, 12 points behind leaders Liverpool having played a game more. It has been an incredible fall from grace and left people trying to work out what has happened - and whether Guardiola can make it right. After discussing the situation with those who know him best, I have taken a closer look at the future - both short and long term - and how the current crisis at Man City is going to be solved.\n", + "\n", + "Pep Guardiola's Man City have lost nine of their past 12 matches\n", + "\n", + "Guardiola has also been giving it a lot of thought. 
He has not been sleeping very well, as he has said, and has not been himself at times when talking to the media. He has been talking to a lot of people about what is going on as he tries to work out the reasons for City's demise. Some reasons he knows, others he still doesn't. What people perhaps do not realise is Guardiola hugely doubts himself and always has. He will be thinking \"I'm not going to be able to get us out of this\" and needs the support of people close to him to push away those insecurities - and he has that. He is protected by his people who are very aware, like he is, that there are a lot of people that want City to fail. It has been a turbulent time for Guardiola. Remember those marks he had on his head after the 3-3 draw with Feyenoord in the Champions League? He always scratches his head, it is a gesture of nervousness. Normally nothing happens but on that day one of his nails was far too sharp so, after talking to the players in the changing room where he scratched his head because of his usual agitated gesturing, he went to the news conference. His right-hand man Manel Estiarte sent him photos in a message saying \"what have you got on your head?\", but by the time Guardiola returned to the coaching room there was hardly anything there again. He started that day with a cover on his nose after the same thing happened at the training ground the day before. Guardiola was having a footballing debate with Kyle Walker about positional stuff and marked his nose with that same nail. There was also that remarkable news conference after the Manchester derby when he said \"I don't know what to do\". That is partly true and partly not true. Ignore the fact Guardiola suggested he was \"not good enough\". He actually meant he was not good enough to resolve the situation with the group of players he has available and with all the other current difficulties. 
There are obviously logical explanations for the crisis and the first one has been talked about many times - the absence of injured midfielder Rodri. You know the game Jenga? When you take the wrong piece out, the whole tower collapses. That is what has happened here. It is normal for teams to have an over-reliance on one player if he is the best in the world in his position. And you cannot calculate the consequences of an injury that rules someone like Rodri out for the season. City are a team, like many modern ones, in which the holding midfielder is a key element to the construction. So, when you take Rodri out, it is difficult to hold it together. There were Plan Bs - John Stones, Manuel Akanji, even Nathan Ake - but injuries struck. The big injury list has been out of the ordinary and the busy calendar has also played a part in compounding the issues. However, one factor even Guardiola cannot explain is the big uncharacteristic errors in almost every game from international players. Why did Matheus Nunes make that challenge to give away the penalty against Manchester United? Jack Grealish is sent on at the end to keep the ball and cannot do that. There are errors from Walker and other defenders. These are some of the best players in the world. Of course the players' mindset is important, and confidence is diminishing. Wrong decisions get taken so there is almost panic on the pitch instead of calm. There are also players badly out of form who are having to play because of injuries. Walker is now unable to hide behind his pace, I'm not sure Kevin de Bruyne is ever getting back to the level he used to be at, Bernardo Silva and Ilkay Gundogan do not have time to rest, Grealish is not playing at his best. Some of these players were only meant to be playing one game a week but, because of injuries, have played 12 games in 40 days. It all has a domino effect. One consequence is that Erling Haaland isn't getting the service to score. 
But the Norwegian still remains City's top-scorer with 13. Defender Josko Gvardiol is next on the list with just four. The way their form has been analysed inside the City camp is there have only been three games where they deserved to lose (Liverpool, Bournemouth and Aston Villa). But of course it is time to change the dynamic.\n", + "\n", + "Guardiola has never protected his players so much. He has not criticised them and is not going to do so. They have won everything with him. Instead of doing more with them, he has tried doing less. He has sometimes given them more days off to clear their heads, so they can reset - two days this week for instance. Perhaps the time to change a team is when you are winning, but no-one was suggesting Man City were about to collapse when they were top and unbeaten after nine league games. Some people have asked how bad it has to get before City make a decision on Guardiola. The answer is that there is no decision to be made. Maybe if this was Real Madrid, Barcelona or Juventus, the pressure from outside would be massive and the argument would be made that Guardiola has to go. At City he has won the lot, so how can anyone say he is failing? Yes, this is a crisis. But given all their problems, City's renewed target is finishing in the top four. That is what is in all their heads now. The idea is to recover their essence by improving defensive concepts that are not there and re-establishing the intensity they are known for. Guardiola is planning to use the next two years of his contract, which is expected to be his last as a club manager, to prepare a new Manchester City. When he was at the end of his four years at Barcelona, he asked two managers what to do when you feel people are not responding to your instructions. Do you go or do the players go? Sir Alex Ferguson and Rafael Benitez both told him that the players need to go. 
Guardiola did not listen because of his emotional attachment to his players back then and he decided to leave the Camp Nou because he felt the cycle was over. He will still protect his players now but there is not the same emotional attachment - so it is the players who are going to leave this time. It is likely City will look to replace five or six regular starters. Guardiola knows it is the end of an era and the start of a new one. Changes will not be immediate and the majority of the work will be done in the summer. But they are open to any opportunities in January - and a holding midfielder is one thing they need. In the summer City might want to get Spain's Martin Zubimendi from Real Sociedad and they know 60m euros (£50m) will get him. He said no to Liverpool last summer even though everything was agreed, but he now wants to move on and the Premier League is the target. Even if they do not get Zubimendi, that is the calibre of footballer they are after. A new Manchester City is on its way - with changes driven by Guardiola, incoming sporting director Hugo Viana and the football department.\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.5627, Text: Man City might miss out on Champions League - Guardiola\n", + "\n", + "Erling Haaland was part of the Manchester City side that won the Champions League for the first time in 2023\n", + "\n", + "Manchester City boss Pep Guardiola says the club are in danger of missing out on a place in next season's Champions League. City are currently in their 14th consecutive season in European football's most prestigious club competition. Only Arsenal between 1998 and 2017, and Manchester United between 1996 and 2014, have a longer record of qualifying among English clubs. City are seventh in the Premier League after 17 matches, four points behind Nottingham Forest in fourth and a point behind fifth-placed Bournemouth. 
England are currently top of Uefa's European Performance Spot table and well placed to secure a fifth place in next season's Champions League, although City would still not qualify on current standings. \"When I said before, people laughed,\" said Guardiola. \"They said, 'qualifying for the Champions League is not a big success'. \"But I know it because it happens with clubs in this country. They were dominant for many years and after they were many years not qualifying for the Champions League.\"\n", + "\n", + "Guardiola's side host Everton on Boxing Day, before a trip to Leicester on 29 December and a home match against West Ham on 4 January. Given all three opponents are in the bottom seven, it offers City a chance to improve on an appalling recent record of four points from eight games, which Guardiola acknowledges has left their lofty European ambitions in doubt. \"The one team that has been in the Champions League for the past years has been Manchester City,\" he added. \"Now we are at risk, of course we are. Definitely.\" Arsenal, Chelsea, Liverpool and Manchester United finished in the Premier League's top four from the 2005-06 season to 2008-09. At least three of them also occupied the top four spots for 15 successive campaigns until 2012. But United have spent five out of the past 11 seasons outside the Champions League. Arsenal spent six seasons out of the competition before returning last term. Liverpool missed out all but one year in seven from 2010, while Chelsea are in their second successive campaign outside Europe's elite. This term the threat to City comes from unexpected sources. As well as Forest and Bournemouth, Aston Villa are ahead of City, while Newcastle, Fulham and Brighton are also within a couple of points. \"There are a lot of contenders,\" said Guardiola, whose side have lost nine of their last 12 games in all competitions. \"For every club it is so important and if we are not winning games, we will be out. 
\"If we don't qualify it is because we don't deserve it, because we were not prepared and because we had a lot of problems and didn't solve them.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.5516, Text: Man City's Dias ruled out for 'three or four weeks'\n", + "\n", + "Ruben Dias has won 10 major trophies during his time at Manchester City\n", + "\n", + "Manchester City have suffered a fresh injury blow with manager Pep Guardiola confirming Portugal central defender Ruben Dias has been ruled out for \"three or four weeks\" with a muscle injury. Dias, who suffered the injury in Saturday's 2-1 defeat by Manchester United, will miss the entire festive programme and potentially the FA Cup third-round tie with Salford on 11 January. The 27-year-old also faces a battle to be fit for City's crucial Champions League trip to Paris St-Germain on 22 January. Dias has already missed seven games with a calf injury this season, adding to a defensive injury list that has seen John Stones, Nathan Ake, Manuel Akanji and Kyle Walker all ruled out at various points, while Ballon d'Or winner Rodri will miss the remainder of the domestic season after suffering a cruciate knee ligament injury. \"It's a muscular problem and he will be out for three to four weeks,\" said Guardiola. \"After 75 minutes against United he felt something. But he's so strong and wanted to stay on the pitch. Now he's injured.\" Guardiola confirmed Stones, Akanji and midfielder Mateo Kovacic have all trained this week and could feature at Aston Villa on Saturday (12:30 GMT), but said goalkeeper Ederson was \"a doubt\" with an unspecified problem. \"Ederson has been struggling with some niggles in his leg, he doesn't feel completely fine,\" said Guardiola. \"Ederson is so important for us.\" Amid City's current run of one win in 11 games, surprise has been expressed about Guardiola's use of youngsters James McAtee and Nico O'Reilly. 
City made a point of keeping both players despite numerous loan options. Yet McAtee has made just two substitute appearances - coming on in the last minute on both occasions - while O'Reilly is yet to make his league debut. But it seems they will stay at the club for the second half of the season, with Guardiola replying \"I don't think so\" when asked if players might leave during the January transfer window. The Spaniard said he is \"not a big fan\" of buying players in January but it is \"possible\" City will look to sign someone because \"the circumstances of this season have been special\".\n", + "\n", + "Guardiola's mood was so downbeat in the immediate aftermath of the United defeat it was easy to imagine he might conclude he was no longer capable of doing the job. He gave his players a couple of days off afterwards and was brighter when he spoke to journalists in his scheduled briefing before the Villa trip. \"We'd just finished a game that we lost in the circumstances and I was not happy,\" he said. \"I try to be honest about the feelings of my teams. We fell down six times [number of Premier League games without a win], we have to stand up seven. There is no alternative. \"I'm fine. I'm a normal person with feelings like all of us. When the situation is going well we are better but it's normal. I would not go to the press conference if we were 1-0 up and expressing something that I didn't feel.\" Former Villa forward Jack Grealish has not scored for City in over a year but Guardiola pointed out he is not the only attacking player struggling this season. 
He added: \"We are struggling to create a little bit up front, but always I am optimistic about my players that they are going to turn and perform well.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.5148, Text: 'Life is not easy' - Haaland penalty miss sums up Man City crisis\n", + "\n", + "Manchester City striker Erling Haaland has now missed two of his 17 penalties taken in the Premier League\n", + "\n", + "Nothing seems to be going Manchester City's way at the moment - and star striker Erling Haaland is not a happy man. If there was any player currently in the Premier League you would hand the ball to for a penalty to win a match, it would be the prolific Norwegian. Not on this occasion, though. Looking to land a knockout blow, Haaland saw his second-half strike saved by spot-kick expert Jordan Pickford, as Manchester City's crisis continued with a 1-1 draw against Everton at Etihad Stadium. \"How do you react mentally?\" asked boss Pep Guardiola about the penalty miss. \"Life is not easy. Sport is not easy. When it happens, it is OK. \"There are still a lot of minutes to play and we had the chances afterwards. We created, incredible how they ran and fight. In some games it was not good but today well played.\" At the full-time whistle, there were a smattering of boos from home supporters at Etihad Stadium and a despondent Haaland ripped off his hair bobble and shirt before heading straight down the tunnel. As Haaland trudged off out of sight, Guardiola stood motionless on the pitch with the look of a man unable to find answers to their current crisis. City's all-conquering side have suffered a remarkable drop-off and now won just once in their last 13 games in all competitions. In the post-match news conference, the stunned Spaniard was barely audible in the responses to the questions fired his way. \"My body language was positive,\" he uttered. \"The team played really good. 
We had I don't know how many shots. The first half was brilliant.\"\n", + "\n", + "City have a lengthy injury list, but their downturn has coincided with Haaland's struggles in front of goal. The striker made a blistering start to the campaign looked to be on his way to setting more goalscoring records when netting 10 goals in his first five games, but has only managed three in 13 since. This has largely been down to underperformance on his expected goal (xG) rate, with statistics showing he has scored five fewer goals than he should have done during this recent run. There is also a stark contrast from his shot conversion rate from the first five games to his last 13, dropping from 38.5% to a lowly 6.4%. The penalty miss was Haaland's only shot on target in the contest - but Everton did not set out to pay him extra attention. Asked if there had been a special plan to handle the frontman, Dyche said: \"No. I was asked the other day when playing Chelsea about Cole Palmer and what a great player he is turning out to be. \"We know Haaland is too but I believe in the team. I don't look at one aspect of it, I look at the team, what they are doing and they have done a good job today.\"\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser.\n", + "\n", + "Pickford saved his seventh penalty in the Premier League since joining Everton in 2017\n", + "\n", + "After Toffees forward Iliman Ndiaye had cancelled out Bernardo Silva's opener, the defining moment came in the second half when Vitalii Mykolenko tripped Savinho in the box. During the VAR check, it was a battle of the minds with Everton captain Seamus Coleman receiving a yellow card for trying to put off Haaland, who had the ball in his hand. 
The booking ultimately appeared to be a well-earned one for the team and Pickford seemed to revel in his big moment, jumping up and down on his line, sticking his tongue out before stooping low to his right to push the ball away. Pickford has made big penalty saves for England on the international stage and this was seventh spot-kick stop since joining Everton in 2017. Such is City's luck at the moment that they managed to recycle the ball and Haaland headed in, but it was ruled out for offside, much to the joy of the travelling supporters. Haaland had scored 15 of 16 of his previous penalties in the league, with the other hitting the woodwork, while this was the first he had seen saved. \"It is good from the analysts to give him as much benefit of as many penalties as they can,\" Dyche said of Pickford's save. \"Credit to him for making the right decision in the spur of the moment.\" Former England goalkeeper Paul Robinson said on BBC Radio 5 Live: \"Pickford is good at putting pressure on strikers. Haaland must have felt the pressure there, this place went silent. \"It was a decent save but a really poor penalty. He didn't whip it around like a left-footer should. A poor penalty, but a good save.\" Ex-England defender Matt Upson added on Amazon Prime: \"It was brilliant save from Pickford. He relishes situations like this. He doesn't make life any easier for the penalty taker. \"But for Haaland to miss that penalty speaks a bit about where the confidence is in this team at the moment.\"\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. Dyche 'very pleased' with Everton defence in Man City draw\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.4715, Text: 'So happy he is back' - 'integral' De Bruyne 'one of best we've seen'\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 
Match of the Day: How Kevin de Bruyne inspired Man City back to winning ways\n", + "\n", + "As Kevin de Bruyne made his way off the pitch after being replaced by Rico Lewis 16 minutes from the end of Manchester City's 3-0 win over Nottingham Forest, Pep Guardiola grabbed the Belgian and pulled him into an embrace. It was like a father offering affection to his son at a job well done. De Bruyne responded with a smile of satisfaction before continuing on his way to the top of the small terrace of City benches. A day earlier, Guardiola scoffed at the suggestions of Sky Sports duo Jamie Carragher and Gary Neville that there was some kind of rift between the boss and his star man. Why on earth, Guardiola countered, would he leave out someone who is capable of delivering moments like no-one else, even in his star-studded squad? Finally back on the pitch to start a Premier League game for the first time since August, De Bruyne proved exactly what Guardiola meant.\n", + "\n", + "Manchester City's Kevin de Bruyne has now scored two goals this season\n", + "\n", + "On a night Guardiola conceded City had to win, it was the Belgian's firm header that created an eighth-minute opener for Bernardo Silva as they went on to finally end their seven-match winless run and close the gap to leaders Liverpool to nine points. The goal from De Bruyne that followed was a thing of beauty as he backed away from Jeremy Doku as his fellow countryman ran with the ball, arriving in enough space to take the short pass and send his shot into the corner. Afforded extra space by his manager's decision to play Jack Grealish alongside him in a central position, De Bruyne schemed in the way he usually does. As chances came and went, he was playing some significant role. De Bruyne lasted 74 minutes, his longest match time since completing the full 90 minutes against Brentford on 14 September. 
He was on the bench when he was announced as man of the match, a decision received with enthusiasm by the City support. \"I am so happy he is back,\" said Guardiola. \"He played 75 fantastic minutes. \"He deserves the best because he's a lovely guy and has been massively important for so many years since he arrived.\"\n", + "\n", + "City are now unbeaten in their past 31 Premier League games with De Bruyne starting. He has been involved in 25 goals (nine goals, 16 assists) in those games. De Bruyne said: \"There have never been issues between me and Pep. He knows I've been struggling. It's painful and uncomfortable. \"Hopefully I can get back to my body with not much pain and then I'll be fine.\" However, a bit like the victory itself - tarnished by an injury to Manuel Akanji that may rule the Switzerland defender out of the weekend trip to Crystal Palace, and a hamstring problem for Nathan Ake who has already missed five weeks with a similar injury this season that Guardiola said \"doesn’t look good\" and makes him feel \"sad\" for the Dutchman - there was a caveat as the City boss assessed De Bruyne's contribution. \"He fought a lot and he prepared himself,\" he said. \"He is back to his physicality. The minutes he played at Anfield were really good. \"Last season he was out for many months, this season as well. We will see how he recovers after a long time injured and how he feels in three days.\" De Bruyne recorded four or more shots and created four or more chances for the third time in a Premier League game this season. Despite only starting five Premier League games, only Arsenal's Bukayo Saka has done so more often this term. Guardiola's fear must be that if he pushes De Bruyne too far too quickly, his body will let him down. Former Man City defender Micah Richards told BBC Match of the Day: \"He is a top-quality player and one of the best we have seen. 
He always manages to find space on the pitch.\" \"He has been integral to Man City's success over a number of years,\" added former City boss Stuart Pearce on Amazon Prime. \"He is the go-to player that sets Erling Haaland alight with his passing. He creates goals, he scores goals. \"If you were to pick one player out over the last eight or nine years De Bruyne would be at the top of almost everyone's list.\" It is a delicate balance given his team are still nine points adrift of Premier League leaders Liverpool and are also outside the Champions League top eight before next week's trip to Italy and a meeting with Juventus, after which City will only have January first-phase games remaining to ensure they secure qualification for the last-16 without needing to be bothered by February's play-off round. However, as with his team, De Bruyne's recovery had to start somewhere.\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.4524, Text: Manchester United manager Ruben Amorim has questioned the \"choices\" of people close to forward Marcus Rashford. Rashford, 27, said he was \"ready for a new challenge\" in an interview after being dropped for the 2-1 win at Manchester City on 15 December. The England international subsequently missed the 4-3 Carabao Cup quarter-final defeat by Tottenham and was also left out as United lost 3-0 to Bournemouth on Sunday, adding to speculation he could leave Old Trafford in January. \"I understand these players have a lot of people around them, making choices that are not the first idea from the player. \"They chose to do the interview as it is not just Marcus.\"\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 'He wants to play. He is trying'\n", + "\n", + "Rashford has scored 138 goals in 426 appearances for the club since making his debut in 2016, having come through the United youth ranks. 
However, while he managed 30 goals in all competitions in 2022-23, he has struggled for form in three of the previous four seasons and attracted criticism from pundits and fans for a number of laboured displays during that time. Amorim said he can \"separate\" the decisions of those advising Rashford from his relationship with the player. \"At the moment I'm focused on improving Marcus,\" he added. \"We need a talented guy like Marcus. I forget the interview now and see what I see on the pitch.\" Regarding Rashford's future, the Portuguese boss said it is for him and the club \"to deal with that when the time comes\". Speaking to the wider media before United face Wolverhampton Wanderers on Thursday, Amorim denied the talk around Rashford was a distraction, adding: \"Some guys have a big responsibility here because they have been here for a long time. \"If you have big talents, [we need] big performances, big responsibility, big engagement to push everybody forward in this moment. Like any other player, [we want him to be] the best he can be. \"This is maybe one of the lowest moments in our club. We have to face it and be strong.\" United's humbling defeat by the Cherries means they head into Christmas in 13th place in the Premier League, after Wolves they host Newcastle on 30 December. It will be their lowest position in the table at this stage since they were 15th in 1986, just over two months into Sir Alex Ferguson's reign as manager. Amorim had to halt his post-match news conference on Sunday because of a leak in the ceiling of the press room. Asked later how he intended to reverse fortunes at Old Trafford, the former Sporting coach pointed to the ceiling and said: \"If I knew, I would solve all the problems of this club, even this. \"I know what I am going to do. That is so clear for me. I will not say I feel relaxed because I'm really frustrated. 
It's a very difficult moment but we will solve problems step by step and find answers for everything.\"\n", + "--------------------------------------------------------------------------------\n" ] } ], "source": [ - "query = \"What caused the 1929 Great Depression?\"\n", + "query = \"What was manchester city manager pep guardiola's reaction to the team's current form?\"\n", "\n", "try:\n", " # Perform the semantic search\n", @@ -1292,8 +2691,10 @@ "\n", " # Display search results\n", " print(f\"\\nSemantic Search Results (completed in {search_elapsed_time:.2f} seconds):\")\n", + " print(\"-\" * 80) # Add separator line\n", " for doc, score in search_results:\n", - " print(f\"Distance: {score:.4f}, Text: {doc.page_content}\")\n", + " print(f\"Score: {score:.4f}, Text: {doc.page_content}\")\n", + " print(\"-\" * 80) # Add separator between results\n", "\n", "except CouchbaseException as e:\n", " raise RuntimeError(f\"Error performing semantic search: {str(e)}\")\n", @@ -1315,7 +2716,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1328,7 +2729,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-06 07:30:52,860 - INFO - Successfully created RAG chain\n" + "2025-01-30 01:53:44,536 - INFO - Successfully created RAG chain\n" ] } ], @@ -1348,14 +2749,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1368,20 +2762,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "RAG Response: The 1929 Great Depression was caused by a combination of factors, including the stock market crash of October 1929, bank failures, reduction in consumer spending and investment, and poor economic policies.\n", - "RAG response generated in 2.32 seconds\n" + "RAG Response: 
Manchester City manager Pep Guardiola has candidly expressed unhappiness and concern about the team's current form, describing his state of mind as \"ugly\" and admitting that his sleep and diet have been affected. He acknowledged discomfort in light of the team's recent struggles, including their relegation-like form in the Premier League and challenges in the Champions League. Guardiola has taken responsibility for the team's shortcomings, stating, \"I am not good enough. I have to find solutions, and so far I haven’t.\" Despite the struggles, Guardiola remains focused on finding ways to improve the team step by step and expressed trust in his players, emphasizing the need to defend better and avoid mistakes. He has also recognized that Manchester City is in uncharted territory of failure and plans for a significant rebuild, calling the challenge both daunting and humbling.\n", + "RAG response generated in 6.71 seconds\n" ] } ], "source": [ "# Get responses\n", + "from openai import NotFoundError\n", + "\n", "logging.disable(sys.maxsize) # Disable logging to prevent tqdm output\n", "start_time = time.time()\n", - "rag_response = rag_chain.invoke(query)\n", - "rag_elapsed_time = time.time() - start_time\n", - "\n", - "print(f\"RAG Response: {rag_response}\")\n", - "print(f\"RAG response generated in {rag_elapsed_time:.2f} seconds\")" + "try:\n", + " rag_response = rag_chain.invoke(query)\n", + " rag_elapsed_time = time.time() - start_time\n", + " print(f\"RAG Response: {rag_response}\")\n", + " print(f\"RAG response generated in {rag_elapsed_time:.2f} seconds\")\n", + "except NotFoundError as e:\n", + " print(f\"Error: Azure OpenAI resource not found. Please check your configuration. 
Details: {str(e)}\")\n", + "except Exception as e:\n", + " print(f\"Unexpected error occurred: {str(e)}\")\n", + "finally:\n", + " logging.disable(logging.NOTSET) # Re-enable logging" ] }, { @@ -1398,7 +2800,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1412,36 +2814,72 @@ "output_type": "stream", "text": [ "\n", - "Query 1: Why do heavier objects travel downhill faster?\n", - "Response: Heavier objects travel downhill faster primarily due to the force of gravity acting on them. Gravity accelerates all objects at the same rate, but heavier objects may encounter less air resistance relative to their weight, allowing them to maintain higher speeds as they descend. Additionally, factors such as surface friction and the distribution of mass can influence the speed at which an object travels downhill.\n", - "Time taken: 61.73 seconds\n", - "\n", - "Query 2: What is the capital of France?\n", - "Response: The capital of France is Paris.\n", - "Time taken: 60.63 seconds\n", + "Query 1: What happened in the match between Fullham and Liverpool?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-01-30 01:53:52,496 - INFO - HTTP Request: POST https://vector-search-demos-instance.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15 \"HTTP/1.1 200 OK\"\n", + "2025-01-30 01:53:56,189 - INFO - HTTP Request: POST https://vector-search-demos-instance.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-07-01-preview \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: The match between Fulham and Liverpool ended in a 2-2 draw in the Premier League at Anfield. Liverpool played the majority of the game with ten men after Andy Robertson received a red card in the 17th minute for denying a goalscoring opportunity. 
Despite this setback, Liverpool showed resilience and came from behind twice, with Diogo Jota scoring an 86th-minute equalizer. Liverpool dominated possession and several attacking metrics, showcasing their fighting spirit even under challenging circumstances. Fulham also showed bravery and earned a deserved point in what was described as an enthralling encounter.\n", + "Time taken: 5.31 seconds\n", "\n", - "Query 3: What caused the 1929 Great Depression?\n", - "Response: The 1929 Great Depression was caused by a combination of factors, including the stock market crash of October 1929, bank failures, reduction in consumer spending and investment, and poor economic policies.\n", - "Time taken: 1.49 seconds\n", + "Query 2: What was manchester city manager pep guardiola's reaction to the team's current form?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-01-30 01:53:56,883 - INFO - HTTP Request: POST https://vector-search-demos-instance.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: Manchester City manager Pep Guardiola has candidly expressed unhappiness and concern about the team's current form, describing his state of mind as \"ugly\" and admitting that his sleep and diet have been affected. He acknowledged discomfort in light of the team's recent struggles, including their relegation-like form in the Premier League and challenges in the Champions League. Guardiola has taken responsibility for the team's shortcomings, stating, \"I am not good enough. I have to find solutions, and so far I haven’t.\" Despite the struggles, Guardiola remains focused on finding ways to improve the team step by step and expressed trust in his players, emphasizing the need to defend better and avoid mistakes. 
He has also recognized that Manchester City is in uncharted territory of failure and plans for a significant rebuild, calling the challenge both daunting and humbling.\n", + "Time taken: 3.52 seconds\n", "\n", - "Query 4: Why do heavier objects travel downhill faster?\n", - "Response: Heavier objects travel downhill faster primarily due to the force of gravity acting on them. Gravity accelerates all objects at the same rate, but heavier objects may encounter less air resistance relative to their weight, allowing them to maintain higher speeds as they descend. Additionally, factors such as surface friction and the distribution of mass can influence the speed at which an object travels downhill.\n", - "Time taken: 0.60 seconds\n" + "Query 3: What happened in the match between Fullham and Liverpool?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-01-30 01:54:00,389 - INFO - HTTP Request: POST https://vector-search-demos-instance.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: The match between Fulham and Liverpool ended in a 2-2 draw in the Premier League at Anfield. Liverpool played the majority of the game with ten men after Andy Robertson received a red card in the 17th minute for denying a goalscoring opportunity. Despite this setback, Liverpool showed resilience and came from behind twice, with Diogo Jota scoring an 86th-minute equalizer. Liverpool dominated possession and several attacking metrics, showcasing their fighting spirit even under challenging circumstances. 
Fulham also showed bravery and earned a deserved point in what was described as an enthralling encounter.\n", + "Time taken: 2.08 seconds\n" ] } ], "source": [ "try:\n", " queries = [\n", - " \"Why do heavier objects travel downhill faster?\",\n", - " \"What is the capital of France?\",\n", - " \"What caused the 1929 Great Depression?\", # Repeated query\n", - " \"Why do heavier objects travel downhill faster?\", # Repeated query\n", + " \"What happened in the match between Fullham and Liverpool?\",\n", + " \"What was manchester city manager pep guardiola's reaction to the team's current form?\", # Repeated query\n", + " \"What happened in the match between Fullham and Liverpool?\", # Repeated query\n", " ]\n", "\n", " for i, query in enumerate(queries, 1):\n", " print(f\"\\nQuery {i}: {query}\")\n", " start_time = time.time()\n", + "\n", " response = rag_chain.invoke(query)\n", " elapsed_time = time.time() - start_time\n", " print(f\"Response: {response}\")\n", @@ -1479,7 +2917,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.12.3" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/azure/azure_index.json b/azure/azure_index.json index d3035eff..bd66f3d5 100644 --- a/azure/azure_index.json +++ b/azure/azure_index.json @@ -40,7 +40,7 @@ "enabled": true, "fields": [ { - "dims": 1536, + "dims": 3072, "index": true, "name": "embedding", "similarity": "dot_product", From b591cfefcae5b0348e68e50b757611c94025a012 Mon Sep 17 00:00:00 2001 From: teetangh Date: Fri, 31 Jan 2025 14:35:06 +0530 Subject: [PATCH 2/4] cleaned azure tutorial --- .../RAG_with_Couchbase_and_AzureOpenAI.ipynb | 1865 +---------------- 1 file changed, 38 insertions(+), 1827 deletions(-) diff --git a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb index 26fab0a8..da7f100e 100644 --- a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb +++ 
b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb @@ -102,8 +102,8 @@ "import os\n", "import sys\n", "import time\n", - "from uuid import uuid4\n", "from datetime import timedelta\n", + "from uuid import uuid4\n", "\n", "from couchbase.auth import PasswordAuthenticator\n", "from couchbase.cluster import Cluster\n", @@ -114,7 +114,6 @@ "from couchbase.options import ClusterOptions\n", "from datasets import load_dataset\n", "from dotenv import load_dotenv\n", - "from langchain_core.documents import Document\n", "from langchain_core.globals import set_llm_cache\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.prompts import ChatPromptTemplate\n", @@ -122,6 +121,7 @@ "from langchain_couchbase.cache import CouchbaseCache\n", "from langchain_couchbase.vectorstores import CouchbaseVectorStore\n", "from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings\n", + "from openai import NotFoundError\n", "from tqdm import tqdm" ] }, @@ -143,7 +143,10 @@ }, "outputs": [], "source": [ - "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)" + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)\n", + "\n", + "# Suppress httpx logging\n", + "logging.getLogger('httpx').setLevel(logging.CRITICAL)" ] }, { @@ -218,7 +221,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:19:35,508 - INFO - Successfully connected to Couchbase\n" + "2025-01-31 14:05:09,889 - INFO - Successfully connected to Couchbase\n" ] } ], @@ -260,18 +263,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:19:37,927 - INFO - Collection 'azure' already exists.Skipping creation.\n", - "2025-01-30 01:19:39,332 - INFO - Primary index present or created successfully.\n", - "2025-01-30 01:19:42,061 - INFO - All documents cleared from the collection.\n", - "2025-01-30 01:19:43,393 - INFO - Collection 'cache' already 
exists.Skipping creation.\n", - "2025-01-30 01:19:44,621 - INFO - Primary index present or created successfully.\n", - "2025-01-30 01:19:44,865 - INFO - All documents cleared from the collection.\n" + "2025-01-31 14:05:12,120 - INFO - Collection 'azure' already exists.Skipping creation.\n", + "2025-01-31 14:05:13,317 - INFO - Primary index present or created successfully.\n", + "2025-01-31 14:05:13,628 - INFO - All documents cleared from the collection.\n", + "2025-01-31 14:05:14,763 - INFO - Collection 'cache' already exists.Skipping creation.\n", + "2025-01-31 14:05:15,848 - INFO - Primary index present or created successfully.\n", + "2025-01-31 14:05:16,122 - INFO - All documents cleared from the collection.\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -412,8 +415,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:19:46,460 - INFO - Index 'vector_search_azure' found\n", - "2025-01-30 01:19:47,594 - INFO - Index 'vector_search_azure' already exists. Skipping creation/update.\n" + "2025-01-31 14:05:17,594 - INFO - Index 'vector_search_azure' found\n", + "2025-01-31 14:05:18,525 - INFO - Index 'vector_search_azure' already exists. 
Skipping creation/update.\n" ] } ], @@ -490,7 +493,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:19:47,726 - INFO - Successfully created AzureOpenAIEmbeddings\n" + "2025-01-31 14:05:18,573 - INFO - Successfully created AzureOpenAIEmbeddings\n" ] } ], @@ -531,7 +534,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:19:51,782 - INFO - Successfully created vector store\n" + "2025-01-31 14:05:22,691 - INFO - Successfully created vector store\n" ] } ], @@ -569,7 +572,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-30 01:20:04,027 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" + "2025-01-31 14:05:28,937 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" ] }, { @@ -641,1161 +644,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Ingesting articles: 0%| | 0/1749 [00:00 Date: Wed, 5 Feb 2025 13:15:20 +0530 Subject: [PATCH 3/4] updated azure tutorial with auto batching --- .../RAG_with_Couchbase_and_AzureOpenAI.ipynb | 177 +++++++++++------- 1 file changed, 110 insertions(+), 67 deletions(-) diff --git a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb index da7f100e..ef776424 100644 --- a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb +++ b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb @@ -189,10 +189,14 @@ "SCOPE_NAME = os.getenv('SCOPE_NAME') or input('Enter your scope name (default: shared): ') or 'shared'\n", "COLLECTION_NAME = os.getenv('COLLECTION_NAME') or input('Enter your collection name (default: azure): ') or 'azure'\n", "CACHE_COLLECTION = os.getenv('CACHE_COLLECTION') or input('Enter your cache collection name (default: cache): ') or 'cache'\n", + "BATCH_PROCESSING_MODE = os.getenv('BATCH_PROCESSING_MODE') or input('Enter batch processing mode (manual/automatic): ').lower() or 'automatic'\n", "\n", "# Check if the variables are correctly loaded\n", "if not all([AZURE_OPENAI_KEY, 
AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBEDDING_DEPLOYMENT, AZURE_OPENAI_CHAT_DEPLOYMENT]):\n", - " raise ValueError(\"Missing required Azure OpenAI variables\")" + " raise ValueError(\"Missing required Azure OpenAI variables\")\n", + "\n", + "if BATCH_PROCESSING_MODE not in ['manual', 'automatic']:\n", + " raise ValueError(\"Invalid batch processing mode\")" ] }, { @@ -221,7 +225,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:09,889 - INFO - Successfully connected to Couchbase\n" + "2025-02-05 05:57:16,464 - INFO - Successfully connected to Couchbase\n" ] } ], @@ -263,18 +267,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:12,120 - INFO - Collection 'azure' already exists.Skipping creation.\n", - "2025-01-31 14:05:13,317 - INFO - Primary index present or created successfully.\n", - "2025-01-31 14:05:13,628 - INFO - All documents cleared from the collection.\n", - "2025-01-31 14:05:14,763 - INFO - Collection 'cache' already exists.Skipping creation.\n", - "2025-01-31 14:05:15,848 - INFO - Primary index present or created successfully.\n", - "2025-01-31 14:05:16,122 - INFO - All documents cleared from the collection.\n" + "2025-02-05 05:57:16,494 - INFO - Collection 'azure' already exists.Skipping creation.\n", + "2025-02-05 05:57:16,495 - INFO - Primary index present or created successfully.\n", + "2025-02-05 05:57:16,574 - INFO - All documents cleared from the collection.\n", + "2025-02-05 05:57:16,577 - INFO - Collection 'cache' already exists.Skipping creation.\n", + "2025-02-05 05:57:16,578 - INFO - Primary index present or created successfully.\n", + "2025-02-05 05:57:16,579 - INFO - All documents cleared from the collection.\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -415,8 +419,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:17,594 - INFO - Index 'vector_search_azure' found\n", - "2025-01-31 14:05:18,525 - INFO - Index 
'vector_search_azure' already exists. Skipping creation/update.\n" + "2025-02-05 05:57:16,594 - INFO - Index 'vector_search_azure' found\n", + "2025-02-05 05:57:16,603 - INFO - Index 'vector_search_azure' already exists. Skipping creation/update.\n" ] } ], @@ -493,7 +497,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:18,573 - INFO - Successfully created AzureOpenAIEmbeddings\n" + "2025-02-05 05:57:16,619 - INFO - Successfully created AzureOpenAIEmbeddings\n" ] } ], @@ -534,7 +538,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:22,691 - INFO - Successfully created vector store\n" + "2025-02-05 05:57:16,637 - INFO - Successfully created vector store\n" ] } ], @@ -572,7 +576,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:05:28,937 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" + "2025-02-05 05:57:23,300 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" ] }, { @@ -632,7 +636,13 @@ "metadata": {}, "source": [ "## Saving Data to the Vector Store\n", - "With the Vector store set up, the next step is to populate it with data. We save the BBC articles dataset to the vector store. For each document, we will generate the embeddings for the article to use with the semantic search using LangChain. Here one of the articles is larger than the maximum tokens that we can use for our embedding model. If we want to ingest that document, we could split the document and ingest it in parts. However, since it is only a single document for simplicity, we ignore that document from the ingestion process." + "Now that we have our Vector store configured, we'll populate it with the BBC news articles using either manual or automatic processing. In manual mode, articles are processed in batches of 50, while automatic mode processes all articles at once. Both modes skip articles exceeding 50,000 characters due to the embedding model's token limit. 
\n", + "\n", + "In manual mode, the code uses tqdm to show a progress bar as articles are processed batch by batch. Each batch is added to the vector store using add_texts(), with error handling for individual articles and batches. Any remaining articles in a partial final batch are also processed.\n", + "\n", + "In automatic mode, the code filters out articles exceeding the character limit and processes all valid articles in a single batch. Both modes use Azure OpenAI to generate vector embeddings, enabling semantic search capabilities.\n", + "\n", + "The user can select the processing mode via input prompt, with logging to track the selected mode and completion status. Invalid mode selections raise an error." ] }, { @@ -644,46 +654,75 @@ "name": "stderr", "output_type": "stream", "text": [ - "Ingesting articles: 40%|███▉ | 697/1749 [07:43<10:44, 1.63it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Skipping article with length 73697 - exceeds 50,000 character limit\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Ingesting articles: 100%|██████████| 1749/1749 [19:01<00:00, 1.53it/s]\n" + "2025-02-05 05:57:35,571 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", + "2025-02-05 05:58:38,436 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", + "2025-02-05 05:59:42,285 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:00:45,114 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:01:47,226 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", + "2025-02-05 06:02:51,387 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:03:54,143 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:04:57,236 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:05:59,886 - INFO - Retrying request to /embeddings in 50.000000
seconds\n", + "2025-02-05 06:07:02,175 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", + "2025-02-05 06:08:05,576 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:09:12,532 - INFO - Retrying request to /embeddings in 49.000000 seconds\n", + "2025-02-05 06:10:13,974 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:11:16,796 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:12:19,822 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:13:22,688 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", + "2025-02-05 06:14:34,535 - INFO - Automatic processing completed successfully\n" ] } ], "source": [ - "for article in tqdm(unique_news_articles, desc=\"Ingesting articles\"):\n", - " try:\n", - " # Skip articles that exceed the model's token limit (50,000 characters)\n", - " if len(article) > 50000:\n", - " print(f\"Skipping article with length {len(article)} - exceeds 50,000 character limit\")\n", - " continue\n", + "if BATCH_PROCESSING_MODE == \"manual\":\n", + " # Manual batch processing\n", + " batch_size = 50\n", + " texts = []\n", + "\n", + " for article in tqdm(unique_news_articles, desc=\"Processing articles\"):\n", + " try:\n", + " # Skip articles that exceed the model's token limit (50,000 characters)\n", + " if len(article) > 50000:\n", + " print(f\"Skipping article with length {len(article)} - exceeds 50,000 character limit\")\n", + " continue\n", + " \n", + " # Add article to current batch\n", + " texts.append(article)\n", " \n", - " # Convert article into the format needed for add_texts\n", - " texts = [article] # Single article as text\n", - " metadatas = [{}] # Empty metadata dictionary for each article\n", - " uuids = [str(uuid4())] # Generate UUID for the article\n", - " \n", - " # Use add_texts instead of add_documents\n", + " # When batch is full, add to vector store\n", + " if len(texts) >= 
batch_size:\n", + " vector_store.add_texts(texts=texts, batch_size=batch_size)\n", + " # Clear list for next batch\n", + " texts = []\n", + " \n", + " except Exception as e:\n", + " print(f\"Failed to process article: {str(e)}\")\n", + " continue\n", + "\n", + " # Add any remaining documents in the final batch\n", + " if texts:\n", + " try:\n", + " vector_store.add_texts(texts=texts, batch_size=batch_size)\n", + " except Exception as e:\n", + " print(f\"Failed to save final batch to vector store: {str(e)}\")\n", + " \n", + " logging.info(\"Manual processing completed successfully\")\n", + "\n", + "elif BATCH_PROCESSING_MODE == \"automatic\":\n", + " # Automatic Batch Processing\n", + " articles = [article for article in unique_news_articles if article and len(article) <= 50000]\n", + "\n", + " try:\n", " vector_store.add_texts(\n", - " texts=texts,\n", - " metadatas=metadatas,\n", - " ids=uuids\n", + " texts=articles\n", " )\n", + " logging.info(\"Automatic processing completed successfully\")\n", " except Exception as e:\n", - " print(f\"Failed to save documents to vector store: {str(e)}\")\n", - " continue" + " raise ValueError(f\"Failed to save documents to vector store: {str(e)}\")\n", + "\n", + "else:\n", + " raise ValueError(\"Invalid processing mode. 
Please enter 'manual' or 'automatic'.\")" ] }, { @@ -713,7 +752,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:24:32,975 - INFO - Successfully created cache\n" + "2025-02-05 06:14:34,558 - INFO - Successfully created cache\n" ] } ], @@ -759,7 +798,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:24:32,997 - INFO - Successfully created Azure OpenAI Chat model\n" + "2025-02-05 06:14:34,584 - INFO - Successfully created Azure OpenAI Chat model\n" ] } ], @@ -803,7 +842,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:24:35,372 - INFO - Semantic search completed in 2.37 seconds\n" + "2025-02-05 06:14:34,920 - INFO - Semantic search completed in 0.33 seconds\n" ] }, { @@ -811,9 +850,9 @@ "output_type": "stream", "text": [ "\n", - "Semantic Search Results (completed in 2.37 seconds):\n", + "Semantic Search Results (completed in 0.33 seconds):\n", "--------------------------------------------------------------------------------\n", - "Score: 0.6858, Text: Manchester City boss Pep Guardiola has won 18 trophies since he arrived at the club in 2016\n", + "Score: 0.6856, Text: Manchester City boss Pep Guardiola has won 18 trophies since he arrived at the club in 2016\n", "\n", "Manchester City boss Pep Guardiola says he is \"fine\" despite admitting his sleep and diet are being affected by the worst run of results in his entire managerial career. In an interview with former Italy international Luca Toni for Amazon Prime Sport before Wednesday's Champions League defeat by Juventus, Guardiola touched on the personal impact City's sudden downturn in form has had. Guardiola said his state of mind was \"ugly\", that his sleep was \"worse\" and he was eating lighter as his digestion had suffered. City go into Sunday's derby against Manchester United at Etihad Stadium having won just one of their past 10 games. 
The Juventus loss means there is a chance they may not even secure a play-off spot in the Champions League. Asked to elaborate on his comments to Toni, Guardiola said: \"I'm fine. \"In our jobs we always want to do our best or the best as possible. When that doesn't happen you are more uncomfortable than when the situation is going well, always that happened. \"In good moments I am happier but when I get to the next game I am still concerned about what I have to do. There is no human being that makes an activity and it doesn't matter how they do.\" Guardiola said City have to defend better and \"avoid making mistakes at both ends\". To emphasise his point, Guardiola referred back to the third game of City's current run, against a Sporting side managed by Ruben Amorim, who will be in the United dugout at the weekend. City dominated the first half in Lisbon, led thanks to Phil Foden's early effort and looked to be cruising. Instead, they conceded three times in 11 minutes either side of half-time as Sporting eventually ran out 4-1 winners. \"I would like to play the game like we played in Lisbon on Sunday, believe me,\" said Guardiola, who is facing the prospect of only having three fit defenders for the derby as Nathan Ake and Manuel Akanji try to overcome injury concerns. If there is solace for City, it comes from the knowledge United are not exactly flying. Their comeback Europa League victory against Viktoria Plzen on Thursday was their third win of Amorim's short reign so far but only one of those successes has come in the Premier League, where United have lost their past two games against Arsenal and Nottingham Forest. Nevertheless, Guardiola can see improvements already on the red side of the city. \"It's already there,\" he said. \"You see all the patterns, the movements, the runners and the pace. 
He will do a good job at United, I'm pretty sure of that.\"\n", "\n", @@ -843,7 +882,7 @@ "\n", "Guardiola suggested the serious renewal will wait until the summer but the red flags have been appearing for weeks in the sudden and shocking decline of a team that has lost the aura of invincibility that left many opponents beaten before kick-off in previous years. He has had stated City must \"survive\" this season - whatever qualifies as survival for a club of such rich ambition - but the quest for a record fifth successive Premier League title is surely over as they lie nine points behind leaders Liverpool having played a game more. Their Champions League aspirations are also in jeopardy after another loss, this time against Juventus in Turin. City's squad has been allowed to grow too old together. The insatiable thirst for success seems to have gone, the scales of superiority have fallen away and opponents now sense vulnerability right until the final whistle, as United did here. The manner in which United were able, and felt able, to snatch this victory drove right to the heart of how City, and Guardiola, are allowing opponents to prey on their downfall. Guardiola has every reason to cite injuries, most significantly to Rodri and also John Stones as well as others, but this cannot be used an excuse for such a dramatic decline in standards, allied to the appearance of a soft underbelly that is so easily exploited. And City's rebuild will not be a quick fix. With every performance, every defeat, the scale of what lies in front of Guardiola becomes more obvious - and daunting. Manchester City's fans did their best to reassure Guardiola of their faith in him with a giant Barcelona-inspired banner draped from the stands before kick-off emblazoned with his image reading \"Més que un entrenador\" - \"More Than A Coach\". And Guardiola will now need to be more than a coach than at any time in his career. 
He will have the finances but it will be done with City's challengers also strengthening. Kevin de Bruyne, 34 in June, lasted 68 minutes here before he was substituted. Age and injuries are catching up with one of the greatest players of the Premier League era and he is unlikely to be at City next season. Mateo Kovacic, who replaced De Bruyne, is also 31 in May. Kyle Walker, 34, is being increasingly exposed. His most notable contribution here was an embarrassing collapse to the ground after the mildest head-to-head collision with Rasmus Hojlund. Ilkay Gundogan, another 34-year-old and a previous pillar of Guardiola's great successes, no longer has the legs or energy to exert influence. This looks increasingly like a season too far following his return from Barcelona. Flaws are also being exposed elsewhere, with previously reliable performers failing to hit previous standards. Phil Foden scored 27 goals and had 12 assists when he was Premier League Player of the Season last term. This year he has just three goals and two assists in 18 appearances in all competitions. He has no goals and just one assist in 11 Premier League games. Jack Grealish, who came on after 77 minutes against United, has not scored in a year for Manchester City, his last goal coming in a 2-2 draw against Crystal Palace on 16 December last year. He has, in the meantime, scored twice for England. Erling Haaland is also struggling as City lack creativity and cutting edge. He has three goals in his past 11 Premier League games after scoring 10 in his first five. And in another indication of City's impotence, and their reliance on Haaland, defender Gvardiol's goal against United was his fourth this season, making him their second highest scorer in all competitions behind the Norwegian striker, who has 18. Goalkeeper Ederson, so reliable for so long, has already been dropped once this season and did not cover himself in glory for United's winner. 
Guardiola, with that freshly signed two-year contract, insists he \"wants it\" as he treads on this alien territory of failure. He will be under no illusions about the size of the job in front of him as he placed his head in his hands in anguish after yet another damaging and deeply revealing defeat. City and Guardiola are in new, unforgiving territory.\n", "--------------------------------------------------------------------------------\n", - "Score: 0.6066, Text: Pep Guardiola has said Manchester City will be his final managerial job in club football before he \"maybe\" coaches a national team.\n", + "Score: 0.6076, Text: Pep Guardiola has said Manchester City will be his final managerial job in club football before he \"maybe\" coaches a national team.\n", "\n", "The former Barcelona and Bayern Munich boss has won 15 major trophies since taking charge of City in 2016.\n", "\n", @@ -887,7 +926,7 @@ "\n", "Guardiola's side host Everton on Boxing Day, before a trip to Leicester on 29 December and a home match against West Ham on 4 January. Given all three opponents are in the bottom seven, it offers City a chance to improve on an appalling recent record of four points from eight games, which Guardiola acknowledges has left their lofty European ambitions in doubt. \"The one team that has been in the Champions League for the past years has been Manchester City,\" he added. \"Now we are at risk, of course we are. Definitely.\" Arsenal, Chelsea, Liverpool and Manchester United finished in the Premier League's top four from the 2005-06 season to 2008-09. At least three of them also occupied the top four spots for 15 successive campaigns until 2012. But United have spent five out of the past 11 seasons outside the Champions League. Arsenal spent six seasons out of the competition before returning last term. Liverpool missed out all but one year in seven from 2010, while Chelsea are in their second successive campaign outside Europe's elite. 
This term the threat to City comes from unexpected sources. As well as Forest and Bournemouth, Aston Villa are ahead of City, while Newcastle, Fulham and Brighton are also within a couple of points. \"There are a lot of contenders,\" said Guardiola, whose side have lost nine of their last 12 games in all competitions. \"For every club it is so important and if we are not winning games, we will be out. \"If we don't qualify it is because we don't deserve it, because we were not prepared and because we had a lot of problems and didn't solve them.\"\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5516, Text: Man City's Dias ruled out for 'three or four weeks'\n", + "Score: 0.5504, Text: Man City's Dias ruled out for 'three or four weeks'\n", "\n", "Ruben Dias has won 10 major trophies during his time at Manchester City\n", "\n", @@ -895,7 +934,7 @@ "\n", "Guardiola's mood was so downbeat in the immediate aftermath of the United defeat it was easy to imagine he might conclude he was no longer capable of doing the job. He gave his players a couple of days off afterwards and was brighter when he spoke to journalists in his scheduled briefing before the Villa trip. \"We'd just finished a game that we lost in the circumstances and I was not happy,\" he said. \"I try to be honest about the feelings of my teams. We fell down six times [number of Premier League games without a win], we have to stand up seven. There is no alternative. \"I'm fine. I'm a normal person with feelings like all of us. When the situation is going well we are better but it's normal. I would not go to the press conference if we were 1-0 up and expressing something that I didn't feel.\" Former Villa forward Jack Grealish has not scored for City in over a year but Guardiola pointed out he is not the only attacking player struggling this season. 
He added: \"We are struggling to create a little bit up front, but always I am optimistic about my players that they are going to turn and perform well.\"\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5148, Text: 'Life is not easy' - Haaland penalty miss sums up Man City crisis\n", + "Score: 0.5146, Text: 'Life is not easy' - Haaland penalty miss sums up Man City crisis\n", "\n", "Manchester City striker Erling Haaland has now missed two of his 17 penalties taken in the Premier League\n", "\n", @@ -911,7 +950,7 @@ "\n", "This video can not be played To play this video you need to enable JavaScript in your browser. Dyche 'very pleased' with Everton defence in Man City draw\n", "--------------------------------------------------------------------------------\n", - "Score: 0.4715, Text: 'So happy he is back' - 'integral' De Bruyne 'one of best we've seen'\n", + "Score: 0.4713, Text: 'So happy he is back' - 'integral' De Bruyne 'one of best we've seen'\n", "\n", "This video can not be played To play this video you need to enable JavaScript in your browser. Match of the Day: How Kevin de Bruyne inspired Man City back to winning ways\n", "\n", @@ -923,7 +962,7 @@ "\n", "City are now unbeaten in their past 31 Premier League games with De Bruyne starting. He has been involved in 25 goals (nine goals, 16 assists) in those games. De Bruyne said: \"There have never been issues between me and Pep. He knows I've been struggling. It's painful and uncomfortable. 
\"Hopefully I can get back to my body with not much pain and then I'll be fine.\" However, a bit like the victory itself - tarnished by an injury to Manuel Akanji that may rule the Switzerland defender out of the weekend trip to Crystal Palace, and a hamstring problem for Nathan Ake who has already missed five weeks with a similar injury this season that Guardiola said \"doesn’t look good\" and makes him feel \"sad\" for the Dutchman - there was a caveat as the City boss assessed De Bruyne's contribution. \"He fought a lot and he prepared himself,\" he said. \"He is back to his physicality. The minutes he played at Anfield were really good. \"Last season he was out for many months, this season as well. We will see how he recovers after a long time injured and how he feels in three days.\" De Bruyne recorded four or more shots and created four or more chances for the third time in a Premier League game this season. Despite only starting five Premier League games, only Arsenal's Bukayo Saka has done so more often this term. Guardiola's fear must be that if he pushes De Bruyne too far too quickly, his body will let him down. Former Man City defender Micah Richards told BBC Match of the Day: \"He is a top-quality player and one of the best we have seen. He always manages to find space on the pitch.\" \"He has been integral to Man City's success over a number of years,\" added former City boss Stuart Pearce on Amazon Prime. \"He is the go-to player that sets Erling Haaland alight with his passing. He creates goals, he scores goals. 
\"If you were to pick one player out over the last eight or nine years De Bruyne would be at the top of almost everyone's list.\" It is a delicate balance given his team are still nine points adrift of Premier League leaders Liverpool and are also outside the Champions League top eight before next week's trip to Italy and a meeting with Juventus, after which City will only have January first-phase games remaining to ensure they secure qualification for the last-16 without needing to be bothered by February's play-off round. However, as with his team, De Bruyne's recovery had to start somewhere.\n", "--------------------------------------------------------------------------------\n", - "Score: 0.4524, Text: Manchester United manager Ruben Amorim has questioned the \"choices\" of people close to forward Marcus Rashford. Rashford, 27, said he was \"ready for a new challenge\" in an interview after being dropped for the 2-1 win at Manchester City on 15 December. The England international subsequently missed the 4-3 Carabao Cup quarter-final defeat by Tottenham and was also left out as United lost 3-0 to Bournemouth on Sunday, adding to speculation he could leave Old Trafford in January. \"I understand these players have a lot of people around them, making choices that are not the first idea from the player. \"They chose to do the interview as it is not just Marcus.\"\n", + "Score: 0.4525, Text: Manchester United manager Ruben Amorim has questioned the \"choices\" of people close to forward Marcus Rashford. Rashford, 27, said he was \"ready for a new challenge\" in an interview after being dropped for the 2-1 win at Manchester City on 15 December. The England international subsequently missed the 4-3 Carabao Cup quarter-final defeat by Tottenham and was also left out as United lost 3-0 to Bournemouth on Sunday, adding to speculation he could leave Old Trafford in January. 
\"I understand these players have a lot of people around them, making choices that are not the first idea from the player. \"They chose to do the interview as it is not just Marcus.\"\n", "\n", "This video can not be played To play this video you need to enable JavaScript in your browser. 'He wants to play. He is trying'\n", "\n", @@ -983,7 +1022,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-31 14:24:35,381 - INFO - Successfully created RAG chain\n" + "2025-02-05 06:14:34,931 - INFO - Successfully created RAG chain\n" ] } ], @@ -1016,8 +1055,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "RAG Response: Pep Guardiola expressed significant concern and frustration about Manchester City's current form, describing the situation as one of the worst periods in his managerial career. He admitted his state of mind has been affected, with his sleep and diet suffering as a result. Guardiola acknowledged the dramatic drop in the team’s performance, citing defensive vulnerabilities, lack of creativity, and subpar individual performances as key issues. He also took responsibility for the team’s struggles, saying, “I am not good enough. I am the manager. I have to find solutions and so far I haven’t.” Despite the challenges, he expressed trust in his players and vowed to work step by step to find a way back to their best form.\n", - "RAG response generated in 5.77 seconds\n" + "RAG Response: Pep Guardiola expressed significant concern and frustration regarding Manchester City's current form, which he described as one of the worst runs of his managerial career. He admitted to feeling personally affected, noting that his sleep, diet, and overall state of mind had suffered during the team's downturn in results. Guardiola repeatedly emphasized the need to find solutions and took responsibility for the situation, stating, \"I am not good enough\" and acknowledging the necessity for a \"step-by-step\" recovery. 
Despite the struggles, he maintained confidence in his players and highlighted injuries, defensive mistakes, and a lack of creativity as contributing factors to the decline. He also recognized the daunting challenge of a potential rebuild, given the team's aging squad and loss of dominance.\n", + "RAG response generated in 3.66 seconds\n" ] } ], @@ -1051,7 +1090,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1066,16 +1105,20 @@ "text": [ "\n", "Query 1: What happened in the match between Fullham and Liverpool?\n", - "Response: In the Premier League match between Fulham and Liverpool at Anfield, the game ended in an intense 2-2 draw. Liverpool played almost the entire match with ten men after Andy Robertson received a red card in the 17th minute for denying a goalscoring opportunity. Despite their numerical disadvantage, Liverpool showed incredible resilience, coming from behind twice, with Diogo Jota scoring a late equaliser in the 86th minute. Liverpool dominated possession and key attacking metrics, earning praise for their performance. Fulham also played bravely, with their left-back, Antonee Robinson, noting that Liverpool’s efforts made it feel like they had a full squad on the field. The match was seen as a testament to Liverpool's title credentials under head coach Arne Slot.\n", - "Time taken: 6.66 seconds\n", + "Response: In the match between Fulham and Liverpool, the two teams played to an exciting 2-2 draw in the Premier League at Anfield. Liverpool played nearly the entire match with ten men after Andy Robertson was shown a red card in the 17th minute for denying a goalscoring opportunity. 
Despite being a man down, Liverpool twice came from behind to secure the draw, with an 86th-minute equaliser from Diogo Jota highlighting their resilience.\n", + "\n", + "Liverpool's performance was widely praised for their \"phenomenal\" effort in the face of adversity, maintaining over 60% possession and leading in key attacking metrics. Fulham, enjoying their best Premier League start since 2003, also showed bravery and strong performance, with their left-back Antonee Robinson noting it \"didn't feel like [Liverpool] had 10 men at all.\"\n", + "Time taken: 9.48 seconds\n", "\n", "Query 2: What was manchester city manager pep guardiola's reaction to the team's current form?\n", - "Response: Pep Guardiola expressed significant concern and frustration about Manchester City's current form, describing the situation as one of the worst periods in his managerial career. He admitted his state of mind has been affected, with his sleep and diet suffering as a result. Guardiola acknowledged the dramatic drop in the team’s performance, citing defensive vulnerabilities, lack of creativity, and subpar individual performances as key issues. He also took responsibility for the team’s struggles, saying, “I am not good enough. I am the manager. I have to find solutions and so far I haven’t.” Despite the challenges, he expressed trust in his players and vowed to work step by step to find a way back to their best form.\n", - "Time taken: 3.47 seconds\n", + "Response: Pep Guardiola expressed significant concern and frustration regarding Manchester City's current form, which he described as one of the worst runs of his managerial career. He admitted to feeling personally affected, noting that his sleep, diet, and overall state of mind had suffered during the team's downturn in results. Guardiola repeatedly emphasized the need to find solutions and took responsibility for the situation, stating, \"I am not good enough\" and acknowledging the necessity for a \"step-by-step\" recovery. 
Despite the struggles, he maintained confidence in his players and highlighted injuries, defensive mistakes, and a lack of creativity as contributing factors to the decline. He also recognized the daunting challenge of a potential rebuild, given the team's aging squad and loss of dominance.\n", + "Time taken: 1.12 seconds\n", "\n", "Query 3: What happened in the match between Fullham and Liverpool?\n", - "Response: In the Premier League match between Fulham and Liverpool at Anfield, the game ended in an intense 2-2 draw. Liverpool played almost the entire match with ten men after Andy Robertson received a red card in the 17th minute for denying a goalscoring opportunity. Despite their numerical disadvantage, Liverpool showed incredible resilience, coming from behind twice, with Diogo Jota scoring a late equaliser in the 86th minute. Liverpool dominated possession and key attacking metrics, earning praise for their performance. Fulham also played bravely, with their left-back, Antonee Robinson, noting that Liverpool’s efforts made it feel like they had a full squad on the field. The match was seen as a testament to Liverpool's title credentials under head coach Arne Slot.\n", - "Time taken: 1.82 seconds\n" + "Response: In the match between Fulham and Liverpool, the two teams played to an exciting 2-2 draw in the Premier League at Anfield. Liverpool played nearly the entire match with ten men after Andy Robertson was shown a red card in the 17th minute for denying a goalscoring opportunity. Despite being a man down, Liverpool twice came from behind to secure the draw, with an 86th-minute equaliser from Diogo Jota highlighting their resilience.\n", + "\n", + "Liverpool's performance was widely praised for their \"phenomenal\" effort in the face of adversity, maintaining over 60% possession and leading in key attacking metrics. 
Fulham, enjoying their best Premier League start since 2003, also showed bravery and strong performance, with their left-back Antonee Robinson noting it \"didn't feel like [Liverpool] had 10 men at all.\"\n", + "Time taken: 0.30 seconds\n" ] } ], From de27ebd0d3c4745433c7732f00a923907320e025 Mon Sep 17 00:00:00 2001 From: Kaustav Ghosh Date: Wed, 5 Feb 2025 23:54:59 +0530 Subject: [PATCH 4/4] Refactor Azure RAG notebook with improved collection setup and error handling - Updated collection setup function to handle bucket, scope, and collection creation dynamically - Reduced embedding dimensions from 3072 to 1536 in vector search index - Simplified error handling in search index and RAG chain creation - Removed unnecessary imports and batch processing mode configuration - Enhanced logging and error messages for better debugging - Improved overall code readability and error resilience --- .../RAG_with_Couchbase_and_AzureOpenAI.ipynb | 400 +++++++++--------- azure/azure_index.json | 4 +- 2 files changed, 211 insertions(+), 193 deletions(-) diff --git a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb index ef776424..08f7401a 100644 --- a/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb +++ b/azure/RAG_with_Couchbase_and_AzureOpenAI.ipynb @@ -100,16 +100,15 @@ "import json\n", "import logging\n", "import os\n", - "import sys\n", "import time\n", "from datetime import timedelta\n", - "from uuid import uuid4\n", "\n", "from couchbase.auth import PasswordAuthenticator\n", "from couchbase.cluster import Cluster\n", "from couchbase.exceptions import (CouchbaseException,\n", - " InternalServerFailureException,\n", - " QueryIndexAlreadyExistsException)\n", + " InternalServerFailureException,\n", + " QueryIndexAlreadyExistsException,ServiceUnavailableException)\n", + "from couchbase.management.buckets import CreateBucketSettings\n", "from couchbase.management.search import SearchIndex\n", "from couchbase.options import 
ClusterOptions\n", "from datasets import load_dataset\n", @@ -121,8 +120,7 @@ "from langchain_couchbase.cache import CouchbaseCache\n", "from langchain_couchbase.vectorstores import CouchbaseVectorStore\n", "from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings\n", - "from openai import NotFoundError\n", - "from tqdm import tqdm" + "from openai import NotFoundError" ] }, { @@ -189,14 +187,10 @@ "SCOPE_NAME = os.getenv('SCOPE_NAME') or input('Enter your scope name (default: shared): ') or 'shared'\n", "COLLECTION_NAME = os.getenv('COLLECTION_NAME') or input('Enter your collection name (default: azure): ') or 'azure'\n", "CACHE_COLLECTION = os.getenv('CACHE_COLLECTION') or input('Enter your cache collection name (default: cache): ') or 'cache'\n", - "BATCH_PROCESSING_MODE = os.getenv('BATCH_PROCESSING_MODE') or input('Enter batch processing mode (manual/automatic): ').lower() or 'automatic'\n", "\n", "# Check if the variables are correctly loaded\n", "if not all([AZURE_OPENAI_KEY, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_EMBEDDING_DEPLOYMENT, AZURE_OPENAI_CHAT_DEPLOYMENT]):\n", - " raise ValueError(\"Missing required Azure OpenAI variables\")\n", - "\n", - "if BATCH_PROCESSING_MODE not in ['manual', 'automatic']:\n", - " raise ValueError(\"Invalid batch processing mode\")" + " raise ValueError(\"Missing required Azure OpenAI variables\")" ] }, { @@ -225,7 +219,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:16,464 - INFO - Successfully connected to Couchbase\n" + "2025-02-05 23:32:31,737 - INFO - Successfully connected to Couchbase\n" ] } ], @@ -246,10 +240,31 @@ "id": "C_Gpy32N8mcZ" }, "source": [ - "# Setting Up Collections in Couchbase\n", - "In Couchbase, data is organized in buckets, which can be further divided into scopes and collections. Think of a collection as a table in a traditional SQL database. Before we can store any data, we need to ensure that our collections exist. If they don't, we must create them. 
This step is important because it prepares the database to handle the specific types of data our application will process. By setting up collections, we define the structure of our data storage, which is essential for efficient data retrieval and management.\n", + "## Setting Up Collections in Couchbase\n", + "\n", + "The setup_collection() function handles creating and configuring the hierarchical data organization in Couchbase:\n", + "\n", + "1. Bucket Creation:\n", + " - Checks if specified bucket exists, creates it if not\n", + " - Sets bucket properties like RAM quota (1024MB) and replication (disabled)\n", + "\n", + "2. Scope Management: \n", + " - Verifies if requested scope exists within bucket\n", + " - Creates new scope if needed (unless it's the default \"_default\" scope)\n", "\n", - "Moreover, setting up collections allows us to isolate different types of data within the same bucket, providing a more organized and scalable data structure. This is particularly useful when dealing with large datasets, as it ensures that related data is stored together, making it easier to manage and query." + "3. Collection Setup:\n", + " - Checks for collection existence within scope\n", + " - Creates collection if it doesn't exist\n", + " - Waits 2 seconds for collection to be ready\n", + "\n", + "Additional Tasks:\n", + "- Creates primary index on collection for query performance\n", + "- Clears any existing documents for clean state\n", + "- Implements comprehensive error handling and logging\n", + "\n", + "The function is called twice to set up:\n", + "1. Main collection for vector embeddings\n", + "2. 
Cache collection for storing results\n" ] }, { @@ -267,18 +282,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:16,494 - INFO - Collection 'azure' already exists.Skipping creation.\n", - "2025-02-05 05:57:16,495 - INFO - Primary index present or created successfully.\n", - "2025-02-05 05:57:16,574 - INFO - All documents cleared from the collection.\n", - "2025-02-05 05:57:16,577 - INFO - Collection 'cache' already exists.Skipping creation.\n", - "2025-02-05 05:57:16,578 - INFO - Primary index present or created successfully.\n", - "2025-02-05 05:57:16,579 - INFO - All documents cleared from the collection.\n" + "2025-02-05 23:32:31,797 - INFO - Bucket 'vector-search-testing' does not exist. Creating it...\n", + "2025-02-05 23:32:32,384 - INFO - Bucket 'vector-search-testing' created successfully.\n", + "2025-02-05 23:32:32,393 - INFO - Scope 'shared' does not exist. Creating it...\n", + "2025-02-05 23:32:32,450 - INFO - Scope 'shared' created successfully.\n", + "2025-02-05 23:32:32,462 - INFO - Collection 'azure' does not exist. Creating it...\n", + "2025-02-05 23:32:32,532 - INFO - Collection 'azure' created successfully.\n", + "2025-02-05 23:32:37,006 - INFO - Primary index present or created successfully.\n", + "2025-02-05 23:32:41,769 - INFO - All documents cleared from the collection.\n", + "2025-02-05 23:32:41,771 - INFO - Bucket 'vector-search-testing' exists.\n", + "2025-02-05 23:32:41,783 - INFO - Collection 'cache' does not exist. 
Creating it...\n", + "2025-02-05 23:32:41,835 - INFO - Collection 'cache' created successfully.\n", + "2025-02-05 23:32:48,115 - INFO - Primary index present or created successfully.\n", + "2025-02-05 23:32:48,126 - INFO - All documents cleared from the collection.\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -289,9 +311,34 @@ "source": [ "def setup_collection(cluster, bucket_name, scope_name, collection_name):\n", " try:\n", - " bucket = cluster.bucket(bucket_name)\n", + " # Check if bucket exists, create if it doesn't\n", + " try:\n", + " bucket = cluster.bucket(bucket_name)\n", + " logging.info(f\"Bucket '{bucket_name}' exists.\")\n", + " except Exception as e:\n", + " logging.info(f\"Bucket '{bucket_name}' does not exist. Creating it...\")\n", + " bucket_settings = CreateBucketSettings(\n", + " name=bucket_name,\n", + " bucket_type='couchbase',\n", + " ram_quota_mb=1024,\n", + " flush_enabled=True,\n", + " num_replicas=0\n", + " )\n", + " cluster.buckets().create_bucket(bucket_settings)\n", + " bucket = cluster.bucket(bucket_name)\n", + " logging.info(f\"Bucket '{bucket_name}' created successfully.\")\n", + "\n", " bucket_manager = bucket.collections()\n", "\n", + " # Check if scope exists, create if it doesn't\n", + " scopes = bucket_manager.get_all_scopes()\n", + " scope_exists = any(scope.name == scope_name for scope in scopes)\n", + " \n", + " if not scope_exists and scope_name != \"_default\":\n", + " logging.info(f\"Scope '{scope_name}' does not exist. 
Creating it...\")\n", + " bucket_manager.create_scope(scope_name)\n", + " logging.info(f\"Scope '{scope_name}' created successfully.\")\n", + "\n", " # Check if collection exists, create if it doesn't\n", " collections = bucket_manager.get_all_scopes()\n", " collection_exists = any(\n", @@ -304,9 +351,11 @@ " bucket_manager.create_collection(scope_name, collection_name)\n", " logging.info(f\"Collection '{collection_name}' created successfully.\")\n", " else:\n", - " logging.info(f\"Collection '{collection_name}' already exists.Skipping creation.\")\n", + " logging.info(f\"Collection '{collection_name}' already exists. Skipping creation.\")\n", "\n", + " # Wait for collection to be ready\n", " collection = bucket.scope(scope_name).collection(collection_name)\n", + " time.sleep(2) # Give the collection time to be ready for queries\n", "\n", " # Ensure primary index exists\n", " try:\n", @@ -326,9 +375,9 @@ " return collection\n", " except Exception as e:\n", " raise RuntimeError(f\"Error setting up collection: {str(e)}\")\n", - "\n", + " \n", "setup_collection(cluster, CB_BUCKET_NAME, SCOPE_NAME, COLLECTION_NAME)\n", - "setup_collection(cluster, CB_BUCKET_NAME, SCOPE_NAME, CACHE_COLLECTION)" + "setup_collection(cluster, CB_BUCKET_NAME, SCOPE_NAME, CACHE_COLLECTION)\n" ] }, { @@ -341,7 +390,7 @@ "\n", "Semantic search requires an efficient way to retrieve relevant documents based on a user's query. This is where the Couchbase **Vector Search Index** comes into play. In this step, we load the Vector Search Index definition from a JSON file, which specifies how the index should be structured. This includes the fields to be indexed, the dimensions of the vectors, and other parameters that determine how the search engine processes queries based on vector similarity.\n", "\n", - "This Azure vector search index configuration requires specific default settings to function properly. 
This tutorial uses the bucket named `vector-search-testing` with the scope `shared` and collection `azure`. The configuration is set up for vectors with exactly `3072 dimensions`, using dot product similarity and optimized for recall. If you want to use a different bucket, scope, or collection, you will need to modify the index configuration accordingly.\n", + "This Azure vector search index configuration requires specific default settings to function properly. This tutorial uses the bucket named `vector-search-testing` with the scope `shared` and collection `azure`. The configuration is set up for vectors with exactly `1536 dimensions`, using dot product similarity and optimized for recall. If you want to use a different bucket, scope, or collection, you will need to modify the index configuration accordingly.\n", "\n", "For more information on creating a vector search index, please follow the [instructions](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html).\n" ] @@ -419,8 +468,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:16,594 - INFO - Index 'vector_search_azure' found\n", - "2025-02-05 05:57:16,603 - INFO - Index 'vector_search_azure' already exists. Skipping creation/update.\n" + "2025-02-05 23:32:48,217 - INFO - Creating new index 'vector_search_azure'...\n", + "2025-02-05 23:32:48,398 - INFO - Index 'vector_search_azure' successfully created/updated.\n" ] } ], @@ -446,29 +495,11 @@ "\n", "except QueryIndexAlreadyExistsException:\n", " logging.info(f\"Index '{index_name}' already exists. Skipping creation/update.\")\n", - "\n", + "except ServiceUnavailableException:\n", + " raise RuntimeError(\"Search service is not available. 
Please ensure the Search service is enabled in your Couchbase cluster.\")\n", "except InternalServerFailureException as e:\n", - " error_message = str(e)\n", - " logging.error(f\"InternalServerFailureException raised: {error_message}\")\n", - "\n", - " try:\n", - " # Accessing the response_body attribute from the context\n", - " error_context = e.context\n", - " response_body = error_context.response_body\n", - " if response_body:\n", - " error_details = json.loads(response_body)\n", - " error_message = error_details.get('error', '')\n", - "\n", - " if \"collection: 'azure' doesn't belong to scope: 'shared'\" in error_message:\n", - " raise ValueError(\"Collection 'azure' does not belong to scope 'shared'. Please check the collection and scope names.\")\n", - "\n", - " except ValueError as ve:\n", - " logging.error(str(ve))\n", - " raise\n", - "\n", - " except Exception as json_error:\n", - " logging.error(f\"Failed to parse the error message: {json_error}\")\n", - " raise RuntimeError(f\"Internal server error while creating/updating search index: {error_message}\")" + " logging.error(f\"Internal server error: {str(e)}\")\n", + " raise" ] }, { @@ -497,7 +528,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:16,619 - INFO - Successfully created AzureOpenAIEmbeddings\n" + "2025-02-05 23:32:48,749 - INFO - Successfully created AzureOpenAIEmbeddings\n" ] } ], @@ -538,7 +569,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:16,637 - INFO - Successfully created vector store\n" + "2025-02-05 23:32:48,828 - INFO - Successfully created vector store\n" ] } ], @@ -576,7 +607,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:23,300 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" + "2025-02-05 23:32:56,920 - INFO - Successfully loaded the BBC News dataset with 2687 rows.\n" ] }, { @@ -636,13 +667,24 @@ "metadata": {}, "source": [ "## Saving Data to the Vector Store\n", - 
"Now that we have our Vector store configured, we'll populate it with the BBC news articles using either manual or automatic processing. In manual mode, articles are processed in batches of 50, while automatic mode processes all articles at once. Both modes skip articles exceeding 50,000 characters due to the embedding model's token limit. \n", + "To efficiently handle the large number of articles, we process them in batches of 100 articles at a time. This batch processing approach helps manage memory usage and provides better control over the ingestion process.\n", "\n", - "In manual mode, the code uses tqdm to show a progress bar as articles are processed batch by batch. Each batch is added to the vector store using add_texts(), with error handling for individual articles and batches. Any remaining articles in a partial final batch are also processed.\n", + "We first filter out any articles that exceed 50,000 characters to avoid potential issues with token limits. Then, using the vector store's add_texts method, we add the filtered articles to our vector database. The batch_size parameter controls how many articles are processed in each iteration.\n", "\n", - "In automatic mode, the code filters out articles exceeding the character limit and processes all valid articles in a single batch. Both modes use Amazon Bedrock to generate vector embeddings enabling semantic search capabilities.\n", + "This approach offers several benefits:\n", + "1. Memory Efficiency: Processing in smaller batches prevents memory overload\n", + "2. Error Handling: If an error occurs, only the current batch is affected\n", + "3. Progress Tracking: Easier to monitor and track the ingestion progress\n", + "4. Resource Management: Better control over CPU and network resource utilization\n", "\n", - "The user can select the processing mode via input prompt, with logging to track the selected mode and completion status. Invalid mode selections raise an error."
+ "We use a batch size of 100 to ensure reliable operation.\n", + "The optimal batch size depends on many factors including:\n", + "- Document sizes being inserted\n", + "- Available system resources\n", + "- Network conditions\n", + "- Concurrent workload\n", + "\n", + "Consider measuring performance with your specific workload before adjusting.\n" ] }, { @@ -654,75 +696,40 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 05:57:35,571 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", - "2025-02-05 05:58:38,436 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", - "2025-02-05 05:59:42,285 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:00:45,114 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:01:47,226 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", - "2025-02-05 06:02:51,387 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:03:54,143 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:04:57,236 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:05:59,886 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:07:02,175 - INFO - Retrying request to /embeddings in 51.000000 seconds\n", - "2025-02-05 06:08:05,576 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:09:12,532 - INFO - Retrying request to /embeddings in 49.000000 seconds\n", - "2025-02-05 06:10:13,974 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:11:16,796 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:12:19,822 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:13:22,688 - INFO - Retrying request to /embeddings in 50.000000 seconds\n", - "2025-02-05 06:14:34,535 - INFO - Automatic processing
completed successfully\n" + "2025-02-05 23:33:09,629 - INFO - Retrying request to /embeddings in 52.000000 seconds\n", + "2025-02-05 23:34:12,707 - INFO - Retrying request to /embeddings in 53.000000 seconds\n", + "2025-02-05 23:35:15,367 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:36:20,388 - INFO - Retrying request to /embeddings in 52.000000 seconds\n", + "2025-02-05 23:37:21,859 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:38:27,398 - INFO - Retrying request to /embeddings in 53.000000 seconds\n", + "2025-02-05 23:39:30,662 - INFO - Retrying request to /embeddings in 52.000000 seconds\n", + "2025-02-05 23:40:31,879 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:41:35,448 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:42:38,597 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:43:42,440 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:44:45,903 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:45:50,230 - INFO - Retrying request to /embeddings in 53.000000 seconds\n", + "2025-02-05 23:46:52,044 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:47:55,416 - INFO - Retrying request to /embeddings in 54.000000 seconds\n", + "2025-02-05 23:48:59,242 - INFO - Retrying request to /embeddings in 53.000000 seconds\n", + "2025-02-05 23:50:02,868 - INFO - Document ingestion completed successfully.\n" ] } ], "source": [ - "if BATCH_PROCESSING_MODE == \"manual\":\n", - " # Manual batch processing\n", - " batch_size = 50\n", - " texts = []\n", - "\n", - " for article in tqdm(unique_news_articles, desc=\"Processing articles\"):\n", - " try:\n", - " # Skip articles that exceed the model's token limit (50,000 characters)\n", - " if len(article) > 50000:\n", - " print(f\"Skipping article with length 
{len(article)} - exceeds 50,000 character limit\")\n", - " continue\n", - " \n", - " # Add article to current batch\n", - " texts.append(article)\n", - " \n", - " # When batch is full, add to vector store\n", - " if len(texts) >= batch_size:\n", - " vector_store.add_texts(texts=texts, batch_size=batch_size)\n", - " # Clear list for next batch\n", - " texts = []\n", - " \n", - " except Exception as e:\n", - " print(f\"Failed to process article: {str(e)}\")\n", - " continue\n", + "batch_size = 100\n", "\n", - " # Add any remaining documents in the final batch\n", - " if texts:\n", - " try:\n", - " vector_store.add_texts(texts=texts, batch_size=batch_size)\n", - " except Exception as e:\n", - " print(f\"Failed to save final batch to vector store: {str(e)}\")\n", - " \n", - " logging.info(\"Manual processing completed successfully\")\n", - "\n", - "elif BATCH_PROCESSING_MODE == \"automatic\":\n", - " # Automatic Batch Processing\n", - " articles = [article for article in unique_news_articles if article and len(article) <= 50000]\n", - "\n", - " try:\n", - " vector_store.add_texts(\n", - " texts=articles\n", - " )\n", - " logging.info(\"Automatic processing completed successfully\")\n", - " except Exception as e:\n", - " raise ValueError(f\"Failed to save documents to vector store: {str(e)}\")\n", + "# Automatic Batch Processing\n", + "articles = [article for article in unique_news_articles if article and len(article) <= 50000]\n", "\n", - "else:\n", - " raise ValueError(\"Invalid processing mode. 
Please enter 'manual' or 'automatic'.\")" + "try:\n", + " vector_store.add_texts(\n", + " texts=articles,\n", + " batch_size=batch_size\n", + " )\n", + " logging.info(\"Document ingestion completed successfully.\")\n", + "except Exception as e:\n", + " raise ValueError(f\"Failed to save documents to vector store: {str(e)}\")\n" ] }, { @@ -752,7 +759,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 06:14:34,558 - INFO - Successfully created cache\n" + "2025-02-05 23:50:02,916 - INFO - Successfully created cache\n" ] } ], @@ -798,7 +805,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 06:14:34,584 - INFO - Successfully created Azure OpenAI Chat model\n" + "2025-02-05 23:50:03,057 - INFO - Successfully created Azure OpenAI Chat model\n" ] } ], @@ -842,7 +849,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 06:14:34,920 - INFO - Semantic search completed in 0.33 seconds\n" + "2025-02-05 23:50:03,409 - INFO - Semantic search completed in 0.32 seconds\n" ] }, { @@ -850,39 +857,15 @@ "output_type": "stream", "text": [ "\n", - "Semantic Search Results (completed in 0.33 seconds):\n", + "Semantic Search Results (completed in 0.32 seconds):\n", "--------------------------------------------------------------------------------\n", - "Score: 0.6856, Text: Manchester City boss Pep Guardiola has won 18 trophies since he arrived at the club in 2016\n", + "Score: 0.7044, Text: Manchester City boss Pep Guardiola has won 18 trophies since he arrived at the club in 2016\n", "\n", "Manchester City boss Pep Guardiola says he is \"fine\" despite admitting his sleep and diet are being affected by the worst run of results in his entire managerial career. In an interview with former Italy international Luca Toni for Amazon Prime Sport before Wednesday's Champions League defeat by Juventus, Guardiola touched on the personal impact City's sudden downturn in form has had. 
Guardiola said his state of mind was \"ugly\", that his sleep was \"worse\" and he was eating lighter as his digestion had suffered. City go into Sunday's derby against Manchester United at Etihad Stadium having won just one of their past 10 games. The Juventus loss means there is a chance they may not even secure a play-off spot in the Champions League. Asked to elaborate on his comments to Toni, Guardiola said: \"I'm fine. \"In our jobs we always want to do our best or the best as possible. When that doesn't happen you are more uncomfortable than when the situation is going well, always that happened. \"In good moments I am happier but when I get to the next game I am still concerned about what I have to do. There is no human being that makes an activity and it doesn't matter how they do.\" Guardiola said City have to defend better and \"avoid making mistakes at both ends\". To emphasise his point, Guardiola referred back to the third game of City's current run, against a Sporting side managed by Ruben Amorim, who will be in the United dugout at the weekend. City dominated the first half in Lisbon, led thanks to Phil Foden's early effort and looked to be cruising. Instead, they conceded three times in 11 minutes either side of half-time as Sporting eventually ran out 4-1 winners. \"I would like to play the game like we played in Lisbon on Sunday, believe me,\" said Guardiola, who is facing the prospect of only having three fit defenders for the derby as Nathan Ake and Manuel Akanji try to overcome injury concerns. If there is solace for City, it comes from the knowledge United are not exactly flying. Their comeback Europa League victory against Viktoria Plzen on Thursday was their third win of Amorim's short reign so far but only one of those successes has come in the Premier League, where United have lost their past two games against Arsenal and Nottingham Forest. Nevertheless, Guardiola can see improvements already on the red side of the city. 
\"It's already there,\" he said. \"You see all the patterns, the movements, the runners and the pace. He will do a good job at United, I'm pretty sure of that.\"\n", "\n", "Guardiola says skipper Kyle Walker has been offered support by the club after the City defender highlighted the racial abuse he had received on social media in the wake of the Juventus trip. \"It's unacceptable,\" he said. \"Not because it's Kyle - for any human being. \"Unfortunately it happens many times in the real world. It is not necessary to say he has the support of the entire club. It is completely unacceptable and we give our support to him.\"\n", "--------------------------------------------------------------------------------\n", - "Score: 0.6338, Text: 'We have to find a way' - Guardiola vows to end relegation form\n", - "\n", - "This video can not be played To play this video you need to enable JavaScript in your browser. 'Worrying' and 'staggering' - Why do Manchester City keep conceding?\n", - "\n", - "Manchester City are currently in relegation form and there is little sign of it ending. Saturday's 2-1 defeat at Aston Villa left them joint bottom of the form table over the past eight games with just Southampton for company. Saints, at the foot of the Premier League, have the same number of points, four, as City over their past eight matches having won one, drawn one and lost six - the same record as the floundering champions. And if Southampton - who appointed Ivan Juric as their new manager on Saturday - get at least a point at Fulham on Sunday, City will be on the worst run in the division. Even Wolves, who sacked boss Gary O'Neil last Sunday and replaced him with Vitor Pereira, have earned double the number of points during the same period having played a game fewer. 
They are damning statistics for Pep Guardiola, even if he does have some mitigating circumstances with injuries to Ederson, Nathan Ake and Ruben Dias - who all missed the loss at Villa Park - and the long-term loss of midfield powerhouse Rodri. Guardiola was happy with Saturday's performance, despite defeat in Birmingham, but there is little solace to take at slipping further out of the title race. He may have needed to field a half-fit Manuel Akanji and John Stones at Villa Park but that does not account for City looking a shadow of their former selves. That does not justify the error Josko Gvardiol made to gift Jhon Duran a golden chance inside the first 20 seconds, or £100m man Jack Grealish again failing to have an impact on a game. There may be legitimate reasons for City's drop off, whether that be injuries, mental fatigue or just simply a team coming to the end of its lifecycle, but their form, which has plunged off a cliff edge, would have been unthinkable as they strolled to a fourth straight title last season. \"The worrying thing is the number of goals conceded,\" said ex-England captain Alan Shearer on BBC Match of the Day. \"The number of times they were opened up because of the lack of protection and legs in midfield was staggering. There are so many things that are wrong at this moment in time.\"\n", - "\n", - "This video can not be played To play this video you need to enable JavaScript in your browser. Man City 'have to find a way' to return to form - Guardiola\n", - "\n", - "Afterwards Guardiola was calm, so much so it was difficult to hear him in the news conference, a contrast to the frustrated figure he cut on the touchline. He said: \"It depends on us. The solution is bring the players back. We have just one central defender fit, that is difficult. We are going to try next game - another opportunity and we don't think much further than that. \"Of course there are more reasons. 
We concede the goals we don't concede in the past, we [don't] score the goals we score in the past. Football is not just one reason. There are a lot of little factors. \"Last season we won the Premier League, but we came here and lost. We have to think positive and I have incredible trust in the guys. Some of them have incredible pride and desire to do it. We have to find a way, step by step, sooner or later to find a way back.\" Villa boss Unai Emery highlighted City's frailties, saying he felt Villa could seize on the visitors' lack of belief. \"Manchester City are a little bit under the confidence they have normally,\" he said. \"The second half was different, we dominated and we scored. Through those circumstances they were feeling worse than even in the first half.\"\n", - "\n", - "Erling Haaland had one touch in the Villa box\n", - "\n", - "There are chinks in the armour never seen before at City under Guardiola and Erling Haaland conceded belief within the squad is low. He told TNT after the game: \"Of course, [confidence levels are] not the best. We know how important confidence is and you can see that it affects every human being. That is how it is, we have to continue and stay positive even though it is difficult.\" Haaland, with 76 goals in 83 Premier League appearances since joining City from Borussia Dortmund in 2022, had one shot and one touch in the Villa box. His 18 touches in the whole game were the lowest of all starting players and he has been self critical, despite scoring 13 goals in the top flight this season. Over City's last eight games he has netted just twice though, but Guardiola refused to criticise his star striker. He said: \"Without him we will be even worse but I like the players feeling that way. I don't agree with Erling. 
He needs to have the balls delivered in the right spots but he will fight for the next one.\"\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6276, Text: 'I am not good enough' - Guardiola faces daunting and major rebuild\n", - "\n", - "This video can not be played To play this video you need to enable JavaScript in your browser. 'I am not good enough' - Guardiola says he must find a 'solution' after derby loss\n", - "\n", - "Pep Guardiola says his sleep has suffered during Manchester City's deepening crisis, so he will not be helped by a nightmarish conclusion to one of the most stunning defeats of his long reign. Guardiola looked agitated, animated and on edge even after City led the Manchester derby through Josko Gvardiol's 36th-minute header, his reaction to the goal one of almost disdain that it came via a deflected cross as opposed to in his purist style. He sat alone with his eyes closed sipping from a water bottle before the resumption of the second half, then was denied even the respite of victory when Manchester United gave this largely dismal derby a dramatic conclusion it barely deserved with a remarkable late comeback. First, with 88 minutes on the clock, Matheus Nunes presented Amad Diallo with the ball before compounding his error by flattening the forward as he made an attempt to recover his mistake. Bruno Fernandes completed the formalities from the penalty spot. Worse was to come two minutes later when Lisandro Martinez's routine long ball caught City's defence inexplicably statuesque. Goalkeeper Ederson's positioning was awry, allowing the lively Diallo to pounce from an acute angle to leave Guardiola and his players stunned. It was the latest into any game, 88 minutes, that reigning Premier League champions had led then lost. It was also the first time City had lost a game they were leading so late on. 
And in a sign of City's previous excellence that is now being challenged, they have only lost four of 105 Premier League home games under Guardiola in which they have been ahead at half-time, winning 94 and drawing seven. Guardiola delivered a brutal self-analysis as he told Match of the Day: \"I am not good enough. I am the boss. I am the manager. I have to find solutions and so far I haven't. That's the reality. \"Not much else to say. No defence. Manchester United were incredibly persistent. We have not lost eight games in two seasons. We can't defend that.\"\n", - "\n", - "Manchester City manager Pep Guardiola in despair during the derby defeat to Manchester United\n", - "\n", - "Guardiola suggested the serious renewal will wait until the summer but the red flags have been appearing for weeks in the sudden and shocking decline of a team that has lost the aura of invincibility that left many opponents beaten before kick-off in previous years. He has had stated City must \"survive\" this season - whatever qualifies as survival for a club of such rich ambition - but the quest for a record fifth successive Premier League title is surely over as they lie nine points behind leaders Liverpool having played a game more. Their Champions League aspirations are also in jeopardy after another loss, this time against Juventus in Turin. City's squad has been allowed to grow too old together. The insatiable thirst for success seems to have gone, the scales of superiority have fallen away and opponents now sense vulnerability right until the final whistle, as United did here. The manner in which United were able, and felt able, to snatch this victory drove right to the heart of how City, and Guardiola, are allowing opponents to prey on their downfall. 
Guardiola has every reason to cite injuries, most significantly to Rodri and also John Stones as well as others, but this cannot be used an excuse for such a dramatic decline in standards, allied to the appearance of a soft underbelly that is so easily exploited. And City's rebuild will not be a quick fix. With every performance, every defeat, the scale of what lies in front of Guardiola becomes more obvious - and daunting. Manchester City's fans did their best to reassure Guardiola of their faith in him with a giant Barcelona-inspired banner draped from the stands before kick-off emblazoned with his image reading \"Més que un entrenador\" - \"More Than A Coach\". And Guardiola will now need to be more than a coach than at any time in his career. He will have the finances but it will be done with City's challengers also strengthening. Kevin de Bruyne, 34 in June, lasted 68 minutes here before he was substituted. Age and injuries are catching up with one of the greatest players of the Premier League era and he is unlikely to be at City next season. Mateo Kovacic, who replaced De Bruyne, is also 31 in May. Kyle Walker, 34, is being increasingly exposed. His most notable contribution here was an embarrassing collapse to the ground after the mildest head-to-head collision with Rasmus Hojlund. Ilkay Gundogan, another 34-year-old and a previous pillar of Guardiola's great successes, no longer has the legs or energy to exert influence. This looks increasingly like a season too far following his return from Barcelona. Flaws are also being exposed elsewhere, with previously reliable performers failing to hit previous standards. Phil Foden scored 27 goals and had 12 assists when he was Premier League Player of the Season last term. This year he has just three goals and two assists in 18 appearances in all competitions. He has no goals and just one assist in 11 Premier League games. 
Jack Grealish, who came on after 77 minutes against United, has not scored in a year for Manchester City, his last goal coming in a 2-2 draw against Crystal Palace on 16 December last year. He has, in the meantime, scored twice for England. Erling Haaland is also struggling as City lack creativity and cutting edge. He has three goals in his past 11 Premier League games after scoring 10 in his first five. And in another indication of City's impotence, and their reliance on Haaland, defender Gvardiol's goal against United was his fourth this season, making him their second highest scorer in all competitions behind the Norwegian striker, who has 18. Goalkeeper Ederson, so reliable for so long, has already been dropped once this season and did not cover himself in glory for United's winner. Guardiola, with that freshly signed two-year contract, insists he \"wants it\" as he treads on this alien territory of failure. He will be under no illusions about the size of the job in front of him as he placed his head in his hands in anguish after yet another damaging and deeply revealing defeat. 
City and Guardiola are in new, unforgiving territory.\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6076, Text: Pep Guardiola has said Manchester City will be his final managerial job in club football before he \"maybe\" coaches a national team.\n", + "Score: 0.6900, Text: Pep Guardiola has said Manchester City will be his final managerial job in club football before he \"maybe\" coaches a national team.\n", "\n", "The former Barcelona and Bayern Munich boss has won 15 major trophies since taking charge of City in 2016.\n", "\n", @@ -908,7 +891,17 @@ "\n", "\"We have to win the game and not look at what happens in the next one yet.\"\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5761, Text: 'Self-doubt, errors & big changes' - inside the crisis at Man City\n", + "Score: 0.6689, Text: 'I am not good enough' - Guardiola faces daunting and major rebuild\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 'I am not good enough' - Guardiola says he must find a 'solution' after derby loss\n", + "\n", + "Pep Guardiola says his sleep has suffered during Manchester City's deepening crisis, so he will not be helped by a nightmarish conclusion to one of the most stunning defeats of his long reign. Guardiola looked agitated, animated and on edge even after City led the Manchester derby through Josko Gvardiol's 36th-minute header, his reaction to the goal one of almost disdain that it came via a deflected cross as opposed to in his purist style. He sat alone with his eyes closed sipping from a water bottle before the resumption of the second half, then was denied even the respite of victory when Manchester United gave this largely dismal derby a dramatic conclusion it barely deserved with a remarkable late comeback. 
First, with 88 minutes on the clock, Matheus Nunes presented Amad Diallo with the ball before compounding his error by flattening the forward as he made an attempt to recover his mistake. Bruno Fernandes completed the formalities from the penalty spot. Worse was to come two minutes later when Lisandro Martinez's routine long ball caught City's defence inexplicably statuesque. Goalkeeper Ederson's positioning was awry, allowing the lively Diallo to pounce from an acute angle to leave Guardiola and his players stunned. It was the latest into any game, 88 minutes, that reigning Premier League champions had led then lost. It was also the first time City had lost a game they were leading so late on. And in a sign of City's previous excellence that is now being challenged, they have only lost four of 105 Premier League home games under Guardiola in which they have been ahead at half-time, winning 94 and drawing seven. Guardiola delivered a brutal self-analysis as he told Match of the Day: \"I am not good enough. I am the boss. I am the manager. I have to find solutions and so far I haven't. That's the reality. \"Not much else to say. No defence. Manchester United were incredibly persistent. We have not lost eight games in two seasons. We can't defend that.\"\n", + "\n", + "Manchester City manager Pep Guardiola in despair during the derby defeat to Manchester United\n", + "\n", + "Guardiola suggested the serious renewal will wait until the summer but the red flags have been appearing for weeks in the sudden and shocking decline of a team that has lost the aura of invincibility that left many opponents beaten before kick-off in previous years. He has had stated City must \"survive\" this season - whatever qualifies as survival for a club of such rich ambition - but the quest for a record fifth successive Premier League title is surely over as they lie nine points behind leaders Liverpool having played a game more. 
Their Champions League aspirations are also in jeopardy after another loss, this time against Juventus in Turin. City's squad has been allowed to grow too old together. The insatiable thirst for success seems to have gone, the scales of superiority have fallen away and opponents now sense vulnerability right until the final whistle, as United did here. The manner in which United were able, and felt able, to snatch this victory drove right to the heart of how City, and Guardiola, are allowing opponents to prey on their downfall. Guardiola has every reason to cite injuries, most significantly to Rodri and also John Stones as well as others, but this cannot be used an excuse for such a dramatic decline in standards, allied to the appearance of a soft underbelly that is so easily exploited. And City's rebuild will not be a quick fix. With every performance, every defeat, the scale of what lies in front of Guardiola becomes more obvious - and daunting. Manchester City's fans did their best to reassure Guardiola of their faith in him with a giant Barcelona-inspired banner draped from the stands before kick-off emblazoned with his image reading \"Més que un entrenador\" - \"More Than A Coach\". And Guardiola will now need to be more than a coach than at any time in his career. He will have the finances but it will be done with City's challengers also strengthening. Kevin de Bruyne, 34 in June, lasted 68 minutes here before he was substituted. Age and injuries are catching up with one of the greatest players of the Premier League era and he is unlikely to be at City next season. Mateo Kovacic, who replaced De Bruyne, is also 31 in May. Kyle Walker, 34, is being increasingly exposed. His most notable contribution here was an embarrassing collapse to the ground after the mildest head-to-head collision with Rasmus Hojlund. Ilkay Gundogan, another 34-year-old and a previous pillar of Guardiola's great successes, no longer has the legs or energy to exert influence. 
This looks increasingly like a season too far following his return from Barcelona. Flaws are also being exposed elsewhere, with previously reliable performers failing to hit previous standards. Phil Foden scored 27 goals and had 12 assists when he was Premier League Player of the Season last term. This year he has just three goals and two assists in 18 appearances in all competitions. He has no goals and just one assist in 11 Premier League games. Jack Grealish, who came on after 77 minutes against United, has not scored in a year for Manchester City, his last goal coming in a 2-2 draw against Crystal Palace on 16 December last year. He has, in the meantime, scored twice for England. Erling Haaland is also struggling as City lack creativity and cutting edge. He has three goals in his past 11 Premier League games after scoring 10 in his first five. And in another indication of City's impotence, and their reliance on Haaland, defender Gvardiol's goal against United was his fourth this season, making him their second highest scorer in all competitions behind the Norwegian striker, who has 18. Goalkeeper Ederson, so reliable for so long, has already been dropped once this season and did not cover himself in glory for United's winner. Guardiola, with that freshly signed two-year contract, insists he \"wants it\" as he treads on this alien territory of failure. He will be under no illusions about the size of the job in front of him as he placed his head in his hands in anguish after yet another damaging and deeply revealing defeat. City and Guardiola are in new, unforgiving territory.\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6526, Text: 'Self-doubt, errors & big changes' - inside the crisis at Man City\n", "\n", "Pep Guardiola has not been through a moment like this in his managerial career. 
Manchester City have lost nine matches in their past 12 - as many defeats as they had suffered in their previous 106 fixtures. At the end of October, City were still unbeaten at the top of the Premier League and favourites to win a fifth successive title. Now they are seventh, 12 points behind leaders Liverpool having played a game more. It has been an incredible fall from grace and left people trying to work out what has happened - and whether Guardiola can make it right. After discussing the situation with those who know him best, I have taken a closer look at the future - both short and long term - and how the current crisis at Man City is going to be solved.\n", "\n", @@ -918,7 +911,29 @@ "\n", "Guardiola has never protected his players so much. He has not criticised them and is not going to do so. They have won everything with him. Instead of doing more with them, he has tried doing less. He has sometimes given them more days off to clear their heads, so they can reset - two days this week for instance. Perhaps the time to change a team is when you are winning, but no-one was suggesting Man City were about to collapse when they were top and unbeaten after nine league games. Some people have asked how bad it has to get before City make a decision on Guardiola. The answer is that there is no decision to be made. Maybe if this was Real Madrid, Barcelona or Juventus, the pressure from outside would be massive and the argument would be made that Guardiola has to go. At City he has won the lot, so how can anyone say he is failing? Yes, this is a crisis. But given all their problems, City's renewed target is finishing in the top four. That is what is in all their heads now. The idea is to recover their essence by improving defensive concepts that are not there and re-establishing the intensity they are known for. Guardiola is planning to use the next two years of his contract, which is expected to be his last as a club manager, to prepare a new Manchester City. 
When he was at the end of his four years at Barcelona, he asked two managers what to do when you feel people are not responding to your instructions. Do you go or do the players go? Sir Alex Ferguson and Rafael Benitez both told him that the players need to go. Guardiola did not listen because of his emotional attachment to his players back then and he decided to leave the Camp Nou because he felt the cycle was over. He will still protect his players now but there is not the same emotional attachment - so it is the players who are going to leave this time. It is likely City will look to replace five or six regular starters. Guardiola knows it is the end of an era and the start of a new one. Changes will not be immediate and the majority of the work will be done in the summer. But they are open to any opportunities in January - and a holding midfielder is one thing they need. In the summer City might want to get Spain's Martin Zubimendi from Real Sociedad and they know 60m euros (£50m) will get him. He said no to Liverpool last summer even though everything was agreed, but he now wants to move on and the Premier League is the target. Even if they do not get Zubimendi, that is the calibre of footballer they are after. A new Manchester City is on its way - with changes driven by Guardiola, incoming sporting director Hugo Viana and the football department.\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5627, Text: Man City might miss out on Champions League - Guardiola\n", + "Score: 0.6335, Text: Man City's Dias ruled out for 'three or four weeks'\n", + "\n", + "Ruben Dias has won 10 major trophies during his time at Manchester City\n", + "\n", + "Manchester City have suffered a fresh injury blow with manager Pep Guardiola confirming Portugal central defender Ruben Dias has been ruled out for \"three or four weeks\" with a muscle injury. 
Dias, who suffered the injury in Saturday's 2-1 defeat by Manchester United, will miss the entire festive programme and potentially the FA Cup third-round tie with Salford on 11 January. The 27-year-old also faces a battle to be fit for City's crucial Champions League trip to Paris St-Germain on 22 January. Dias has already missed seven games with a calf injury this season, adding to a defensive injury list that has seen John Stones, Nathan Ake, Manuel Akanji and Kyle Walker all ruled out at various points, while Ballon d'Or winner Rodri will miss the remainder of the domestic season after suffering a cruciate knee ligament injury. \"It's a muscular problem and he will be out for three to four weeks,\" said Guardiola. \"After 75 minutes against United he felt something. But he's so strong and wanted to stay on the pitch. Now he's injured.\" Guardiola confirmed Stones, Akanji and midfielder Mateo Kovacic have all trained this week and could feature at Aston Villa on Saturday (12:30 GMT), but said goalkeeper Ederson was \"a doubt\" with an unspecified problem. \"Ederson has been struggling with some niggles in his leg, he doesn't feel completely fine,\" said Guardiola. \"Ederson is so important for us.\" Amid City's current run of one win in 11 games, surprise has been expressed about Guardiola's use of youngsters James McAtee and Nico O'Reilly. City made a point of keeping both players despite numerous loan options. Yet McAtee has made just two substitute appearances - coming on in the last minute on both occasions - while O'Reilly is yet to make his league debut. But it seems they will stay at the club for the second half of the season, with Guardiola replying \"I don't think so\" when asked if players might leave during the January transfer window. 
The Spaniard said he is \"not a big fan\" of buying players in January but it is \"possible\" City will look to sign someone because \"the circumstances of this season have been special\".\n", + "\n", + "Guardiola's mood was so downbeat in the immediate aftermath of the United defeat it was easy to imagine he might conclude he was no longer capable of doing the job. He gave his players a couple of days off afterwards and was brighter when he spoke to journalists in his scheduled briefing before the Villa trip. \"We'd just finished a game that we lost in the circumstances and I was not happy,\" he said. \"I try to be honest about the feelings of my teams. We fell down six times [number of Premier League games without a win], we have to stand up seven. There is no alternative. \"I'm fine. I'm a normal person with feelings like all of us. When the situation is going well we are better but it's normal. I would not go to the press conference if we were 1-0 up and expressing something that I didn't feel.\" Former Villa forward Jack Grealish has not scored for City in over a year but Guardiola pointed out he is not the only attacking player struggling this season. He added: \"We are struggling to create a little bit up front, but always I am optimistic about my players that they are going to turn and perform well.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.6182, Text: 'We have to find a way' - Guardiola vows to end relegation form\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 'Worrying' and 'staggering' - Why do Manchester City keep conceding?\n", + "\n", + "Manchester City are currently in relegation form and there is little sign of it ending. Saturday's 2-1 defeat at Aston Villa left them joint bottom of the form table over the past eight games with just Southampton for company. 
Saints, at the foot of the Premier League, have the same number of points, four, as City over their past eight matches having won one, drawn one and lost six - the same record as the floundering champions. And if Southampton - who appointed Ivan Juric as their new manager on Saturday - get at least a point at Fulham on Sunday, City will be on the worst run in the division. Even Wolves, who sacked boss Gary O'Neil last Sunday and replaced him with Vitor Pereira, have earned double the number of points during the same period having played a game fewer. They are damning statistics for Pep Guardiola, even if he does have some mitigating circumstances with injuries to Ederson, Nathan Ake and Ruben Dias - who all missed the loss at Villa Park - and the long-term loss of midfield powerhouse Rodri. Guardiola was happy with Saturday's performance, despite defeat in Birmingham, but there is little solace to take at slipping further out of the title race. He may have needed to field a half-fit Manuel Akanji and John Stones at Villa Park but that does not account for City looking a shadow of their former selves. That does not justify the error Josko Gvardiol made to gift Jhon Duran a golden chance inside the first 20 seconds, or £100m man Jack Grealish again failing to have an impact on a game. There may be legitimate reasons for City's drop off, whether that be injuries, mental fatigue or just simply a team coming to the end of its lifecycle, but their form, which has plunged off a cliff edge, would have been unthinkable as they strolled to a fourth straight title last season. \"The worrying thing is the number of goals conceded,\" said ex-England captain Alan Shearer on BBC Match of the Day. \"The number of times they were opened up because of the lack of protection and legs in midfield was staggering. There are so many things that are wrong at this moment in time.\"\n", + "\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 
Man City 'have to find a way' to return to form - Guardiola\n", + "\n", + "Afterwards Guardiola was calm, so much so it was difficult to hear him in the news conference, a contrast to the frustrated figure he cut on the touchline. He said: \"It depends on us. The solution is bring the players back. We have just one central defender fit, that is difficult. We are going to try next game - another opportunity and we don't think much further than that. \"Of course there are more reasons. We concede the goals we don't concede in the past, we [don't] score the goals we score in the past. Football is not just one reason. There are a lot of little factors. \"Last season we won the Premier League, but we came here and lost. We have to think positive and I have incredible trust in the guys. Some of them have incredible pride and desire to do it. We have to find a way, step by step, sooner or later to find a way back.\" Villa boss Unai Emery highlighted City's frailties, saying he felt Villa could seize on the visitors' lack of belief. \"Manchester City are a little bit under the confidence they have normally,\" he said. \"The second half was different, we dominated and we scored. Through those circumstances they were feeling worse than even in the first half.\"\n", + "\n", + "Erling Haaland had one touch in the Villa box\n", + "\n", + "There are chinks in the armour never seen before at City under Guardiola and Erling Haaland conceded belief within the squad is low. He told TNT after the game: \"Of course, [confidence levels are] not the best. We know how important confidence is and you can see that it affects every human being. That is how it is, we have to continue and stay positive even though it is difficult.\" Haaland, with 76 goals in 83 Premier League appearances since joining City from Borussia Dortmund in 2022, had one shot and one touch in the Villa box. 
His 18 touches in the whole game were the lowest of all starting players and he has been self critical, despite scoring 13 goals in the top flight this season. Over City's last eight games he has netted just twice though, but Guardiola refused to criticise his star striker. He said: \"Without him we will be even worse but I like the players feeling that way. I don't agree with Erling. He needs to have the balls delivered in the right spots but he will fight for the next one.\"\n", + "--------------------------------------------------------------------------------\n", + "Score: 0.5842, Text: Man City might miss out on Champions League - Guardiola\n", "\n", "Erling Haaland was part of the Manchester City side that won the Champions League for the first time in 2023\n", "\n", @@ -926,15 +941,19 @@ "\n", "Guardiola's side host Everton on Boxing Day, before a trip to Leicester on 29 December and a home match against West Ham on 4 January. Given all three opponents are in the bottom seven, it offers City a chance to improve on an appalling recent record of four points from eight games, which Guardiola acknowledges has left their lofty European ambitions in doubt. \"The one team that has been in the Champions League for the past years has been Manchester City,\" he added. \"Now we are at risk, of course we are. Definitely.\" Arsenal, Chelsea, Liverpool and Manchester United finished in the Premier League's top four from the 2005-06 season to 2008-09. At least three of them also occupied the top four spots for 15 successive campaigns until 2012. But United have spent five out of the past 11 seasons outside the Champions League. Arsenal spent six seasons out of the competition before returning last term. Liverpool missed out all but one year in seven from 2010, while Chelsea are in their second successive campaign outside Europe's elite. This term the threat to City comes from unexpected sources. 
As well as Forest and Bournemouth, Aston Villa are ahead of City, while Newcastle, Fulham and Brighton are also within a couple of points. \"There are a lot of contenders,\" said Guardiola, whose side have lost nine of their last 12 games in all competitions. \"For every club it is so important and if we are not winning games, we will be out. \"If we don't qualify it is because we don't deserve it, because we were not prepared and because we had a lot of problems and didn't solve them.\"\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5504, Text: Man City's Dias ruled out for 'three or four weeks'\n", + "Score: 0.5295, Text: 'So happy he is back' - 'integral' De Bruyne 'one of best we've seen'\n", "\n", - "Ruben Dias has won 10 major trophies during his time at Manchester City\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. Match of the Day: How Kevin de Bruyne inspired Man City back to winning ways\n", "\n", - "Manchester City have suffered a fresh injury blow with manager Pep Guardiola confirming Portugal central defender Ruben Dias has been ruled out for \"three or four weeks\" with a muscle injury. Dias, who suffered the injury in Saturday's 2-1 defeat by Manchester United, will miss the entire festive programme and potentially the FA Cup third-round tie with Salford on 11 January. The 27-year-old also faces a battle to be fit for City's crucial Champions League trip to Paris St-Germain on 22 January. Dias has already missed seven games with a calf injury this season, adding to a defensive injury list that has seen John Stones, Nathan Ake, Manuel Akanji and Kyle Walker all ruled out at various points, while Ballon d'Or winner Rodri will miss the remainder of the domestic season after suffering a cruciate knee ligament injury. \"It's a muscular problem and he will be out for three to four weeks,\" said Guardiola. 
\"After 75 minutes against United he felt something. But he's so strong and wanted to stay on the pitch. Now he's injured.\" Guardiola confirmed Stones, Akanji and midfielder Mateo Kovacic have all trained this week and could feature at Aston Villa on Saturday (12:30 GMT), but said goalkeeper Ederson was \"a doubt\" with an unspecified problem. \"Ederson has been struggling with some niggles in his leg, he doesn't feel completely fine,\" said Guardiola. \"Ederson is so important for us.\" Amid City's current run of one win in 11 games, surprise has been expressed about Guardiola's use of youngsters James McAtee and Nico O'Reilly. City made a point of keeping both players despite numerous loan options. Yet McAtee has made just two substitute appearances - coming on in the last minute on both occasions - while O'Reilly is yet to make his league debut. But it seems they will stay at the club for the second half of the season, with Guardiola replying \"I don't think so\" when asked if players might leave during the January transfer window. The Spaniard said he is \"not a big fan\" of buying players in January but it is \"possible\" City will look to sign someone because \"the circumstances of this season have been special\".\n", + "As Kevin de Bruyne made his way off the pitch after being replaced by Rico Lewis 16 minutes from the end of Manchester City's 3-0 win over Nottingham Forest, Pep Guardiola grabbed the Belgian and pulled him into an embrace. It was like a father offering affection to his son at a job well done. De Bruyne responded with a smile of satisfaction before continuing on his way to the top of the small terrace of City benches. A day earlier, Guardiola scoffed at the suggestions of Sky Sports duo Jamie Carragher and Gary Neville that there was some kind of rift between the boss and his star man. Why on earth, Guardiola countered, would he leave out someone who is capable of delivering moments like no-one else, even in his star-studded squad? 
Finally back on the pitch to start a Premier League game for the first time since August, De Bruyne proved exactly what Guardiola meant.\n", "\n", - "Guardiola's mood was so downbeat in the immediate aftermath of the United defeat it was easy to imagine he might conclude he was no longer capable of doing the job. He gave his players a couple of days off afterwards and was brighter when he spoke to journalists in his scheduled briefing before the Villa trip. \"We'd just finished a game that we lost in the circumstances and I was not happy,\" he said. \"I try to be honest about the feelings of my teams. We fell down six times [number of Premier League games without a win], we have to stand up seven. There is no alternative. \"I'm fine. I'm a normal person with feelings like all of us. When the situation is going well we are better but it's normal. I would not go to the press conference if we were 1-0 up and expressing something that I didn't feel.\" Former Villa forward Jack Grealish has not scored for City in over a year but Guardiola pointed out he is not the only attacking player struggling this season. He added: \"We are struggling to create a little bit up front, but always I am optimistic about my players that they are going to turn and perform well.\"\n", + "Manchester City's Kevin de Bruyne has now scored two goals this season\n", + "\n", + "On a night Guardiola conceded City had to win, it was the Belgian's firm header that created an eighth-minute opener for Bernardo Silva as they went on to finally end their seven-match winless run and close the gap to leaders Liverpool to nine points. The goal from De Bruyne that followed was a thing of beauty as he backed away from Jeremy Doku as his fellow countryman ran with the ball, arriving in enough space to take the short pass and send his shot into the corner. Afforded extra space by his manager's decision to play Jack Grealish alongside him in a central position, De Bruyne schemed in the way he usually does. 
As chances came and went, he was playing some significant role. De Bruyne lasted 74 minutes, his longest match time since completing the full 90 minutes against Brentford on 14 September. He was on the bench when he was announced as man of the match, a decision received with enthusiasm by the City support. \"I am so happy he is back,\" said Guardiola. \"He played 75 fantastic minutes. \"He deserves the best because he's a lovely guy and has been massively important for so many years since he arrived.\"\n", + "\n", + "City are now unbeaten in their past 31 Premier League games with De Bruyne starting. He has been involved in 25 goals (nine goals, 16 assists) in those games. De Bruyne said: \"There have never been issues between me and Pep. He knows I've been struggling. It's painful and uncomfortable. \"Hopefully I can get back to my body with not much pain and then I'll be fine.\" However, a bit like the victory itself - tarnished by an injury to Manuel Akanji that may rule the Switzerland defender out of the weekend trip to Crystal Palace, and a hamstring problem for Nathan Ake who has already missed five weeks with a similar injury this season that Guardiola said \"doesn’t look good\" and makes him feel \"sad\" for the Dutchman - there was a caveat as the City boss assessed De Bruyne's contribution. \"He fought a lot and he prepared himself,\" he said. \"He is back to his physicality. The minutes he played at Anfield were really good. \"Last season he was out for many months, this season as well. We will see how he recovers after a long time injured and how he feels in three days.\" De Bruyne recorded four or more shots and created four or more chances for the third time in a Premier League game this season. Despite only starting five Premier League games, only Arsenal's Bukayo Saka has done so more often this term. Guardiola's fear must be that if he pushes De Bruyne too far too quickly, his body will let him down. 
Former Man City defender Micah Richards told BBC Match of the Day: \"He is a top-quality player and one of the best we have seen. He always manages to find space on the pitch.\" \"He has been integral to Man City's success over a number of years,\" added former City boss Stuart Pearce on Amazon Prime. \"He is the go-to player that sets Erling Haaland alight with his passing. He creates goals, he scores goals. \"If you were to pick one player out over the last eight or nine years De Bruyne would be at the top of almost everyone's list.\" It is a delicate balance given his team are still nine points adrift of Premier League leaders Liverpool and are also outside the Champions League top eight before next week's trip to Italy and a meeting with Juventus, after which City will only have January first-phase games remaining to ensure they secure qualification for the last-16 without needing to be bothered by February's play-off round. However, as with his team, De Bruyne's recovery had to start somewhere.\n", "--------------------------------------------------------------------------------\n", - "Score: 0.5146, Text: 'Life is not easy' - Haaland penalty miss sums up Man City crisis\n", + "Score: 0.5169, Text: 'Life is not easy' - Haaland penalty miss sums up Man City crisis\n", "\n", "Manchester City striker Erling Haaland has now missed two of his 17 penalties taken in the Premier League\n", "\n", @@ -950,23 +969,17 @@ "\n", "This video can not be played To play this video you need to enable JavaScript in your browser. Dyche 'very pleased' with Everton defence in Man City draw\n", "--------------------------------------------------------------------------------\n", - "Score: 0.4713, Text: 'So happy he is back' - 'integral' De Bruyne 'one of best we've seen'\n", - "\n", - "This video can not be played To play this video you need to enable JavaScript in your browser. 
Match of the Day: How Kevin de Bruyne inspired Man City back to winning ways\n", - "\n", - "As Kevin de Bruyne made his way off the pitch after being replaced by Rico Lewis 16 minutes from the end of Manchester City's 3-0 win over Nottingham Forest, Pep Guardiola grabbed the Belgian and pulled him into an embrace. It was like a father offering affection to his son at a job well done. De Bruyne responded with a smile of satisfaction before continuing on his way to the top of the small terrace of City benches. A day earlier, Guardiola scoffed at the suggestions of Sky Sports duo Jamie Carragher and Gary Neville that there was some kind of rift between the boss and his star man. Why on earth, Guardiola countered, would he leave out someone who is capable of delivering moments like no-one else, even in his star-studded squad? Finally back on the pitch to start a Premier League game for the first time since August, De Bruyne proved exactly what Guardiola meant.\n", + "Score: 0.4618, Text: Amorim knows job in 'danger' without victories\n", "\n", - "Manchester City's Kevin de Bruyne has now scored two goals this season\n", + "This video can not be played To play this video you need to enable JavaScript in your browser. 'I know that every manager is in danger'\n", "\n", - "On a night Guardiola conceded City had to win, it was the Belgian's firm header that created an eighth-minute opener for Bernardo Silva as they went on to finally end their seven-match winless run and close the gap to leaders Liverpool to nine points. The goal from De Bruyne that followed was a thing of beauty as he backed away from Jeremy Doku as his fellow countryman ran with the ball, arriving in enough space to take the short pass and send his shot into the corner. Afforded extra space by his manager's decision to play Jack Grealish alongside him in a central position, De Bruyne schemed in the way he usually does. As chances came and went, he was playing some significant role. 
De Bruyne lasted 74 minutes, his longest match time since completing the full 90 minutes against Brentford on 14 September. He was on the bench when he was announced as man of the match, a decision received with enthusiasm by the City support. \"I am so happy he is back,\" said Guardiola. \"He played 75 fantastic minutes. \"He deserves the best because he's a lovely guy and has been massively important for so many years since he arrived.\"\n", + "Manchester United head coach Ruben Amorim says the vast expense of bringing him in will not shield him from the sack if he fails to produce a winning team. While United sources stress there is total support for the new boss inside Old Trafford, recent results and performances have made some fans nervous. Away supporters booed their team at the final whistle of the 2-0 defeat by Wolves at Molineux on Boxing Day, and with many exiting quickly it left the players to acknowledge hundreds of empty yellow seats before heading to the tunnel. \"The manager of Manchester United can never, no matter what, be comfortable,\" said Amorim. \"You can argue I have been here one month and I've had four training [sessions], but we are not winning. That is the reality.\" Amorim has collected seven points from seven Premier League games since taking charge last month - only one more point than fellow Portuguese Vitor Pereira, who has won both his games since becoming Wolves boss. Five defeats in Amorim's first 10 games is the worst record of any new United manager since Walter Crickmer, who stepped up from being club secretary in the 1930s. It is not what was anticipated when chief executive Omar Berrada flew to Lisbon to offer Amorim the job in the wake of Erik ten Hag's dismissal on 28 October. United were so convinced in Amorim, they paid Sporting £10.6m in compensation to get him out of his contract. But Amorim does not believe that will save him if results do not improve. 
\"I know that if we don't win, regardless if they pay the buyout or not, every manager is in danger,\" he said. \"I like that because that is the job.\"\n", "\n", - "City are now unbeaten in their past 31 Premier League games with De Bruyne starting. He has been involved in 25 goals (nine goals, 16 assists) in those games. De Bruyne said: \"There have never been issues between me and Pep. He knows I've been struggling. It's painful and uncomfortable. \"Hopefully I can get back to my body with not much pain and then I'll be fine.\" However, a bit like the victory itself - tarnished by an injury to Manuel Akanji that may rule the Switzerland defender out of the weekend trip to Crystal Palace, and a hamstring problem for Nathan Ake who has already missed five weeks with a similar injury this season that Guardiola said \"doesn’t look good\" and makes him feel \"sad\" for the Dutchman - there was a caveat as the City boss assessed De Bruyne's contribution. \"He fought a lot and he prepared himself,\" he said. \"He is back to his physicality. The minutes he played at Anfield were really good. \"Last season he was out for many months, this season as well. We will see how he recovers after a long time injured and how he feels in three days.\" De Bruyne recorded four or more shots and created four or more chances for the third time in a Premier League game this season. Despite only starting five Premier League games, only Arsenal's Bukayo Saka has done so more often this term. Guardiola's fear must be that if he pushes De Bruyne too far too quickly, his body will let him down. Former Man City defender Micah Richards told BBC Match of the Day: \"He is a top-quality player and one of the best we have seen. He always manages to find space on the pitch.\" \"He has been integral to Man City's success over a number of years,\" added former City boss Stuart Pearce on Amazon Prime. \"He is the go-to player that sets Erling Haaland alight with his passing. 
He creates goals, he scores goals. \"If you were to pick one player out over the last eight or nine years De Bruyne would be at the top of almost everyone's list.\" It is a delicate balance given his team are still nine points adrift of Premier League leaders Liverpool and are also outside the Champions League top eight before next week's trip to Italy and a meeting with Juventus, after which City will only have January first-phase games remaining to ensure they secure qualification for the last-16 without needing to be bothered by February's play-off round. However, as with his team, De Bruyne's recovery had to start somewhere.\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.4525, Text: Manchester United manager Ruben Amorim has questioned the \"choices\" of people close to forward Marcus Rashford. Rashford, 27, said he was \"ready for a new challenge\" in an interview after being dropped for the 2-1 win at Manchester City on 15 December. The England international subsequently missed the 4-3 Carabao Cup quarter-final defeat by Tottenham and was also left out as United lost 3-0 to Bournemouth on Sunday, adding to speculation he could leave Old Trafford in January. \"I understand these players have a lot of people around them, making choices that are not the first idea from the player. \"They chose to do the interview as it is not just Marcus.\"\n", + "The five-day gap between the home game with Newcastle on 30 December and an immensely difficult visit to old rivals and title favourites Liverpool on 5 January is the longest spell Amorim will have had to work with his players since his appointment. He will have another spare week after that, then three more midweek games to work with a squad United sources say is not expected to change much in personnel during the January transfer window, because of the club's tight Profit and Sustainability position. 
Evidently, it would have been far easier for such a dramatic transition to take place during the summer. Amorim did ask if his switch could be delayed until the end of the season but that request was rejected by Berrada. \"There's no point talking or thinking about that,\" said Amorim. \"I'm here and have to focus on the job. \"It's part of football to have these difficult moments. I already knew it was going to be tough. You expect to win more games, to have players with more confidence to sell the idea and to work and improve things. \"At this moment it's really hard. We have to survive to have time and then to improve the team.\"\n", "\n", - "This video can not be played To play this video you need to enable JavaScript in your browser. 'He wants to play. He is trying'\n", + "Manchester United head coach Ruben Amorim has won four and lost five of his first 10 games in charge\n", "\n", - "Rashford has scored 138 goals in 426 appearances for the club since making his debut in 2016, having come through the United youth ranks. However, while he managed 30 goals in all competitions in 2022-23, he has struggled for form in three of the previous four seasons and attracted criticism from pundits and fans for a number of laboured displays during that time. Amorim said he can \"separate\" the decisions of those advising Rashford from his relationship with the player. \"At the moment I'm focused on improving Marcus,\" he added. \"We need a talented guy like Marcus. I forget the interview now and see what I see on the pitch.\" Regarding Rashford's future, the Portuguese boss said it is for him and the club \"to deal with that when the time comes\". Speaking to the wider media before United face Wolverhampton Wanderers on Thursday, Amorim denied the talk around Rashford was a distraction, adding: \"Some guys have a big responsibility here because they have been here for a long time. 
\"If you have big talents, [we need] big performances, big responsibility, big engagement to push everybody forward in this moment. Like any other player, [we want him to be] the best he can be. \"This is maybe one of the lowest moments in our club. We have to face it and be strong.\" United's humbling defeat by the Cherries means they head into Christmas in 13th place in the Premier League, after Wolves they host Newcastle on 30 December. It will be their lowest position in the table at this stage since they were 15th in 1986, just over two months into Sir Alex Ferguson's reign as manager. Amorim had to halt his post-match news conference on Sunday because of a leak in the ceiling of the press room. Asked later how he intended to reverse fortunes at Old Trafford, the former Sporting coach pointed to the ceiling and said: \"If I knew, I would solve all the problems of this club, even this. \"I know what I am going to do. That is so clear for me. I will not say I feel relaxed because I'm really frustrated. It's a very difficult moment but we will solve problems step by step and find answers for everything.\"\n", + "Former United star Cristiano Ronaldo has backed Portuguese compatriot Amorim to turn the club's fortunes around. Ronaldo's second spell at Old Trafford ended in November 2022 following an explosive TV interview with presenter and journalist Piers Morgan. \"He [Amorim] did a fantastic job in Portugal with my [club] Sporting,\" said Ronaldo. \"But the Premier League is a different beast, the most competitive league in the world. I knew that it would be tough and they will continue the storm. \"But the storm will finish and the sun will rise. 
Things crossed, it will be good with him and I hope the best for Manchester United because it is a club I still love.\" Al Nassr forward Ronaldo, 39, who was speaking at the Globe Soccer Awards in Dubai where he was named the Best Middle East player of 2024, added: \"I will continue to say, the problem is not the coaches. \"It's like the aquarium and you have the fish inside and it's sick, and you take him out and fix the problem. \"If you put it back in the aquarium it will be sick again. This is the problem of Manchester United. It is the same.\"\n", "--------------------------------------------------------------------------------\n" ] } @@ -1022,7 +1035,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-05 06:14:34,931 - INFO - Successfully created RAG chain\n" + "2025-02-05 23:50:03,454 - INFO - Successfully created RAG chain\n" ] } ], @@ -1042,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1055,13 +1068,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "RAG Response: Pep Guardiola expressed significant concern and frustration regarding Manchester City's current form, which he described as one of the worst runs of his managerial career. He admitted to feeling personally affected, noting that his sleep, diet, and overall state of mind had suffered during the team's downturn in results. Guardiola repeatedly emphasized the need to find solutions and took responsibility for the situation, stating, \"I am not good enough\" and acknowledging the necessity for a \"step-by-step\" recovery. Despite the struggles, he maintained confidence in his players and highlighted injuries, defensive mistakes, and a lack of creativity as contributing factors to the decline. 
He also recognized the daunting challenge of a potential rebuild, given the team's aging squad and loss of dominance.\n", - "RAG response generated in 3.66 seconds\n" + "RAG Response: The match between Fulham and Liverpool ended in a 2-2 draw at Anfield. Liverpool played most of the game with ten men after Andy Robertson was sent off in the 17th minute for denying a goalscoring opportunity. Despite this setback, Liverpool twice came from behind, with Diogo Jota scoring an 86th-minute equalizer. Liverpool maintained over 60% possession and led in several attacking metrics, showing resilience and fighting spirit. The performance was widely praised, with both teams earning compliments for their bravery and effort.\n", + "RAG response generated in 4.57 seconds\n" ] } ], "source": [ - "logging.disable(sys.maxsize) # Disable logging to prevent tqdm output\n", "start_time = time.time()\n", "try:\n", " rag_response = rag_chain.invoke(query)\n", @@ -1070,10 +1082,13 @@ " print(f\"RAG response generated in {rag_elapsed_time:.2f} seconds\")\n", "except NotFoundError as e:\n", " print(f\"Error: Azure OpenAI resource not found. Please check your configuration. Details: {str(e)}\")\n", + "except InternalServerFailureException as e:\n", + " if \"query request rejected\" in str(e):\n", + " print(\"Error: Search request was rejected due to rate limiting. 
Please try again later.\")\n", + " else:\n", + " print(f\"Internal server error occurred: {str(e)}\")\n", "except Exception as e:\n", - " print(f\"Unexpected error occurred: {str(e)}\")\n", - "finally:\n", - " logging.disable(logging.NOTSET) # Re-enable logging" + " print(f\"Unexpected error occurred: {str(e)}\")" ] }, { @@ -1090,7 +1105,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1105,20 +1120,16 @@ "text": [ "\n", "Query 1: What happened in the match between Fullham and Liverpool?\n", - "Response: In the match between Fulham and Liverpool, the two teams played to an exciting 2-2 draw in the Premier League at Anfield. Liverpool played nearly the entire match with ten men after Andy Robertson was shown a red card in the 17th minute for denying a goalscoring opportunity. Despite being a man down, Liverpool twice came from behind to secure the draw, with an 86th-minute equaliser from Diogo Jota highlighting their resilience.\n", - "\n", - "Liverpool's performance was widely praised for their \"phenomenal\" effort in the face of adversity, maintaining over 60% possession and leading in key attacking metrics. Fulham, enjoying their best Premier League start since 2003, also showed bravery and strong performance, with their left-back Antonee Robinson noting it \"didn't feel like [Liverpool] had 10 men at all.\"\n", - "Time taken: 9.48 seconds\n", + "Response: The match between Fulham and Liverpool ended in a 2-2 draw at Anfield. Liverpool played most of the game with ten men after Andy Robertson was sent off in the 17th minute for denying a goalscoring opportunity. Despite this setback, Liverpool twice came from behind, with Diogo Jota scoring an 86th-minute equalizer. Liverpool maintained over 60% possession and led in several attacking metrics, showing resilience and fighting spirit. 
The performance was widely praised, with both teams earning compliments for their bravery and effort.\n", + "Time taken: 1.24 seconds\n", "\n", "Query 2: What was manchester city manager pep guardiola's reaction to the team's current form?\n", - "Response: Pep Guardiola expressed significant concern and frustration regarding Manchester City's current form, which he described as one of the worst runs of his managerial career. He admitted to feeling personally affected, noting that his sleep, diet, and overall state of mind had suffered during the team's downturn in results. Guardiola repeatedly emphasized the need to find solutions and took responsibility for the situation, stating, \"I am not good enough\" and acknowledging the necessity for a \"step-by-step\" recovery. Despite the struggles, he maintained confidence in his players and highlighted injuries, defensive mistakes, and a lack of creativity as contributing factors to the decline. He also recognized the daunting challenge of a potential rebuild, given the team's aging squad and loss of dominance.\n", - "Time taken: 1.12 seconds\n", + "Response: Manchester City manager Pep Guardiola expressed significant frustration and self-doubt about his team's current form. He admitted his state of mind was \"ugly,\" his sleep was suffering, and his digestion was affected. Guardiola admitted he was \"not good enough\" to resolve the issues with the current team and took responsibility for finding solutions to reverse their decline. He criticized the team's defensive performances and uncharacteristic errors and acknowledged that confidence was decreasing among players. Guardiola recognized that the team required changes and hinted at a potential major rebuild. 
Despite the struggles, he remained determined to address the crisis and insisted that this situation was an opportunity to prepare for a new Manchester City in the future.\n", + "Time taken: 3.30 seconds\n", "\n", "Query 3: What happened in the match between Fullham and Liverpool?\n", - "Response: In the match between Fulham and Liverpool, the two teams played to an exciting 2-2 draw in the Premier League at Anfield. Liverpool played nearly the entire match with ten men after Andy Robertson was shown a red card in the 17th minute for denying a goalscoring opportunity. Despite being a man down, Liverpool twice came from behind to secure the draw, with an 86th-minute equaliser from Diogo Jota highlighting their resilience.\n", - "\n", - "Liverpool's performance was widely praised for their \"phenomenal\" effort in the face of adversity, maintaining over 60% possession and leading in key attacking metrics. Fulham, enjoying their best Premier League start since 2003, also showed bravery and strong performance, with their left-back Antonee Robinson noting it \"didn't feel like [Liverpool] had 10 men at all.\"\n", - "Time taken: 0.30 seconds\n" + "Response: The match between Fulham and Liverpool ended in a 2-2 draw at Anfield. Liverpool played most of the game with ten men after Andy Robertson was sent off in the 17th minute for denying a goalscoring opportunity. Despite this setback, Liverpool twice came from behind, with Diogo Jota scoring an 86th-minute equalizer. Liverpool maintained over 60% possession and led in several attacking metrics, showing resilience and fighting spirit. 
The performance was widely praised, with both teams earning compliments for their bravery and effort.\n", + "Time taken: 0.32 seconds\n" ] } ], @@ -1138,8 +1149,15 @@ " elapsed_time = time.time() - start_time\n", " print(f\"Response: {response}\")\n", " print(f\"Time taken: {elapsed_time:.2f} seconds\")\n", + "except NotFoundError as e:\n", + " print(f\"Error: Azure OpenAI resource not found. Please check your configuration. Details: {str(e)}\")\n", + "except InternalServerFailureException as e:\n", + " if \"query request rejected\" in str(e):\n", + " print(\"Error: Search request was rejected due to rate limiting. Please try again later.\")\n", + " else:\n", + " print(f\"Internal server error occurred: {str(e)}\")\n", "except Exception as e:\n", - " raise ValueError(f\"Error generating RAG response: {str(e)}\")" + " print(f\"Unexpected error occurred: {str(e)}\")" ] }, { @@ -1171,7 +1189,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.12.3" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/azure/azure_index.json b/azure/azure_index.json index bd66f3d5..ef514b44 100644 --- a/azure/azure_index.json +++ b/azure/azure_index.json @@ -40,7 +40,7 @@ "enabled": true, "fields": [ { - "dims": 3072, + "dims": 1536, "index": true, "name": "embedding", "similarity": "dot_product", @@ -71,4 +71,4 @@ } }, "sourceParams": {} - } \ No newline at end of file + }