Testing RAG implementation and local deployment #2

Open · wants to merge 6 commits into base: main

1 change: 1 addition & 0 deletions 2_vector_db/embeddings/different_types_of_embeddings.py
@@ -22,6 +22,7 @@ def word_embeddings():
# - min_count=1: Ignores all words with total frequency lower than this
# - workers=4: Number of CPU cores to use for training
print("Word Embedding for 'cat':", model.wv['cat'])
print("Word Embedding for 'dog':", model.wv['dog'])

# Sentence Embeddings
def sentence_embeddings():
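
For context, the model used above is presumably a gensim Word2Vec model trained earlier in word_embeddings(); only the min_count and workers values appear in the comments shown in this hunk. A minimal sketch under those assumptions (the toy corpus and the remaining parameters are illustrative, not taken from this diff):

from gensim.models import Word2Vec

# Toy corpus: a list of tokenized sentences (assumed for illustration)
sentences = [
    ["the", "cat", "sat", "on", "the", "mat"],
    ["the", "dog", "chased", "the", "cat"],
]

# Train a small Word2Vec model; min_count=1 and workers=4 match the comments above
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

# Every word that survived min_count now has a dense vector in model.wv
print("Word Embedding for 'cat':", model.wv['cat'])
print("Word Embedding for 'dog':", model.wv['dog'])
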
15 changes: 8 additions & 7 deletions 2_vector_db/embeddings/simple_embedding.py
@@ -13,15 +13,16 @@

# Plotting the embeddings
plt.figure(figsize=(10, 8))

for word, embedding in word_embeddings.items():
plt.scatter(embedding[0], embedding[1], label=word)
plt.annotate(word, (embedding[0], embedding[1]))
plt.scatter(embedding[0], embedding[1], marker='o', s=100, edgecolors='black', alpha=0.75)
plt.annotate(word, (embedding[0], embedding[1]), textcoords="offset points", xytext=(5, 5), ha='center', fontsize=10)

plt.title("2D Word Embeddings", fontsize=14)
plt.xlabel("Dimension 1", fontsize=12)
plt.ylabel("Dimension 2", fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)

plt.title("Simple 2D Word Embeddings")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.legend()
plt.grid(True)
plt.show()

# Print out the embeddings
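
The loop above assumes word_embeddings is a dict mapping each word to a 2-D vector. A minimal runnable sketch of the revised plotting block under that assumption (the sample words and vectors are made up for illustration):

import numpy as np
import matplotlib.pyplot as plt

# Assumed input shape: word -> 2-D embedding
word_embeddings = {
    "cat": np.array([0.2, 0.8]),
    "dog": np.array([0.3, 0.7]),
    "car": np.array([0.9, 0.1]),
}

plt.figure(figsize=(10, 8))
for word, embedding in word_embeddings.items():
    plt.scatter(embedding[0], embedding[1], marker='o', s=100, edgecolors='black', alpha=0.75)
    plt.annotate(word, (embedding[0], embedding[1]), textcoords="offset points",
                 xytext=(5, 5), ha='center', fontsize=10)

plt.title("2D Word Embeddings", fontsize=14)
plt.xlabel("Dimension 1", fontsize=12)
plt.ylabel("Dimension 2", fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()

Note that plt.legend() from the old version is intentionally gone: the new scatter calls no longer pass label=word, and the offset annotations carry the word names instead.
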
18 changes: 13 additions & 5 deletions 2_vector_db/faiss_annoy_pinecone/creating_vector_db.py
@@ -1,13 +1,21 @@
from pinecone import Pinecone, ServerlessSpec
from pinecone import Pinecone
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
import os
from dotenv import load_dotenv

load_dotenv()

try:
# Initialize Pinecone
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
# Initialize the Pinecone API key (prompt for it if it is not set in the environment)
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
PINECONE_API_KEY = input("Please enter your PINECONE API key: ")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# Alternatively, the Pinecone API key could be hard-coded here (not recommended):
# pc = Pinecone(api_key= 'pcsk_27UvEa_PHyZ3n7E6euuUHXg7CPCjSb7HmEDe8j1XqzqmHLZHgpswygWhAHu7weNmuqWDrT')

print("Pinecone initialized successfully.")

# Load a pre-trained model
@@ -30,7 +38,7 @@
print(f"Vector dimension: {dimension}")

# Define index name
index_name = "ragudemy"
index_name = "firstPineconeIndex"

# Check if index already exists
existing_indexes = pc.list_indexes()
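
The rest of this script is outside the hunk, but it presumably creates the index and upserts the embeddings. A hedged sketch of how that typically looks with the v3+ Pinecone client and the SentenceTransformer loaded above; the cloud/region values, the sample texts, and the model variable name are assumptions:

# Create the index if it does not exist yet
if index_name not in existing_indexes.names():
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # assumed cloud/region
    )

index = pc.Index(index_name)

# Embed a few sample sentences and upsert them with string IDs (sample data assumed)
texts = ["Tesla builds electric cars.", "Pinecone stores vectors."]
vectors = model.encode(texts)
index.upsert(vectors=[(str(i), vec.tolist()) for i, vec in enumerate(vectors)])
print(index.describe_index_stats())
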
2 changes: 1 addition & 1 deletion 2_vector_db/vectors_search/image_search.py
@@ -46,7 +46,7 @@ def search_similar_images(query_image_path, vector_db):
print("Creating vector database...")
vector_db = create_vector_db(image_files)

query_image_path = "2_vector_db/3.png"
query_image_path = "2_vector_db/2.png"
print(f"\nPerforming similarity search with query image: {query_image_path}")
results = search_similar_images(query_image_path, vector_db)

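create_vector_db and search_similar_images themselves are not shown in this hunk. For readers skimming only the diff, here is a hypothetical sketch of the kind of CLIP-plus-FAISS flow such functions commonly implement; the model name, return format, and k are assumptions, not the repository's actual code:

import faiss
import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer

clip_model = SentenceTransformer("clip-ViT-B-32")  # assumed image embedding model

def create_vector_db(image_files):
    # Encode every image into an embedding and index the vectors with FAISS
    embeddings = np.array([clip_model.encode(Image.open(p)) for p in image_files], dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return {"index": index, "paths": image_files}

def search_similar_images(query_image_path, vector_db, k=3):
    # Embed the query image and return the k nearest stored images with their distances
    query = np.array([clip_model.encode(Image.open(query_image_path))], dtype="float32")
    distances, ids = vector_db["index"].search(query, k)
    return [(vector_db["paths"][i], float(d)) for i, d in zip(ids[0], distances[0])]
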
1,000 changes: 500 additions & 500 deletions 3_building_simple_rag_pipeline/tesla_motors_data.csv

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions 4_advanced_RAG/multi-vector.py
@@ -35,6 +35,7 @@ def fetch_html(url):
try:
response = requests.get(url, headers=headers)
response.raise_for_status()
response.encoding = 'utf-8' # Force UTF-8 encoding
return response.text
except requests.RequestException as e:
print(f"Error fetching the website: {e}")
@@ -45,8 +46,10 @@ def process_website(url):
if not html_content:
raise ValueError("No content could be fetched from the website.")

with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.html') as temp_file:
temp_file.write(html_content)
cleaned_text = html_content.encode('utf-8', errors='ignore').decode('utf-8')

with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.html', encoding='utf-8') as temp_file:
temp_file.write(cleaned_text)
temp_file_path = temp_file.name

try:
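
After the cleaned HTML is written to the temp file, the remainder of process_website (inside the try block, not shown here) presumably loads and chunks it for multi-vector retrieval. A minimal sketch assuming LangChain's BSHTMLLoader and RecursiveCharacterTextSplitter; the chunk sizes are illustrative:

from langchain_community.document_loaders import BSHTMLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the UTF-8 temp file and strip the HTML markup
loader = BSHTMLLoader(temp_file_path, open_encoding="utf-8")
documents = loader.load()

# Split into chunks; each chunk later gets its own vector (sizes are illustrative)
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(documents)
print(f"Split website into {len(chunks)} chunks")
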
5,001 changes: 5,001 additions & 0 deletions 6_sql_rag/tesla_motors_data.csv

Large diffs are not rendered by default.

Binary file modified 6_sql_rag/tesla_motors_data.db
Binary file not shown.
3 changes: 2 additions & 1 deletion 9_prompt_caching/with_claude.py
@@ -2,7 +2,8 @@
import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import CharacterTextSplitter
from langchain_anthropic import ChatAnthropic, AnthropicEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain_community.embeddings import AnthropicEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
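
If the installed langchain_community version does not actually provide AnthropicEmbeddings (Anthropic does not currently offer a first-party embeddings API, so this import may fail), a local embedding model is a common stand-in. A hedged sketch using the other imports this file already has; the embedding and Claude model names are assumptions:

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_anthropic import ChatAnthropic
from langchain.chains import RetrievalQA

# Local sentence-transformers model as the embedding backend (assumed choice)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# chunks: list of text strings produced by CharacterTextSplitter earlier in the script
vector_store = FAISS.from_texts(chunks, embeddings)

llm = ChatAnthropic(model="claude-3-5-sonnet-20241022")  # assumed model name
qa = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
print(qa.invoke({"query": "What does the retrieved page say about prompt caching?"}))
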
1 change: 0 additions & 1 deletion Requirements.txt
@@ -88,7 +88,6 @@ pdfplumber
pillow
pinecone
pinecone-plugin-inference
pinecone-plugin-interface
platformdirs
pooch
protobuf
3 changes: 3 additions & 0 deletions flagged/log.csv
@@ -0,0 +1,3 @@
Upload PDF,Enter your question,Answer,Processing Log,flag,username,timestamp
flagged\Upload PDF\addd6a8e72f9ebd9faee\Use long short-term memory to enhance Internet of Things for combinedsewer overflow monitoring.pdf,what is sewer overflow?,,,,,2025-03-28 14:57:45.291407
flagged\Upload PDF\fb9ce6b62de17c74f929\introlinux4-spring22.pdf,what is bash shell?,,,,,2025-03-31 20:47:38.308820