Skip to content
Snippets Groups Projects
Commit 783f88b1 authored by abir.chebbi's avatar abir.chebbi
Browse files

add part 2

parent 7e26a1ca
No related branches found
No related tags found
No related merge requests found
...@@ -6,7 +6,7 @@ import time ...@@ -6,7 +6,7 @@ import time
client = boto3.client('opensearchserverless') client = boto3.client('opensearchserverless')
service = 'aoss' service = 'aoss'
Vector_store_name='test1' Vector_store_name='test2'
def createEncryptionPolicy(client): def createEncryptionPolicy(client):
"""Creates an encryption policy that matches all collections beginning with test""" """Creates an encryption policy that matches all collections beginning with test"""
......
File moved
...@@ -12,7 +12,7 @@ import json ...@@ -12,7 +12,7 @@ import json
## Local directory for storing PDF files ## Local directory for storing PDF files
LOCAL_DIR = "pdfs" LOCAL_DIR = "pdfs"
index_name = "cloud_lecture_test3" index_name = "cloud_lecture"
## S3_client ## S3_client
...@@ -29,7 +29,7 @@ credentials = boto3.Session().get_credentials() ...@@ -29,7 +29,7 @@ credentials = boto3.Session().get_credentials()
awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss') awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss')
## Vector DB endpoint ## Vector DB endpoint
host= 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com' host= 'j6phg34iv0f2rlvxwawd.us-east-1.aoss.amazonaws.com'
## Opensearch Client ## Opensearch Client
OpenSearch_client = OpenSearch( OpenSearch_client = OpenSearch(
...@@ -142,16 +142,16 @@ def main(): ...@@ -142,16 +142,16 @@ def main():
download_documents(BUCKET_NAME,LOCAL_DIR) download_documents(BUCKET_NAME,LOCAL_DIR)
loader= PyPDFDirectoryLoader(LOCAL_DIR) loader= PyPDFDirectoryLoader(LOCAL_DIR)
docs = loader.load() docs = loader.load()
print(docs[80]) print(docs[1])
chunks = split_text(docs, 1000, 100) chunks = split_text(docs, 1000, 100)
print(chunks[80]) print(chunks[1])
embeddings= generate_embeddings(bedrock_client, chunks) embeddings= generate_embeddings(bedrock_client, chunks)
print(embeddings[80]) print(embeddings[1])
texts = [chunk.page_content for chunk in chunks] texts = [chunk.page_content for chunk in chunks]
# Prepare metadata for each chunk # Prepare metadata for each chunk
meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks] meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks]
print(embeddings[80]) print(embeddings[1])
print(meta_data[80]) print(meta_data[1])
store_embeddings(embeddings, texts, meta_data ,host, awsauth,index_name) store_embeddings(embeddings, texts, meta_data ,host, awsauth,index_name)
......
...@@ -14,7 +14,6 @@ opensearch_client = OpenSearch( ...@@ -14,7 +14,6 @@ opensearch_client = OpenSearch(
use_ssl=True, use_ssl=True,
verify_certs=True, verify_certs=True,
connection_class=RequestsHttpConnection, connection_class=RequestsHttpConnection,
timeout=300
) )
# Embeddings Client # Embeddings Client
......
File moved
import boto3
import streamlit as st
## Bedrock
from langchain.llms.bedrock import Bedrock
## prompt and chain
from langchain.chains import RetrievalQA
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from langchain import PromptTemplate
# --- Module-level setup (runs once at import / app start) ---

# Bedrock runtime client, shared by embedding generation and chat completion.
# Region/credentials come from the ambient AWS configuration (env/profile).
bedrock_client = boto3.client(service_name="bedrock-runtime")

# Configure the Streamlit page before any other st.* call (Streamlit requires
# set_page_config to be the first Streamlit command executed).
st.set_page_config(
    page_title="cloud lecture lab",
    page_icon="💬",
    layout="centered"
)

# Page title shown at the top of the chat UI.
st.title("Chat with your lecture")

# AWS and OpenSearch Serverless (aoss) configuration.
# NOTE(review): host and index_name are hard-coded; they must match the
# collection endpoint and index created by the ingestion script — confirm.
host = 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com'
index_name = 'cloud_lecture'
# SigV4 signer for OpenSearch Serverless ('aoss') in us-east-1.
awsauth = AWSV4SignerAuth(boto3.Session().get_credentials(), 'us-east-1', 'aoss')

# OpenSearch client used for k-NN similarity search over stored embeddings.
opensearch_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
def get_embedding(question, bedrock_client):
    """Embed *question* with Amazon Titan via Bedrock and return the vector."""
    titan = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock_client)
    return titan.embed_query(question)
def similarity_search(embed_query, index_name):
    """Run a k-NN search for *embed_query* and return the top-5 raw hits.

    Uses the module-level ``opensearch_client``; each hit carries the stored
    chunk under ``_source``.
    """
    knn_clause = {
        "vector_field": {
            "vector": embed_query,
            "k": 5,
        }
    }
    search_body = {"size": 5, "query": {"knn": knn_clause}}
    result = opensearch_client.search(index=index_name, body=search_body)
    return result['hits']['hits']
def prepare_prompt(question, context):
    """Build the professor-persona prompt with *context* and *question* filled in."""
    template = """
You are a Professor. The student will ask you a questions about the lecture.
Use following piece of context to answer the question.
If you don't know the answer, just say you don't know.
Context: <context>
{context}
</context>
Question: {question}
Answer:
"""
    # PromptTemplate validates that both placeholders are supplied.
    prompt_template = PromptTemplate(
        template=template,
        input_variables=['context', 'question'],
    )
    return prompt_template.format(context=context, question=question)
def generate_answer(prompt):
    """Answer *prompt* with Claude v2 on Bedrock and return the reply text.

    Returns a plain ``str`` (the message content) rather than the raw
    ``AIMessage`` object, so callers can store it in chat history and pass it
    straight to ``st.markdown`` without rendering the message object's repr.
    """
    # Low temperature keeps answers close to the retrieved lecture context.
    model = BedrockChat(model_id="anthropic.claude-v2", model_kwargs={"temperature": 0.1})
    answer = model.invoke(prompt)
    # BedrockChat.invoke returns an AIMessage; extract its text content.
    return answer.content
def main():
    """Streamlit chat loop: render history, take a question, answer it via RAG."""
    # initialize chat session in streamlit if not already present
    # (st.session_state persists across Streamlit reruns within one session)
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # display chat history accumulated so far (page reruns on every input)
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # input field for user's message
    user_prompt = st.chat_input("Ask a question for your knowledge base")
    if user_prompt:
        # add user's message to chat and display it
        st.chat_message("user").markdown(user_prompt)
        st.session_state.chat_history.append({"role": "user", "content": user_prompt})
        # Generate and display answer: embed question -> k-NN retrieve ->
        # build prompt with retrieved chunks -> ask the LLM.
        # (print calls below are debug output to the server console)
        print(user_prompt)
        embed_question= get_embedding(user_prompt,bedrock_client)
        print(embed_question)
        sim_results = similarity_search(embed_question, index_name)
        # Pull the stored chunk text out of each OpenSearch hit.
        context = [i['_source']['text'] for i in sim_results]
        print(context)
        prompt = prepare_prompt(user_prompt, context)
        print(prompt)
        answer = generate_answer(prompt)
        # NOTE(review): role "system" renders without the assistant avatar;
        # "assistant" is the conventional role for model replies — confirm.
        st.session_state.chat_history.append({"role": "system", "content": answer})
        # Render only the newly appended answer (history above is already shown).
        for message in st.session_state.chat_history[-1:]:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

if __name__== "__main__":
    main()
streamlit
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
1. AWS CLI: Ensure AWS CLI is installed and configured on your laptop(refer to Session 1) 1. AWS CLI: Ensure AWS CLI is installed and configured on your laptop(refer to Session 1)
2. Ensure python is installed: python 3.8 or higher 2. Ensure python is installed: python 3.8 or higher
3. Install required python libraries listed in the 'requirements.txt': 3. Install required python libraries listed in the 'requirements.txt':
`pip install -r requirement.txt` `pip3 install -r requirements.txt`
## Part 1: ## Part 1:
...@@ -31,4 +31,8 @@ The main.py script will: ...@@ -31,4 +31,8 @@ The main.py script will:
1. Download PDF files from the S3 bucket. 1. Download PDF files from the S3 bucket.
2. Split them into chunks. 2. Split them into chunks.
3. Generate embeddings from the chunks. 3. Generate embeddings from the chunks.
4. Store these embeddings in the OpenSearch Vector DB. 4. Store these embeddings in the OpenSearch Vector DB.
\ No newline at end of file
## Part 2:
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment