Skip to content
Snippets Groups Projects
Commit 783f88b1 authored by abir.chebbi's avatar abir.chebbi
Browse files

add part 2

parent 7e26a1ca
No related branches found
No related tags found
No related merge requests found
...@@ -6,7 +6,7 @@ import time ...@@ -6,7 +6,7 @@ import time
client = boto3.client('opensearchserverless') client = boto3.client('opensearchserverless')
service = 'aoss' service = 'aoss'
Vector_store_name='test1' Vector_store_name='test2'
def createEncryptionPolicy(client): def createEncryptionPolicy(client):
"""Creates an encryption policy that matches all collections beginning with test""" """Creates an encryption policy that matches all collections beginning with test"""
......
File moved
...@@ -12,7 +12,7 @@ import json ...@@ -12,7 +12,7 @@ import json
## Local directory for storing PDF files ## Local directory for storing PDF files
LOCAL_DIR = "pdfs" LOCAL_DIR = "pdfs"
index_name = "cloud_lecture_test3" index_name = "cloud_lecture"
## S3_client ## S3_client
...@@ -29,7 +29,7 @@ credentials = boto3.Session().get_credentials() ...@@ -29,7 +29,7 @@ credentials = boto3.Session().get_credentials()
awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss') awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss')
## Vector DB endpoint ## Vector DB endpoint
host= 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com' host= 'j6phg34iv0f2rlvxwawd.us-east-1.aoss.amazonaws.com'
## Opensearch Client ## Opensearch Client
OpenSearch_client = OpenSearch( OpenSearch_client = OpenSearch(
...@@ -142,16 +142,16 @@ def main(): ...@@ -142,16 +142,16 @@ def main():
download_documents(BUCKET_NAME,LOCAL_DIR) download_documents(BUCKET_NAME,LOCAL_DIR)
loader= PyPDFDirectoryLoader(LOCAL_DIR) loader= PyPDFDirectoryLoader(LOCAL_DIR)
docs = loader.load() docs = loader.load()
print(docs[80]) print(docs[1])
chunks = split_text(docs, 1000, 100) chunks = split_text(docs, 1000, 100)
print(chunks[80]) print(chunks[1])
embeddings= generate_embeddings(bedrock_client, chunks) embeddings= generate_embeddings(bedrock_client, chunks)
print(embeddings[80]) print(embeddings[1])
texts = [chunk.page_content for chunk in chunks] texts = [chunk.page_content for chunk in chunks]
# Prepare metadata for each chunk # Prepare metadata for each chunk
meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks] meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks]
print(embeddings[80]) print(embeddings[1])
print(meta_data[80]) print(meta_data[1])
store_embeddings(embeddings, texts, meta_data ,host, awsauth,index_name) store_embeddings(embeddings, texts, meta_data ,host, awsauth,index_name)
......
...@@ -14,7 +14,6 @@ opensearch_client = OpenSearch( ...@@ -14,7 +14,6 @@ opensearch_client = OpenSearch(
use_ssl=True, use_ssl=True,
verify_certs=True, verify_certs=True,
connection_class=RequestsHttpConnection, connection_class=RequestsHttpConnection,
timeout=300
) )
# Embeddings Client # Embeddings Client
......
File moved
import boto3
import streamlit as st
## Bedrock
from langchain.llms.bedrock import Bedrock
## prompt and chain
from langchain.chains import RetrievalQA
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from langchain import PromptTemplate
# --- Module-level setup (runs once at import / app start) ---

# Bedrock runtime client, shared by embedding generation and chat completion.
# Region/credentials come from the ambient AWS configuration (env/profile).
bedrock_client = boto3.client(service_name="bedrock-runtime")

# Configure the Streamlit page before any other st.* call (Streamlit requires
# set_page_config to be the first Streamlit command executed).
st.set_page_config(
    page_title="cloud lecture lab",
    page_icon="💬",
    layout="centered"
)

# Page title shown at the top of the chat UI.
st.title("Chat with your lecture")

# AWS and OpenSearch Serverless (aoss) configuration.
# NOTE(review): host and index_name are hard-coded; they must match the
# collection endpoint and index created by the ingestion script — confirm.
host = 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com'
index_name = 'cloud_lecture'
# SigV4 signer for OpenSearch Serverless ('aoss') in us-east-1.
awsauth = AWSV4SignerAuth(boto3.Session().get_credentials(), 'us-east-1', 'aoss')

# OpenSearch client used for k-NN similarity search over stored embeddings.
opensearch_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
def get_embedding(question, bedrock_client):
    """Embed *question* with Amazon Titan via Bedrock and return the vector."""
    titan = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock_client)
    return titan.embed_query(question)
def similarity_search(embed_query, index_name):
    """Run a k-NN search for *embed_query* and return the top-5 raw hits.

    Uses the module-level ``opensearch_client``; each hit carries the stored
    chunk under ``_source``.
    """
    knn_clause = {
        "vector_field": {
            "vector": embed_query,
            "k": 5,
        }
    }
    search_body = {"size": 5, "query": {"knn": knn_clause}}
    result = opensearch_client.search(index=index_name, body=search_body)
    return result['hits']['hits']
def prepare_prompt(question, context):
    """Build the professor-persona prompt with *context* and *question* filled in."""
    template = """
You are a Professor. The student will ask you a questions about the lecture.
Use following piece of context to answer the question.
If you don't know the answer, just say you don't know.
Context: <context>
{context}
</context>
Question: {question}
Answer:
"""
    # PromptTemplate validates that both placeholders are supplied.
    prompt_template = PromptTemplate(
        template=template,
        input_variables=['context', 'question'],
    )
    return prompt_template.format(context=context, question=question)
def generate_answer(prompt):
    """Answer *prompt* with Claude v2 on Bedrock and return the reply text.

    Returns a plain ``str`` (the message content) rather than the raw
    ``AIMessage`` object, so callers can store it in chat history and pass it
    straight to ``st.markdown`` without rendering the message object's repr.
    """
    # Low temperature keeps answers close to the retrieved lecture context.
    model = BedrockChat(model_id="anthropic.claude-v2", model_kwargs={"temperature": 0.1})
    answer = model.invoke(prompt)
    # BedrockChat.invoke returns an AIMessage; extract its text content.
    return answer.content
def main():
    """Streamlit chat loop: render history, take a question, answer it via RAG."""
    # initialize chat session in streamlit if not already present
    # (st.session_state persists across Streamlit reruns within one session)
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # display chat history accumulated so far (page reruns on every input)
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # input field for user's message
    user_prompt = st.chat_input("Ask a question for your knowledge base")
    if user_prompt:
        # add user's message to chat and display it
        st.chat_message("user").markdown(user_prompt)
        st.session_state.chat_history.append({"role": "user", "content": user_prompt})
        # Generate and display answer: embed question -> k-NN retrieve ->
        # build prompt with retrieved chunks -> ask the LLM.
        # (print calls below are debug output to the server console)
        print(user_prompt)
        embed_question= get_embedding(user_prompt,bedrock_client)
        print(embed_question)
        sim_results = similarity_search(embed_question, index_name)
        # Pull the stored chunk text out of each OpenSearch hit.
        context = [i['_source']['text'] for i in sim_results]
        print(context)
        prompt = prepare_prompt(user_prompt, context)
        print(prompt)
        answer = generate_answer(prompt)
        # NOTE(review): role "system" renders without the assistant avatar;
        # "assistant" is the conventional role for model replies — confirm.
        st.session_state.chat_history.append({"role": "system", "content": answer})
        # Render only the newly appended answer (history above is already shown).
        for message in st.session_state.chat_history[-1:]:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

if __name__== "__main__":
    main()
streamlit
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
1. AWS CLI: Ensure AWS CLI is installed and configured on your laptop(refer to Session 1) 1. AWS CLI: Ensure AWS CLI is installed and configured on your laptop(refer to Session 1)
2. Ensure python is installed: python 3.8 or higher 2. Ensure python is installed: python 3.8 or higher
3. Install required python libraries listed in the 'requirements.txt': 3. Install required python libraries listed in the 'requirements.txt':
`pip install -r requirement.txt` `pip3 install -r requirements.txt`
## Part 1: ## Part 1:
...@@ -31,4 +31,8 @@ The main.py script will: ...@@ -31,4 +31,8 @@ The main.py script will:
1. Download PDF files from the S3 bucket. 1. Download PDF files from the S3 bucket.
2. Split them into chunks. 2. Split them into chunks.
3. Generate embeddings from the chunks. 3. Generate embeddings from the chunks.
4. Store these embeddings in the OpenSearch Vector DB. 4. Store these embeddings in the OpenSearch Vector DB.
\ No newline at end of file
## Part 2:
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment