Skip to content
Snippets Groups Projects
Commit 783f88b1 authored by abir.chebbi's avatar abir.chebbi
Browse files

add part 2

parent 7e26a1ca
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@ import time
client = boto3.client('opensearchserverless')
service = 'aoss'
Vector_store_name='test1'
Vector_store_name='test2'
def createEncryptionPolicy(client):
"""Creates an encryption policy that matches all collections beginning with test"""
......
File moved
......@@ -12,7 +12,7 @@ import json
## Local directory for storing PDF files
LOCAL_DIR = "pdfs"
index_name = "cloud_lecture_test3"
index_name = "cloud_lecture"
## S3_client
......@@ -29,7 +29,7 @@ credentials = boto3.Session().get_credentials()
awsauth = AWSV4SignerAuth(credentials, 'us-east-1', 'aoss')
## Vector DB endpoint
host= 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com'
host= 'j6phg34iv0f2rlvxwawd.us-east-1.aoss.amazonaws.com'
## Opensearch Client
OpenSearch_client = OpenSearch(
......@@ -142,16 +142,16 @@ def main():
download_documents(BUCKET_NAME,LOCAL_DIR)
loader= PyPDFDirectoryLoader(LOCAL_DIR)
docs = loader.load()
print(docs[80])
print(docs[1])
chunks = split_text(docs, 1000, 100)
print(chunks[80])
print(chunks[1])
embeddings= generate_embeddings(bedrock_client, chunks)
print(embeddings[80])
print(embeddings[1])
texts = [chunk.page_content for chunk in chunks]
# Prepare metadata for each chunk
meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks]
print(embeddings[80])
print(meta_data[80])
print(embeddings[1])
print(meta_data[1])
store_embeddings(embeddings, texts, meta_data ,host, awsauth,index_name)
......
......@@ -14,7 +14,6 @@ opensearch_client = OpenSearch(
use_ssl=True,
verify_certs=True,
connection_class=RequestsHttpConnection,
timeout=300
)
# Embeddings Client
......
File moved
import boto3
import streamlit as st
## Bedrock
from langchain.llms.bedrock import Bedrock
## prompt and chain
from langchain.chains import RetrievalQA
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from langchain import PromptTemplate
# --- Module-level configuration and shared clients ---

# Bedrock runtime client, used both for embeddings and for chat completion.
bedrock_client = boto3.client(service_name="bedrock-runtime")

# configuring streamlit page settings
st.set_page_config(
    page_title="cloud lecture lab",
    page_icon="💬",
    layout="centered"
)

# streamlit page title
st.title("Chat with your lecture")

# AWS and OpenSearch Configuration
# NOTE(review): this endpoint differs from the one the ingestion script was
# updated to use elsewhere in this commit — confirm both point at the same
# OpenSearch Serverless collection.
host = 'd7gvxdj7jpz3h3bj0xq6.us-east-1.aoss.amazonaws.com'
index_name = 'cloud_lecture'
# SigV4 auth for OpenSearch Serverless ('aoss') in us-east-1.
awsauth = AWSV4SignerAuth(boto3.Session().get_credentials(), 'us-east-1', 'aoss')

# OpenSearch Client (HTTPS on 443, certificate verification on)
opensearch_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
def get_embedding(question, bedrock_client):
    """Embed *question* with the Amazon Titan text-embedding model.

    Uses the supplied Bedrock runtime client; returns the embedding vector
    produced by `embed_query`.
    """
    model = BedrockEmbeddings(
        model_id="amazon.titan-embed-text-v1",
        client=bedrock_client,
    )
    return model.embed_query(question)
def similarity_search(embed_query, index_name):
    """Run a k-NN search for *embed_query* against *index_name*.

    Returns the raw hit list (top 5) from the OpenSearch response.
    """
    knn_clause = {
        "vector_field": {
            "vector": embed_query,
            "k": 5,
        }
    }
    body = {"size": 5, "query": {"knn": knn_clause}}
    result = opensearch_client.search(index=index_name, body=body)
    return result["hits"]["hits"]
def prepare_prompt(question, context):
    """Build the professor-persona prompt from retrieved context and a question.

    The template has exactly two placeholders, so plain ``str.format`` does the
    same job as langchain's ``PromptTemplate`` without the extra dependency.
    *context* is interpolated via ``str()`` (a list renders as its repr), which
    matches the original ``PromptTemplate.format`` behavior.

    Returns the fully formatted prompt string.
    """
    template = """
You are a Professor. The student will ask you a questions about the lecture.
Use following piece of context to answer the question.
If you don't know the answer, just say you don't know.
Context: <context>
{context}
</context>
Question: {question}
Answer:
"""
    return template.format(context=context, question=question)
def generate_answer(prompt):
    """Send *prompt* to Claude v2 on Bedrock and return the model's response."""
    llm = BedrockChat(
        model_id="anthropic.claude-v2",
        model_kwargs={"temperature": 0.1},
    )
    return llm.invoke(prompt)
def main():
    """Streamlit chat loop: render history, embed the user's question,
    retrieve matching chunks from OpenSearch, and answer with Claude."""
    # initialize chat session in streamlit if not already present
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # display chat history
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # input field for user's message
    user_prompt = st.chat_input("Ask a question for your knowledge base")
    if user_prompt:
        # add user's message to chat and display it
        st.chat_message("user").markdown(user_prompt)
        st.session_state.chat_history.append({"role": "user", "content": user_prompt})
        # Generate and display answer
        # NOTE(review): the print() calls below look like leftover debug output;
        # consider the logging module instead.
        print(user_prompt)
        embed_question= get_embedding(user_prompt,bedrock_client)
        print(embed_question)
        sim_results = similarity_search(embed_question, index_name)
        # each hit stores the original chunk text under _source.text
        context = [i['_source']['text'] for i in sim_results]
        print(context)
        prompt = prepare_prompt(user_prompt, context)
        print(prompt)
        answer = generate_answer(prompt)
        # NOTE(review): `answer` is whatever BedrockChat.invoke returns — likely a
        # message object rather than a plain str, so st.markdown below renders its
        # repr; confirm and extract `.content` if so.
        st.session_state.chat_history.append({"role": "system", "content": answer})
        # re-render only the newly appended assistant turn
        for message in st.session_state.chat_history[-1:]:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

if __name__== "__main__":
    main()
streamlit
......@@ -4,7 +4,7 @@
1. AWS CLI: Ensure AWS CLI is installed and configured on your laptop(refer to Session 1)
2. Ensure python is installed: python 3.8 or higher
3. Install required python libraries listed in the 'requirements.txt':
`pip install -r requirement.txt`
`pip3 install -r requirements.txt`
## Part 1:
......@@ -32,3 +32,7 @@ The main.py script will:
2. Split them into chunks.
3. Generate embeddings from the chunks.
4. Store these embeddings in the OpenSearch Vector DB.
## Part 2:
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment