Skip to content
Snippets Groups Projects
Commit 607b999e authored by abir.chebbi
Browse files

files renamed and readme updated

parent 9d623cfc
Branches
No related tags found
No related merge requests found
File deleted
File deleted
[aws]
aws_access_key_id =
aws_secret_access_key =
region =
[opensearch]
endpoint =
index_name =
File moved
File moved
File moved
File moved
File moved
File moved
import boto3 import boto3
import os import os
#from tqdm.auto import tqdm
from langchain_community.document_loaders import PyPDFDirectoryLoader from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import BedrockEmbeddings from langchain_community.embeddings import BedrockEmbeddings
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from langchain_community.vectorstores import OpenSearchVectorSearch from langchain_community.vectorstores import OpenSearchVectorSearch
import uuid
import json
import argparse import argparse
...@@ -139,18 +136,19 @@ def main(bucket_name, endpoint,index_name, local_path): ...@@ -139,18 +136,19 @@ def main(bucket_name, endpoint,index_name, local_path):
download_documents(bucket_name,local_path) download_documents(bucket_name,local_path)
loader= PyPDFDirectoryLoader(local_path) loader= PyPDFDirectoryLoader(local_path)
docs = loader.load() docs = loader.load()
print(docs[1]) print('Start chunking')
chunks = split_text(docs, 1000, 100) chunks = split_text(docs, 1000, 100)
print(chunks[1]) print(chunks[1])
create_index(OpenSearch_client,index_name) create_index(OpenSearch_client,index_name)
print('Start vectorising')
embeddings= generate_embeddings(bedrock_client, chunks) embeddings= generate_embeddings(bedrock_client, chunks)
print(embeddings[1]) print(embeddings[1])
texts = [chunk.page_content for chunk in chunks] texts = [chunk.page_content for chunk in chunks]
# Prepare metadata for each chunk # Prepare metadata for each chunk
meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks] meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks]
print(embeddings[1]) print('Start storing')
print(meta_data[1])
store_embeddings(embeddings, texts, meta_data ,endpoint, awsauth,index_name) store_embeddings(embeddings, texts, meta_data ,endpoint, awsauth,index_name)
print('End storing')
......
File moved
[aws]
aws_access_key_id = <REDACTED_AWS_ACCESS_KEY_ID>
aws_secret_access_key = <REDACTED_AWS_SECRET_ACCESS_KEY>
region = us-east-1
[opensearch]
endpoint = ku7nut9wcogpz1nw15j0.us-east-1.aoss.amazonaws.com
index_name = cloud-lecture
...@@ -16,12 +16,12 @@ ec2 = boto3.resource('ec2') ...@@ -16,12 +16,12 @@ ec2 = boto3.resource('ec2')
# User code that's executed when the instance starts # User code that's executed when the instance starts
script = f"""#!/bin/bash script = f"""#!/bin/bash
cat <<EOT > /home/ubuntu/chatbot-lab/Part\ 2/config.ini cat <<EOT > /home/ubuntu/chatbot-lab/Part2/config.ini
{config_content} {config_content}
EOT EOT
source /home/ubuntu/chatbotlab/bin/activate source /home/ubuntu/chatbotlab/bin/activate
## Run the apllication ## Run the apllication
cd /home/ubuntu/chatbot-lab/Part\ 2 cd /home/ubuntu/chatbot-lab/Part2
streamlit run main.py streamlit run main.py
""" """
...@@ -29,7 +29,7 @@ encoded_script = base64.b64encode(script.encode()).decode('utf-8') ...@@ -29,7 +29,7 @@ encoded_script = base64.b64encode(script.encode()).decode('utf-8')
# Create a new EC2 instance # Create a new EC2 instance
instance = ec2.create_instances( instance = ec2.create_instances(
ImageId='ami-03a1012f7ddc87219', ImageId='ami-05747e7a13dac9d14',
MinCount=1, MinCount=1,
MaxCount=1, MaxCount=1,
InstanceType='t2.micro', InstanceType='t2.micro',
...@@ -37,7 +37,8 @@ instance = ec2.create_instances( ...@@ -37,7 +37,8 @@ instance = ec2.create_instances(
SecurityGroupIds=['sg-06f3ca7153db92958'], SecurityGroupIds=['sg-06f3ca7153db92958'],
UserData=encoded_script UserData=encoded_script
) )
print("Instance created with ID:", instance[0].id) print("Instance created with ID:", instance[0].id)
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment