Skip to content
Snippets Groups Projects
Commit 607b999e authored by abir.chebbi
Browse files

files renamed and readme updated

parent 9d623cfc
No related branches found
No related tags found
No related merge requests found
File deleted
File deleted
[aws]
aws_access_key_id =
aws_secret_access_key =
region =
[opensearch]
endpoint =
index_name =
File moved
File moved
File moved
File moved
File moved
File moved
import boto3
import os
#from tqdm.auto import tqdm
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import BedrockEmbeddings
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from langchain_community.vectorstores import OpenSearchVectorSearch
import uuid
import json
import argparse
......@@ -139,18 +136,19 @@ def main(bucket_name, endpoint,index_name, local_path):
download_documents(bucket_name,local_path)
loader= PyPDFDirectoryLoader(local_path)
docs = loader.load()
print(docs[1])
print('Start chunking')
chunks = split_text(docs, 1000, 100)
print(chunks[1])
create_index(OpenSearch_client,index_name)
print('Start vectorising')
embeddings= generate_embeddings(bedrock_client, chunks)
print(embeddings[1])
texts = [chunk.page_content for chunk in chunks]
# Prepare metadata for each chunk
meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks]
print(embeddings[1])
print(meta_data[1])
print('Start storing')
store_embeddings(embeddings, texts, meta_data ,endpoint, awsauth,index_name)
print('End storing')
......
File moved
[aws]
aws_access_key_id = <YOUR_AWS_ACCESS_KEY_ID>
aws_secret_access_key = <YOUR_AWS_SECRET_ACCESS_KEY>
region = us-east-1
[opensearch]
endpoint = ku7nut9wcogpz1nw15j0.us-east-1.aoss.amazonaws.com
index_name = cloud-lecture
......@@ -16,12 +16,12 @@ ec2 = boto3.resource('ec2')
# User code that's executed when the instance starts
script = f"""#!/bin/bash
cat <<EOT > /home/ubuntu/chatbot-lab/Part\ 2/config.ini
cat <<EOT > /home/ubuntu/chatbot-lab/Part2/config.ini
{config_content}
EOT
source /home/ubuntu/chatbotlab/bin/activate
## Run the apllication
cd /home/ubuntu/chatbot-lab/Part\ 2
cd /home/ubuntu/chatbot-lab/Part2
streamlit run main.py
"""
......@@ -29,7 +29,7 @@ encoded_script = base64.b64encode(script.encode()).decode('utf-8')
# Create a new EC2 instance
instance = ec2.create_instances(
ImageId='ami-03a1012f7ddc87219',
ImageId='ami-05747e7a13dac9d14',
MinCount=1,
MaxCount=1,
InstanceType='t2.micro',
......@@ -37,7 +37,8 @@ instance = ec2.create_instances(
SecurityGroupIds=['sg-06f3ca7153db92958'],
UserData=encoded_script
)
print("Instance created with ID:", instance[0].id)
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment