diff --git a/Part 1/pdfs/AWS-openstack.pdf b/Part 1/pdfs/AWS-openstack.pdf deleted file mode 100644 index 064caadf0bc4628e2b3cc2a9ad8d0f55fd1e6819..0000000000000000000000000000000000000000 Binary files a/Part 1/pdfs/AWS-openstack.pdf and /dev/null differ diff --git a/Part 1/pdfs/FAAS.pdf b/Part 1/pdfs/FAAS.pdf deleted file mode 100644 index edcdb22d598faceb1e5946c43ef173310214a1f7..0000000000000000000000000000000000000000 Binary files a/Part 1/pdfs/FAAS.pdf and /dev/null differ diff --git a/Part 2/config.ini b/Part 2/config.ini deleted file mode 100644 index 235a33263ceb6809cb68a8b4e9fe0f00aea6d3d9..0000000000000000000000000000000000000000 --- a/Part 2/config.ini +++ /dev/null @@ -1,10 +0,0 @@ -[aws] -aws_access_key_id = -aws_secret_access_key = -region = - -[opensearch] -endpoint = -index_name = - - diff --git a/Part 1/.DS_Store b/Part1/.DS_Store similarity index 100% rename from Part 1/.DS_Store rename to Part1/.DS_Store diff --git a/Part 1/create-S3-and-put-docs.py b/Part1/create-S3-and-put-docs.py similarity index 100% rename from Part 1/create-S3-and-put-docs.py rename to Part1/create-S3-and-put-docs.py diff --git a/Part 1/create-vector-db.py b/Part1/create-vector-db.py similarity index 100% rename from Part 1/create-vector-db.py rename to Part1/create-vector-db.py diff --git a/Part 1/delete-s3.py b/Part1/delete-s3.py similarity index 100% rename from Part 1/delete-s3.py rename to Part1/delete-s3.py diff --git a/Part 1/requirements.txt b/Part1/requirements.txt similarity index 100% rename from Part 1/requirements.txt rename to Part1/requirements.txt diff --git a/Part 1/test.py b/Part1/test.py similarity index 100% rename from Part 1/test.py rename to Part1/test.py diff --git a/Part 1/main.py b/Part1/vectorise-store.py similarity index 97% rename from Part 1/main.py rename to Part1/vectorise-store.py index 053087c6e58b7f4419da371fd638b912bcc9b1a0..225ecf323daaaf63833d1f332640246bc2c8c879 100644 --- a/Part 1/main.py +++ b/Part1/vectorise-store.py @@ -1,13 +1,10 @@ import boto3 import os -#from tqdm.auto import tqdm from langchain_community.document_loaders import PyPDFDirectoryLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import BedrockEmbeddings from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth from langchain_community.vectorstores import OpenSearchVectorSearch -import uuid -import json import argparse @@ -139,18 +136,19 @@ def main(bucket_name, endpoint,index_name, local_path): download_documents(bucket_name,local_path) loader= PyPDFDirectoryLoader(local_path) docs = loader.load() - print(docs[1]) + print('Start chunking') chunks = split_text(docs, 1000, 100) print(chunks[1]) create_index(OpenSearch_client,index_name) + print('Start vectorising') embeddings= generate_embeddings(bedrock_client, chunks) print(embeddings[1]) texts = [chunk.page_content for chunk in chunks] # Prepare metadata for each chunk meta_data = [{'source': chunk.metadata['source'], 'page': chunk.metadata['page'] + 1} for chunk in chunks] - print(embeddings[1]) - print(meta_data[1]) + print('Start storing') store_embeddings(embeddings, texts, meta_data ,endpoint, awsauth,index_name) + print('End storing') diff --git a/Part 2/main.py b/Part2/chatbot.py similarity index 100% rename from Part 2/main.py rename to Part2/chatbot.py diff --git a/Part2/config.ini b/Part2/config.ini new file mode 100644 index 0000000000000000000000000000000000000000..a7868b48cebe9e52ce6f329c48d0a3fff358c065 --- /dev/null +++ b/Part2/config.ini @@ -0,0 +1,10 @@ +[aws] +aws_access_key_id = AKIAVEKYIBTQKSG2R342 +aws_secret_access_key = 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 +region = us-east-1 + +[opensearch] +endpoint = ku7nut9wcogpz1nw15j0.us-east-1.aoss.amazonaws.com +index_name = cloud-lecture + + diff --git a/Part 2/create_instance.py b/Part2/create_instance.py similarity index 86% rename from Part 2/create_instance.py rename to Part2/create_instance.py index 0bdc80625e6304942a0d765907c2f590c61d4dfe..c4a6ec8b6300c332d13f91ce628fad0d887c97cc 100644 --- a/Part 2/create_instance.py +++ b/Part2/create_instance.py @@ -16,12 +16,12 @@ ec2 = boto3.resource('ec2') # User code that's executed when the instance starts script = f"""#!/bin/bash -cat <<EOT > /home/ubuntu/chatbot-lab/Part\ 2/config.ini +cat <<EOT > /home/ubuntu/chatbot-lab/Part2/config.ini {config_content} EOT source /home/ubuntu/chatbotlab/bin/activate ## Run the apllication -cd /home/ubuntu/chatbot-lab/Part\ 2 +cd /home/ubuntu/chatbot-lab/Part2 streamlit run main.py """ @@ -29,7 +29,7 @@ encoded_script = base64.b64encode(script.encode()).decode('utf-8') # Create a new EC2 instance instance = ec2.create_instances( - ImageId='ami-03a1012f7ddc87219', + ImageId='ami-05747e7a13dac9d14', MinCount=1, MaxCount=1, InstanceType='t2.micro', @@ -37,7 +37,8 @@ instance = ec2.create_instances( SecurityGroupIds=['sg-06f3ca7153db92958'], UserData=encoded_script ) - print("Instance created with ID:", instance[0].id) + + diff --git a/Part 2/requirements.txt b/Part2/requirements.txt similarity index 100% rename from Part 2/requirements.txt rename to Part2/requirements.txt