From 09e5d3463216afb1e3834253d5ee8dfbb92fa041 Mon Sep 17 00:00:00 2001 From: "abir.chebbi" <abir.chebbi@hes-so.ch> Date: Thu, 12 Sep 2024 15:17:22 +0200 Subject: [PATCH] add argument local_path to main.py --- Part 1/main.py | 12 +++++------- README.md | 1 + 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Part 1/main.py b/Part 1/main.py index 3a6c717..6438a25 100644 --- a/Part 1/main.py +++ b/Part 1/main.py @@ -11,9 +11,6 @@ import json import argparse -## Local directory for storing PDF files -LOCAL_DIR = "pdfs" - ## S3_client @@ -127,7 +124,7 @@ def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name): ## main -def main(bucket_name, endpoint,index_name): +def main(bucket_name, endpoint,index_name, local_path): ## Opensearch Client OpenSearch_client = OpenSearch( @@ -139,8 +136,8 @@ def main(bucket_name, endpoint,index_name): ) - download_documents(bucket_name,LOCAL_DIR) - loader= PyPDFDirectoryLoader(LOCAL_DIR) + download_documents(bucket_name,local_path) + loader= PyPDFDirectoryLoader(local_path) docs = loader.load() print(docs[1]) chunks = split_text(docs, 1000, 100) @@ -168,5 +165,6 @@ if __name__== "__main__": parser.add_argument("--bucket_name", help="The S3 bucket name where documents are stored") parser.add_argument("--endpoint", help="The OpenSearch service endpoint") parser.add_argument("--index_name", help="The name of the OpenSearch index") + parser.add_argument("--local_path", help="local path") args = parser.parse_args() - main(args.bucket_name, args.endpoint, args.index_name) + main(args.bucket_name, args.endpoint, args.index_name, args.local_path) diff --git a/README.md b/README.md index ec86e12..ebd7a71 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Where: - **--bucket_name**: The name of the S3 bucket containing the PDF files. - **--endpoint**: Endpoint for the vector database. - **--index_name**: The index_name where to store the embeddings in the collection. 
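+- **--local_path**: The local directory where the PDF files downloaded from the S3 bucket are stored before being loaded. The main.py script will: -- GitLab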