From 09e5d3463216afb1e3834253d5ee8dfbb92fa041 Mon Sep 17 00:00:00 2001
From: "abir.chebbi" <abir.chebbi@hes-so.ch>
Date: Thu, 12 Sep 2024 15:17:22 +0200
Subject: [PATCH] add argument local_path to main.py

---
 Part 1/main.py | 12 +++++-------
 README.md      |  1 +
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/Part 1/main.py b/Part 1/main.py
index 3a6c717..6438a25 100644
--- a/Part 1/main.py	
+++ b/Part 1/main.py	
@@ -11,9 +11,6 @@ import json
 import argparse
 
 
-##  Local directory for storing PDF files
-LOCAL_DIR = "pdfs" 
-
 
 
 ## S3_client
@@ -127,7 +124,7 @@ def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name):
 
 
 ## main 
-def main(bucket_name, endpoint,index_name):
+def main(bucket_name, endpoint,index_name, local_path):
 
     ## Opensearch Client
     OpenSearch_client = OpenSearch(
@@ -139,8 +136,8 @@ def main(bucket_name, endpoint,index_name):
         
     )
 
-    download_documents(bucket_name,LOCAL_DIR)
-    loader= PyPDFDirectoryLoader(LOCAL_DIR)
+    download_documents(bucket_name,local_path)
+    loader= PyPDFDirectoryLoader(local_path)
     docs = loader.load()
     print(docs[1])
     chunks = split_text(docs, 1000, 100)
@@ -168,5 +165,6 @@ if __name__== "__main__":
     parser.add_argument("--bucket_name", help="The S3 bucket name where documents are stored")
     parser.add_argument("--endpoint", help="The OpenSearch service endpoint")
     parser.add_argument("--index_name", help="The name of the OpenSearch index")
+    parser.add_argument("--local_path", help="local path")
     args = parser.parse_args()
-    main(args.bucket_name, args.endpoint, args.index_name)
+    main(args.bucket_name, args.endpoint, args.index_name, args.local_path)
diff --git a/README.md b/README.md
index ec86e12..ebd7a71 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,7 @@ Where:
 - **--bucket_name**: The name of the S3 bucket containing the PDF files.
 - **--endpoint**: Endpoint for the vector database.
 - **--index_name**: The index_name where to store the embeddings in the collection.
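+- **--local_path**: The local directory where the PDF files downloaded from the S3 bucket are stored and then loaded from.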
 
 The main.py script will:
 
-- 
GitLab