Skip to content
Snippets Groups Projects
Commit 09e5d346 authored by abir.chebbi's avatar abir.chebbi
Browse files

add argument local_path to main.py

parent 1ac59397
Branches
No related tags found
No related merge requests found
......@@ -11,9 +11,6 @@ import json
import argparse
## Local directory for storing PDF files
LOCAL_DIR = "pdfs"
## S3_client
......@@ -127,7 +124,7 @@ def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name):
## main
def main(bucket_name, endpoint,index_name):
def main(bucket_name, endpoint,index_name, local_path):
## Opensearch Client
OpenSearch_client = OpenSearch(
......@@ -139,8 +136,8 @@ def main(bucket_name, endpoint,index_name):
)
download_documents(bucket_name,LOCAL_DIR)
loader= PyPDFDirectoryLoader(LOCAL_DIR)
download_documents(bucket_name,local_path)
loader= PyPDFDirectoryLoader(local_path)
docs = loader.load()
print(docs[1])
chunks = split_text(docs, 1000, 100)
......@@ -168,5 +165,6 @@ if __name__== "__main__":
parser.add_argument("--bucket_name", help="The S3 bucket name where documents are stored")
parser.add_argument("--endpoint", help="The OpenSearch service endpoint")
parser.add_argument("--index_name", help="The name of the OpenSearch index")
parser.add_argument("--local_path", help="local path")
args = parser.parse_args()
main(args.bucket_name, args.endpoint, args.index_name)
main(args.bucket_name, args.endpoint, args.index_name, args.local_path)
......@@ -48,6 +48,7 @@ Where:
- **--bucket_name**: The name of the S3 bucket containing the PDF files.
- **--endpoint**: Endpoint for the vector database.
- **--index_name**: The name of the index in the collection where the embeddings are stored.
- **--local_path**: The local directory where the PDF files downloaded from the S3 bucket are stored and loaded from.
The main.py script will:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment