Skip to content
Snippets Groups Projects
Commit 09e5d346 authored by abir.chebbi's avatar abir.chebbi
Browse files

add argument local_path to main.py

parent 1ac59397
No related branches found
No related tags found
No related merge requests found
...@@ -11,9 +11,6 @@ import json ...@@ -11,9 +11,6 @@ import json
import argparse import argparse
## Local directory for storing PDF files
LOCAL_DIR = "pdfs"
## S3_client ## S3_client
...@@ -127,7 +124,7 @@ def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name): ...@@ -127,7 +124,7 @@ def generate_store_embeddings(bedrock_client, chunks,awsauth,index_name):
## main ## main
def main(bucket_name, endpoint,index_name): def main(bucket_name, endpoint,index_name, local_path):
## Opensearch Client ## Opensearch Client
OpenSearch_client = OpenSearch( OpenSearch_client = OpenSearch(
...@@ -139,8 +136,8 @@ def main(bucket_name, endpoint,index_name): ...@@ -139,8 +136,8 @@ def main(bucket_name, endpoint,index_name):
) )
download_documents(bucket_name,LOCAL_DIR) download_documents(bucket_name,local_path)
loader= PyPDFDirectoryLoader(LOCAL_DIR) loader= PyPDFDirectoryLoader(local_path)
docs = loader.load() docs = loader.load()
print(docs[1]) print(docs[1])
chunks = split_text(docs, 1000, 100) chunks = split_text(docs, 1000, 100)
...@@ -168,5 +165,6 @@ if __name__== "__main__": ...@@ -168,5 +165,6 @@ if __name__== "__main__":
parser.add_argument("--bucket_name", help="The S3 bucket name where documents are stored") parser.add_argument("--bucket_name", help="The S3 bucket name where documents are stored")
parser.add_argument("--endpoint", help="The OpenSearch service endpoint") parser.add_argument("--endpoint", help="The OpenSearch service endpoint")
parser.add_argument("--index_name", help="The name of the OpenSearch index") parser.add_argument("--index_name", help="The name of the OpenSearch index")
parser.add_argument("--local_path", help="local path")
args = parser.parse_args() args = parser.parse_args()
main(args.bucket_name, args.endpoint, args.index_name) main(args.bucket_name, args.endpoint, args.index_name, args.local_path)
...@@ -48,6 +48,7 @@ Where: ...@@ -48,6 +48,7 @@ Where:
- **--bucket_name**: The name of the S3 bucket containing the PDF files. - **--bucket_name**: The name of the S3 bucket containing the PDF files.
- **--endpoint**: Endpoint for the vector database. - **--endpoint**: Endpoint for the vector database.
- **--index_name**: The index_name where to store the embeddings in the collection. - **--index_name**: The index_name where to store the embeddings in the collection.
- **--local_path**: The local directory where the PDF files downloaded from the S3 bucket are stored.
The main.py script will: The main.py script will:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment