diff --git a/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc b/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e650d570dcc84a349d496c81cb5c4b7a99fc2a2
Binary files /dev/null and b/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc differ
diff --git a/dataSources/PubMed/data/save_3_years.json b/dataSources/PubMed/data/save_3_years.json
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/dataSources/PubMed/data/save_3_years.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/dataSources/PubMed/pubmedApi.py b/dataSources/PubMed/pubmedApi.py
index 83712503544b3d5c0464b1e9adb6cedb497f7abf..8523c34c8bfe4b1090a354ad457787bd1f1b603c 100644
--- a/dataSources/PubMed/pubmedApi.py
+++ b/dataSources/PubMed/pubmedApi.py
@@ -7,6 +7,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))
 from requests import get
 from parsers.xmlParser import parseXmlFile
 import json
+from variables.pubmed import PUBMED_API_KEY
+import xmltodict
 
 TMP_DIR_NAME = "./tmp"
 TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), TMP_DIR_NAME))
@@ -28,9 +30,14 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
     print(f"Date min: {date_min}")
     print(f"Date max: {date_max}")
 
-    url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term={term}&retmode=json&mindate={date_min}&maxdate={date_max}&usehistory=y'
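+    # usehistory=y keeps the match set on NCBI's History server; the returned WebEnv/query_key feed the EFetch call below, and api_key raises the E-utilities rate limit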
+    url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&api_key={PUBMED_API_KEY}&term={term}&retmode=json&mindate={date_min}&maxdate={date_max}&usehistory=y'
 
-    response = get(url)
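+    # Retry the search request until it succeeds; transient network errors are printed and retried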
+    while True:
+        try:
+            response = get(url)
+            break
+        except Exception as e:
+            print(e)
 
     search_res = response.json()
 
@@ -42,35 +49,81 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
 
     url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&query_key={query_key}&WebEnv={webenv}"
 
-    response = get(url)
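+    # Same retry guard for the EFetch download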
+    while True:
+        try:
+            response = get(url)
+            break
+        except Exception as e:
+            print(e)
 
     with open(f"{TMP_DIR}/{TMP_FILENAME}", "w+", encoding="utf-8") as file:
         file.write(response.text)
 
-    obj = parseXmlFile(f"{TMP_DIR}/{TMP_FILENAME}")
+    obj = xmltodict.parse(response.text)
+    obj = obj["PubmedArticleSet"]
 
     data_list = []
 
-    for key in obj.keys():
+    for key in obj:
         if isinstance(obj[key], list):
             i = 0
             for entrie in obj[key]:
                 if "MedlineCitation" in entrie:
                     if "MeshHeadingList" in entrie["MedlineCitation"]:
                         data = {}
-                        data["PMID"] = entrie["MedlineCitation"]["PMID"]
-                        data["Title"] = entrie["MedlineCitation"]["Article"]["Journal"]["Title"]
-                        data["ArticleTitle"] = entrie["MedlineCitation"]["Article"]["ArticleTitle"]
+                        data["PMID"] = entrie["MedlineCitation"]["PMID"]["#text"]
+
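+                        # xmltodict returns a plain string for simple elements and a list of fragments when inline markup is present; handle both shapes below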
+                        data["Title"] = ""
+                        if isinstance(entrie["MedlineCitation"]["Article"]["Journal"]["Title"], list):
+                            for part in entrie["MedlineCitation"]["Article"]["Journal"]["Title"]:
+                                if "#text" in part:
+                                    data["Title"] += part["#text"]
+                        else:
+                            data["Title"] = entrie["MedlineCitation"]["Article"]["Journal"]["Title"]
+
+                        data["ArticleTitle"] = ""
+                        if isinstance(entrie["MedlineCitation"]["Article"]["ArticleTitle"], list):
+                            for part in entrie["MedlineCitation"]["Article"]["ArticleTitle"]:
+                                if "#text" in part:
+                                    data["ArticleTitle"] += part["#text"]
+                        else:
+                            data["ArticleTitle"] = entrie["MedlineCitation"]["Article"]["ArticleTitle"]
                         
                         data["Abstract"] = ""
                         if "Abstract" in entrie["MedlineCitation"]["Article"] :
-                            data["Abstract"] = entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
+                            if isinstance(entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], list):
+                                for part in entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]:
+                                    if "#text" in part:
+                                        data["Abstract"] += part["#text"]
+                            else:
+                                data["Abstract"] = entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
                         
                         data["Predictions"] = []
 
                         data["MeshTerms"] = []
-                        for meshTerm in entrie["MedlineCitation"]["MeshHeadingList"]["MeshHeading"]:
-                            data["MeshTerms"].append(meshTerm["DescriptorName"])
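+                        # A single MeshHeading is parsed as a dict rather than a list, so check the shape first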
+                        if isinstance(entrie["MedlineCitation"]["MeshHeadingList"]["MeshHeading"], list):
+                            for meshTerm in entrie["MedlineCitation"]["MeshHeadingList"]["MeshHeading"]:
+                                data["MeshTerms"].append(meshTerm["DescriptorName"]["#text"])
+                        else:
+                            data["MeshTerms"].append(entrie["MedlineCitation"]["MeshHeadingList"]["MeshHeading"]["DescriptorName"]["#text"])
+
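+                        # Use the "pubmed" entry from PubmedData/History as the article's reference date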
+                        for date in entrie["PubmedData"]["History"]["PubMedPubDate"]:
+                            if date["@PubStatus"] == "pubmed":
+                                data["Date"] = {
+                                    "Year": date["Year"],
+                                    "Month": date["Month"],
+                                    "Day": date["Day"]
+                                }
+                                break
+
                         if debug:
                             print(f"Index: {obj[key].index(entrie)}")
diff --git a/dataSources/PubMed/store_data_localy.py b/dataSources/PubMed/store_data_localy.py
new file mode 100644
index 0000000000000000000000000000000000000000..718ff1b6f709d20ec3aaffa44344c87a2e13a9fc
--- /dev/null
+++ b/dataSources/PubMed/store_data_localy.py
@@ -0,0 +1,37 @@
+import sys
+import os
+from datetime import datetime, timedelta
+import time
+import json
+
+# Add the parent directory to the module search path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+
+from dataSources.PubMed.pubmedApi import getPubmedData
+from variables.pubmed import *
+from dataSources.PubMed.util import *
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
+
+ncds_mesh_noexp = get_mesh_noexp_term(NCDS_MESH_TERM)
+
+search_term = url_encode(" OR ".join(ncds_mesh_noexp))
+
+data = []
+
+with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
+    json.dump(data, json_file, indent=4)
+
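+# Walk the 2022-2024 window one week at a time so each PubMed request stays small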
+current_date = datetime(2022, 1, 1)
+
+while current_date < datetime(2024, 12, 31):
+    next_date = current_date + timedelta(weeks=1)
+
+    data += getPubmedData(search_term, current_date.strftime("%Y/%m/%d"), next_date.strftime("%Y/%m/%d"))
+
+    current_date = next_date
+
+    time.sleep(0.1)
+
+with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
+    json.dump(data, json_file, indent=4)
\ No newline at end of file
diff --git a/models/LLM/Tokenizer/test.py b/models/LLM/Tokenizer/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..523b01ca6eb43db3cecd5414d565e297ade8bcfc
--- /dev/null
+++ b/models/LLM/Tokenizer/test.py
@@ -0,0 +1,13 @@
+from transformers import AutoTokenizer
+
+# Choose a tokenizer (e.g., GPT-2, BERT, T5, etc.)
+tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+
+# Your text
+text = "Hugging Face makes NLP easy!"
+
+# Tokenize and count tokens
+tokens = tokenizer(text, return_tensors="pt")  # You can also use return_tensors="tf" or "np"
+num_tokens = len(tokens["input_ids"][0])
+
+print(f"Number of tokens: {num_tokens}")
\ No newline at end of file
diff --git a/parsers/__pycache__/xmlParser.cpython-313.pyc b/parsers/__pycache__/xmlParser.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f7fac1b757ad2110117af27254fa0d7e20f356b6
Binary files /dev/null and b/parsers/__pycache__/xmlParser.cpython-313.pyc differ
diff --git a/parsers/xmlParser.py b/parsers/xmlParser.py
index 6c88cd6386dfe4b6940e13d8b904e124920f6145..5a9ecd7a9a70635978f193c2d4501367135d863d 100644
--- a/parsers/xmlParser.py
+++ b/parsers/xmlParser.py
@@ -1,26 +1,30 @@
 import xml.etree.ElementTree as ET
 
 def xml_to_obj(xml_element):
-    res = {}   
+    res = {}
     
+    if xml_element.attrib:
+        res["@attributes"] = xml_element.attrib
+
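+    # Element text with surrounding whitespace stripped; whitespace-only text counts as absent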
+    text = xml_element.text.strip() if xml_element.text and xml_element.text.strip() else None
+
     for child in xml_element:
-        if child.text:
-            res[child.tag] = child.text
-        else:
-            child_dict = xml_to_obj(child)
+        child_dict = xml_to_obj(child)
 
-            if child.tag in res:
-                if isinstance(res[child.tag], list):
-                    res[child.tag].append(child_dict)
-                else:
-                    res[child.tag] = [res[child.tag], child_dict]
+        if child.tag in res:
+            if isinstance(res[child.tag], list):
+                res[child.tag].append(child_dict)
             else:
-                res[child.tag] = child_dict
-    
+                res[child.tag] = [res[child.tag], child_dict]
+        else:
+            res[child.tag] = child_dict
+
+    if text:
+        if not res:
+            return text
+        # Keep text that coexists with attributes or children instead of dropping it
+        res["#text"] = text
+
     return res
 
 def parseXmlFile(filename):
     tree = ET.parse(filename)
     root = tree.getroot()
-
-    return xml_to_obj(root)
\ No newline at end of file
+    return xml_to_obj(root)