From 088f25471f97b8653a3c2e498471f630d0d0ea7f Mon Sep 17 00:00:00 2001
From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch>
Date: Thu, 13 Mar 2025 16:50:33 +0100
Subject: [PATCH] =?UTF-8?q?Cr=C3=A9ation=20du=20script=20de=20comptage=20d?=
 =?UTF-8?q?'article=20pubmed=20par=20jour/semaine/mois=20depuis=20sauvegar?=
 =?UTF-8?q?de=20locale?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dataSources/PubMed/data_num_locale.py | 176 +++++++++++++++++++++-----
 1 file changed, 146 insertions(+), 30 deletions(-)

diff --git a/dataSources/PubMed/data_num_locale.py b/dataSources/PubMed/data_num_locale.py
index c08b13da0..618b4d196 100644
--- a/dataSources/PubMed/data_num_locale.py
+++ b/dataSources/PubMed/data_num_locale.py
@@ -12,6 +12,17 @@ DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
 
 file_path = f"{DATA_DIR}/save_3_years.json"
 
+def get_date_indices(date, start_date):
+    """Return the (day, week, month) offsets of `date` counted from `start_date`."""
+    elapsed_days = (date - start_date).days
+    # Weeks are the whole 7-day spans contained in the day offset (floor division).
+    elapsed_weeks = elapsed_days // 7
+    # Months compare only the year and month fields; the day of month is ignored.
+    year_gap = date.year - start_date.year
+    elapsed_months = year_gap * 12 + (date.month - start_date.month)
+
+    return elapsed_days, elapsed_weeks, elapsed_months
+
 def match_mesh_terms(article_mesh_terms, ncd, keyword):
     if ncd in article_mesh_terms:
         if isinstance(keyword, list):
@@ -26,47 +37,152 @@ def match_mesh_terms(article_mesh_terms, ncd, keyword):
     else:
         return False
 
-def filter_articles(articles, start_date, end_date, ncd, keyword):
-    filtered_articles = []
-    
-    for article in articles:
-        article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
-        
-        if start_date <= article_date < end_date:
-
-            mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                filtered_articles.append(article)
-
-    return filtered_articles
-
 with open(file_path, "r", encoding="utf-8") as file:
     data = json.load(file)
 
 print(len(data))
 
-
-ncds_mesh_terms = [mesh_term.lower() for mesh_term in NCDS_MESH_TERM]
-keywords_mesh_terms = [mesh_term.lower() for mesh_term in KEYWORDS_MESH_TERM]
+ncds_mesh_terms = [term.lower() for term in NCDS_MESH_TERM.values()]  # only the mapped MeSH terms are needed, not the keys
+keywords_mesh_terms = [term.lower() for term in KEYWORDS_MESH_TERM.values()]
 keywords_subheading_mesh_terms = keywords_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_SUBHEADING]
 keywords_site_proposition_mesh_terms = keywords_subheading_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_SITE_PROPOSITION]
 keywords_proposition_mesh_terms = keywords_site_proposition_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_PROPOSITION]
 
-for ncd in ncds_mesh_terms:
-    count = []
+counts = {}
+start_date = datetime(2022, 1, 1)
+# counts[ncd][bucket][period][index] holds the PMIDs seen; index is the offset from start_date.
+for article in data:
+    # Lower-cased so the comparison agrees with the lower-cased reference term lists.
+    mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
+    # NOTE(review): "ALL" is only initialised here; nothing below fills it.
+    if "ALL" not in counts:
+        counts["ALL"] = {}
+
+    for ncd in ncds_mesh_terms:
+        if ncd not in counts:
+            counts[ncd] = {
+                "KEYWORDS" : {
+                    "day": {},
+                    "week": {},
+                    "month": {}
+                },
+                "SUBHEADINGS" : {
+                    "day": {},
+                    "week": {},
+                    "month": {}
+                },
+                "SITE PROPOSITION" : {
+                    "day": {},
+                    "week": {},
+                    "month": {}
+                },
+                "PROPOSITION" : {
+                    "day": {},
+                    "week": {},
+                    "month": {}
+                },
+            }
+        # "KEYWORDS" bucket: base keyword terms only.
+        for keyword in keywords_mesh_terms:
+            # Match against the loop's keyword so each term filters on its own.
+            if match_mesh_terms(mesh_terms, ncd, keyword):
+
+                article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
+
+                day_index, week_index, month_index = get_date_indices(article_date, start_date)
+
+                if day_index not in counts[ncd]["KEYWORDS"]["day"]:
+                    counts[ncd]["KEYWORDS"]["day"][day_index] = []
+
+                if week_index not in counts[ncd]["KEYWORDS"]["week"]:
+                    counts[ncd]["KEYWORDS"]["week"][week_index] = []
+
+                if month_index not in counts[ncd]["KEYWORDS"]["month"]:
+                    counts[ncd]["KEYWORDS"]["month"][month_index] = []
+                # A PMID is stored at most once per bucket, even if several keywords match.
+                if article["PMID"] not in counts[ncd]["KEYWORDS"]["day"][day_index]:
+                    counts[ncd]["KEYWORDS"]["day"][day_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["KEYWORDS"]["week"][week_index]:
+                    counts[ncd]["KEYWORDS"]["week"][week_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["KEYWORDS"]["month"][month_index]:
+                    counts[ncd]["KEYWORDS"]["month"][month_index].append(article["PMID"])
+        # "SUBHEADINGS" bucket: base keywords plus subheading terms.
+        for keyword in keywords_subheading_mesh_terms:
+
+            if match_mesh_terms(mesh_terms, ncd, keyword):
+
+                article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
+
+                day_index, week_index, month_index = get_date_indices(article_date, start_date)
+
+                if day_index not in counts[ncd]["SUBHEADINGS"]["day"]:
+                    counts[ncd]["SUBHEADINGS"]["day"][day_index] = []
+
+                if week_index not in counts[ncd]["SUBHEADINGS"]["week"]:
+                    counts[ncd]["SUBHEADINGS"]["week"][week_index] = []
+
+                if month_index not in counts[ncd]["SUBHEADINGS"]["month"]:
+                    counts[ncd]["SUBHEADINGS"]["month"][month_index] = []
+
+                if article["PMID"] not in counts[ncd]["SUBHEADINGS"]["day"][day_index]:
+                    counts[ncd]["SUBHEADINGS"]["day"][day_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["SUBHEADINGS"]["week"][week_index]:
+                    counts[ncd]["SUBHEADINGS"]["week"][week_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["SUBHEADINGS"]["month"][month_index]:
+                    counts[ncd]["SUBHEADINGS"]["month"][month_index].append(article["PMID"])
+        # "SITE PROPOSITION" bucket: the previous list plus site-proposition terms.
+        for keyword in keywords_site_proposition_mesh_terms:
+
+            if match_mesh_terms(mesh_terms, ncd, keyword):
+
+                article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
+
+                day_index, week_index, month_index = get_date_indices(article_date, start_date)
+
+                if day_index not in counts[ncd]["SITE PROPOSITION"]["day"]:
+                    counts[ncd]["SITE PROPOSITION"]["day"][day_index] = []
+
+                if week_index not in counts[ncd]["SITE PROPOSITION"]["week"]:
+                    counts[ncd]["SITE PROPOSITION"]["week"][week_index] = []
+
+                if month_index not in counts[ncd]["SITE PROPOSITION"]["month"]:
+                    counts[ncd]["SITE PROPOSITION"]["month"][month_index] = []
+
+                if article["PMID"] not in counts[ncd]["SITE PROPOSITION"]["day"][day_index]:
+                    counts[ncd]["SITE PROPOSITION"]["day"][day_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["SITE PROPOSITION"]["week"][week_index]:
+                    counts[ncd]["SITE PROPOSITION"]["week"][week_index].append(article["PMID"])
+
+                if article["PMID"] not in counts[ncd]["SITE PROPOSITION"]["month"][month_index]:
+                    counts[ncd]["SITE PROPOSITION"]["month"][month_index].append(article["PMID"])
+        # "PROPOSITION" bucket: the previous list plus proposition terms.
+        for keyword in keywords_proposition_mesh_terms:
+
+            if match_mesh_terms(mesh_terms, ncd, keyword):
+
+                article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
+
+                day_index, week_index, month_index = get_date_indices(article_date, start_date)
+
+                if day_index not in counts[ncd]["PROPOSITION"]["day"]:
+                    counts[ncd]["PROPOSITION"]["day"][day_index] = []
 
-    for keyword in keywords_mesh_terms:
-        start_date = datetime(2022, 1, 1)
+                if week_index not in counts[ncd]["PROPOSITION"]["week"]:
+                    counts[ncd]["PROPOSITION"]["week"][week_index] = []
 
-        while(start_date < datetime(2024, 12, 31)):
-            end_date = start_date + timedelta(weeks=1)
+                if month_index not in counts[ncd]["PROPOSITION"]["month"]:
+                    counts[ncd]["PROPOSITION"]["month"][month_index] = []
 
-            count.append(len(filter_articles(data, start_date, end_date, ncd, keyword)))
+                if article["PMID"] not in counts[ncd]["PROPOSITION"]["day"][day_index]:
+                    counts[ncd]["PROPOSITION"]["day"][day_index].append(article["PMID"])
 
-            start_date = end_date
+                if article["PMID"] not in counts[ncd]["PROPOSITION"]["week"][week_index]:
+                    counts[ncd]["PROPOSITION"]["week"][week_index].append(article["PMID"])
 
-    print(f"NCD: {ncd}")
-    print(f"Min: {min(count)}")
-    print(f"Max: {max(count)}")
-    print(f"Mean: {statistics.mean(count)}")
-    print(f"Median: {statistics.median(count)}")
+                if article["PMID"] not in counts[ncd]["PROPOSITION"]["month"][month_index]:
+                    counts[ncd]["PROPOSITION"]["month"][month_index].append(article["PMID"])
-- 
GitLab