From 623e3f6fc09dad1319445dd2c3a82db3f3155f7c Mon Sep 17 00:00:00 2001
From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch>
Date: Mon, 17 Mar 2025 18:38:22 +0100
Subject: [PATCH] =?UTF-8?q?Regeneration=20de=20la=20sauvegarde=20locale=20?=
 =?UTF-8?q?en=20enlevant=20les=20doublons=20et=20en=20ajoutant=20les=20pot?=
 =?UTF-8?q?entiels=20articles=20loup=C3=A9s.=20Regeration=20du=20calcule?=
 =?UTF-8?q?=20d'articles=20post=C3=A9s=20dans=20pubmed=20avec=20le=20champ?=
 =?UTF-8?q?=20SANS=20KEYWORDS?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    |   1 -
 .../__pycache__/pubmedApi.cpython-313.pyc     | Bin 6947 -> 6976 bytes
 .../PubMed/{doc => data}/data_num.json        |   0
 .../data_num_keyword_no_mesh.json             |   0
 .../{doc => data}/locale_articles_count.json  | 581 +++++++++++-------
 dataSources/PubMed/data_num_locale.py         | 287 +++------
 dataSources/PubMed/getPubmedData.py           |  15 +
 dataSources/PubMed/pubmedApi.py               |   3 +
 dataSources/PubMed/store_data_localy.py       |  27 +-
 9 files changed, 490 insertions(+), 424 deletions(-)
 rename dataSources/PubMed/{doc => data}/data_num.json (100%)
 rename dataSources/PubMed/{doc => data}/data_num_keyword_no_mesh.json (100%)
 rename dataSources/PubMed/{doc => data}/locale_articles_count.json (52%)

diff --git a/.gitignore b/.gitignore
index 575b7a337..f13d16f8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,2 @@
 dataSources/PubMed/tmp/*
-dataSources/PubMed/data/*
 .venv
\ No newline at end of file
diff --git a/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc b/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc
index 2b6898c16a135b047d0a3d8a3b5d360a8a9eb13e..30b6928481176e9631551ca208cae0ccac747add 100644
GIT binary patch
delta 213
zcmZ2%cEF7HGcPX}0|Ns?pw5l-{*AmYOpJdv2Qk?*2?R@o@>w%7FeEZDFz7R=Fl?U4
zBFn*edGdN5Pv&I|43p(~rC8iq{Bkx+@;+f<tls=jFpZIyU2=i&Wp<St{DK{v4>qR=
zZ)Ro8oh%{g8CAu^z^8bHSK$L2gMi4dVh;uehJz~HA%YBt%(z3$7!FIxgi14C))x=e
zWxgWF6=J}AMVdQAoB4`1D~N5z6)MVnm4Q1{fcdHbD~K)46{^L4RhKK&dh>3{ct%ba
MCPvAkI0gm=07Dx#UjP6A

delta 165
zcmX?Lw%CmKGcPX}0|Ns?gX!gTzm2>uOpI?f2Qk?*ZC=PC%fWbh@_rsq=6MVZll6I}
zSln6sQZ{SyK4D?Z+sr4F#<;mrcrz<w`eai{PsWnT^^#)FoD7G|xI^?94ok^|3Nv3;
z7Y|iszQV~BqQQJcm^(z7`HC_th^@yJ%FBGkf;*Ih`6>r%C@=F>VXjam_N%H~p~jnU
SNX9dAIx;az7R50zFaQA6t}7b=

diff --git a/dataSources/PubMed/doc/data_num.json b/dataSources/PubMed/data/data_num.json
similarity index 100%
rename from dataSources/PubMed/doc/data_num.json
rename to dataSources/PubMed/data/data_num.json
diff --git a/dataSources/PubMed/doc/data_num_keyword_no_mesh.json b/dataSources/PubMed/data/data_num_keyword_no_mesh.json
similarity index 100%
rename from dataSources/PubMed/doc/data_num_keyword_no_mesh.json
rename to dataSources/PubMed/data/data_num_keyword_no_mesh.json
diff --git a/dataSources/PubMed/doc/locale_articles_count.json b/dataSources/PubMed/data/locale_articles_count.json
similarity index 52%
rename from dataSources/PubMed/doc/locale_articles_count.json
rename to dataSources/PubMed/data/locale_articles_count.json
index cb2b3056f..74066dd3e 100644
--- a/dataSources/PubMed/doc/locale_articles_count.json
+++ b/dataSources/PubMed/data/locale_articles_count.json
@@ -1,631 +1,784 @@
 {
+    "ALL": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 700,
+                "mean": 142.64
+            },
+            "week": {
+                "min": 0,
+                "max": 1436,
+                "mean": 996.7080745341615
+            },
+            "month": {
+                "min": 95,
+                "max": 5131,
+                "mean": 4337.027027027027
+            }
+        },
+        "KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 39,
+                "mean": 6.383111111111111
+            },
+            "week": {
+                "min": 0,
+                "max": 69,
+                "mean": 44.60248447204969
+            },
+            "month": {
+                "min": 9,
+                "max": 263,
+                "mean": 194.0810810810811
+            }
+        },
+        "SUBHEADINGS": {
+            "day": {
+                "min": 0,
+                "max": 39,
+                "mean": 6.383111111111111
+            },
+            "week": {
+                "min": 0,
+                "max": 69,
+                "mean": 44.60248447204969
+            },
+            "month": {
+                "min": 9,
+                "max": 263,
+                "mean": 194.0810810810811
+            }
+        },
+        "SITE PROPOSITION": {
+            "day": {
+                "min": 0,
+                "max": 47,
+                "mean": 8.278222222222222
+            },
+            "week": {
+                "min": 0,
+                "max": 93,
+                "mean": 57.84472049689441
+            },
+            "month": {
+                "min": 10,
+                "max": 311,
+                "mean": 251.7027027027027
+            }
+        },
+        "PROPOSITION": {
+            "day": {
+                "min": 0,
+                "max": 68,
+                "mean": 11.38488888888889
+            },
+            "week": {
+                "min": 0,
+                "max": 124,
+                "mean": 79.5527950310559
+            },
+            "month": {
+                "min": 14,
+                "max": 432,
+                "mean": 346.1621621621622
+            }
+        }
+    },
     "noncommunicable diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 7,
+                "mean": 1.3555555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 23,
+                "mean": 9.472049689440993
+            },
+            "month": {
+                "min": 0,
+                "max": 57,
+                "mean": 41.21621621621622
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.20639269406392693
+                "mean": 0.2008888888888889
             },
             "week": {
                 "min": 0,
                 "max": 6,
-                "mean": 1.4394904458598725
+                "mean": 1.4037267080745341
             },
             "month": {
-                "min": 2,
+                "min": 0,
                 "max": 12,
-                "mean": 6.277777777777778
+                "mean": 6.108108108108108
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.20639269406392693
+                "mean": 0.2008888888888889
             },
             "week": {
                 "min": 0,
                 "max": 6,
-                "mean": 1.4394904458598725
+                "mean": 1.4037267080745341
             },
             "month": {
-                "min": 2,
+                "min": 0,
                 "max": 12,
-                "mean": 6.277777777777778
+                "mean": 6.108108108108108
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.2328767123287671
+                "mean": 0.22666666666666666
             },
             "week": {
                 "min": 0,
                 "max": 6,
-                "mean": 1.624203821656051
+                "mean": 1.5838509316770186
             },
             "month": {
-                "min": 2,
+                "min": 0,
                 "max": 14,
-                "mean": 7.083333333333333
+                "mean": 6.891891891891892
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 4,
-                "mean": 0.34885844748858447
+                "mean": 0.33955555555555555
             },
             "week": {
                 "min": 0,
                 "max": 8,
-                "mean": 2.43312101910828
+                "mean": 2.372670807453416
             },
             "month": {
-                "min": 4,
+                "min": 0,
                 "max": 17,
-                "mean": 10.61111111111111
+                "mean": 10.324324324324325
             }
         }
     },
     "diabetes mellitus": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 58,
+                "mean": 16.045333333333332
+            },
+            "week": {
+                "min": 0,
+                "max": 183,
+                "mean": 112.11801242236025
+            },
+            "month": {
+                "min": 6,
+                "max": 662,
+                "mean": 487.86486486486484
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 5,
-                "mean": 0.7497725204731575
+                "mean": 0.7315555555555555
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 11,
-                "mean": 5.248407643312102
+                "mean": 5.111801242236025
             },
             "month": {
-                "min": 3,
+                "min": 1,
                 "max": 31,
-                "mean": 22.27027027027027
+                "mean": 22.243243243243242
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 5,
-                "mean": 0.7497725204731575
+                "mean": 0.7315555555555555
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 11,
-                "mean": 5.248407643312102
+                "mean": 5.111801242236025
             },
             "month": {
-                "min": 3,
+                "min": 1,
                 "max": 31,
-                "mean": 22.27027027027027
+                "mean": 22.243243243243242
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 8,
-                "mean": 1.0454959053685169
+                "mean": 1.0222222222222221
             },
             "week": {
-                "min": 2,
+                "min": 0,
                 "max": 15,
-                "mean": 7.318471337579618
+                "mean": 7.142857142857143
             },
             "month": {
-                "min": 3,
+                "min": 1,
                 "max": 44,
-                "mean": 31.054054054054053
+                "mean": 31.08108108108108
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
-                "max": 10,
-                "mean": 1.4249317561419472
+                "max": 11,
+                "mean": 1.392
             },
             "week": {
-                "min": 3,
-                "max": 20,
-                "mean": 9.97452229299363
+                "min": 0,
+                "max": 19,
+                "mean": 9.726708074534162
             },
             "month": {
-                "min": 5,
+                "min": 2,
                 "max": 57,
                 "mean": 42.32432432432432
             }
         }
     },
     "neoplasms": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 311,
+                "mean": 58.556444444444445
+            },
+            "week": {
+                "min": 0,
+                "max": 598,
+                "mean": 409.167701863354
+            },
+            "month": {
+                "min": 35,
+                "max": 2108,
+                "mean": 1780.4324324324325
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 16,
-                "mean": 2.692447679708826
+                "mean": 2.6266666666666665
             },
             "week": {
-                "min": 2,
+                "min": 0,
                 "max": 33,
-                "mean": 18.727848101265824
+                "mean": 18.354037267080745
             },
             "month": {
-                "min": 14,
+                "min": 4,
                 "max": 101,
-                "mean": 79.97297297297297
+                "mean": 79.86486486486487
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 16,
-                "mean": 2.692447679708826
+                "mean": 2.6266666666666665
             },
             "week": {
-                "min": 2,
+                "min": 0,
                 "max": 33,
-                "mean": 18.727848101265824
+                "mean": 18.354037267080745
             },
             "month": {
-                "min": 14,
+                "min": 4,
                 "max": 101,
-                "mean": 79.97297297297297
+                "mean": 79.86486486486487
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 16,
-                "mean": 2.735213830755232
+                "mean": 2.6684444444444444
             },
             "week": {
-                "min": 2,
+                "min": 0,
                 "max": 33,
-                "mean": 19.025316455696203
+                "mean": 18.645962732919255
             },
             "month": {
-                "min": 14,
+                "min": 4,
                 "max": 101,
-                "mean": 81.24324324324324
+                "mean": 81.13513513513513
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 21,
-                "mean": 3.5281818181818183
+                "mean": 3.447111111111111
             },
             "week": {
-                "min": 2,
+                "min": 0,
                 "max": 43,
-                "mean": 24.563291139240505
+                "mean": 24.08695652173913
             },
             "month": {
-                "min": 17,
+                "min": 4,
                 "max": 127,
-                "mean": 104.89189189189189
+                "mean": 104.8108108108108
             }
         }
     },
     "respiratory tract diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 7,
+                "mean": 0.7795555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 13,
+                "mean": 5.447204968944099
+            },
+            "month": {
+                "min": 3,
+                "max": 41,
+                "mean": 23.7027027027027
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
             },
             "week": {
                 "min": 0,
                 "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
             },
             "month": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
             },
             "week": {
                 "min": 0,
                 "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
             },
             "month": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
             },
             "week": {
                 "min": 0,
                 "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
             },
             "month": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 1,
-                "mean": 0.0364963503649635
+                "mean": 0.034666666666666665
             },
             "week": {
                 "min": 0,
                 "max": 2,
-                "mean": 0.25316455696202533
+                "mean": 0.2422360248447205
             },
             "month": {
                 "min": 0,
                 "max": 3,
-                "mean": 1.0810810810810811
+                "mean": 1.054054054054054
             }
         }
     },
     "cardiovascular diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 119,
+                "mean": 21.612444444444446
+            },
+            "week": {
+                "min": 0,
+                "max": 260,
+                "mean": 151.01863354037266
+            },
+            "month": {
+                "min": 20,
+                "max": 790,
+                "mean": 657.1351351351351
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 5,
-                "mean": 0.6584699453551912
+                "mean": 0.64
             },
             "week": {
                 "min": 0,
                 "max": 13,
-                "mean": 4.575949367088608
+                "mean": 4.472049689440993
             },
             "month": {
-                "min": 3,
+                "min": 0,
                 "max": 40,
-                "mean": 19.54054054054054
+                "mean": 19.45945945945946
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 5,
-                "mean": 0.6584699453551912
+                "mean": 0.64
             },
             "week": {
                 "min": 0,
                 "max": 13,
-                "mean": 4.575949367088608
+                "mean": 4.472049689440993
             },
             "month": {
-                "min": 3,
+                "min": 0,
                 "max": 40,
-                "mean": 19.54054054054054
+                "mean": 19.45945945945946
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 5,
-                "mean": 0.6930783242258652
+                "mean": 0.6737777777777778
             },
             "week": {
                 "min": 0,
                 "max": 13,
-                "mean": 4.8164556962025316
+                "mean": 4.708074534161491
             },
             "month": {
-                "min": 3,
+                "min": 0,
                 "max": 42,
-                "mean": 20.56756756756757
+                "mean": 20.486486486486488
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 8,
-                "mean": 1.1474067333939946
+                "mean": 1.1173333333333333
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 24,
-                "mean": 7.981012658227848
+                "mean": 7.807453416149069
             },
             "month": {
-                "min": 5,
+                "min": 1,
                 "max": 58,
-                "mean": 34.08108108108108
+                "mean": 33.972972972972975
             }
         }
     },
     "mental health": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 74,
+                "mean": 16.589333333333332
+            },
+            "week": {
+                "min": 0,
+                "max": 213,
+                "mean": 115.9192546583851
+            },
+            "month": {
+                "min": 8,
+                "max": 679,
+                "mean": 504.4054054054054
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 8,
-                "mean": 1.1856232939035487
+                "mean": 1.1564444444444444
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 21,
-                "mean": 8.246835443037975
+                "mean": 8.080745341614907
             },
             "month": {
-                "min": 4,
+                "min": 1,
                 "max": 52,
-                "mean": 35.21621621621622
+                "mean": 35.16216216216216
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 8,
-                "mean": 1.1856232939035487
+                "mean": 1.1564444444444444
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 21,
-                "mean": 8.246835443037975
+                "mean": 8.080745341614907
             },
             "month": {
-                "min": 4,
+                "min": 1,
                 "max": 52,
-                "mean": 35.21621621621622
+                "mean": 35.16216216216216
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 8,
-                "mean": 1.2438580527752503
+                "mean": 1.2133333333333334
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 22,
-                "mean": 8.651898734177216
+                "mean": 8.478260869565217
             },
             "month": {
-                "min": 4,
+                "min": 1,
                 "max": 55,
-                "mean": 36.945945945945944
+                "mean": 36.891891891891895
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 11,
-                "mean": 2.1618181818181816
+                "mean": 2.1093333333333333
             },
             "week": {
-                "min": 1,
+                "min": 0,
                 "max": 32,
-                "mean": 15.050632911392405
+                "mean": 14.73913043478261
             },
             "month": {
-                "min": 8,
-                "max": 84,
-                "mean": 64.27027027027027
+                "min": 2,
+                "max": 85,
+                "mean": 64.13513513513513
             }
         }
     },
     "diabetes mellitus, type 1": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 40,
+                "mean": 7.019555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 78,
+                "mean": 49.04968944099379
+            },
+            "month": {
+                "min": 2,
+                "max": 257,
+                "mean": 213.43243243243242
+            }
+        },
         "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.3072014585232452
+                "mean": 0.29688888888888887
             },
             "week": {
                 "min": 0,
                 "max": 8,
-                "mean": 2.132911392405063
+                "mean": 2.0745341614906834
             },
             "month": {
-                "min": 3,
+                "min": 1,
                 "max": 19,
-                "mean": 9.108108108108109
+                "mean": 9.027027027027026
             }
         },
         "SUBHEADINGS": {
             "day": {
                 "min": 0,
                 "max": 3,
-                "mean": 0.3072014585232452
+                "mean": 0.29688888888888887
             },
             "week": {
                 "min": 0,
                 "max": 8,
-                "mean": 2.132911392405063
+                "mean": 2.0745341614906834
             },
             "month": {
-                "min": 3,
+                "min": 1,
                 "max": 19,
-                "mean": 9.108108108108109
+                "mean": 9.027027027027026
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 13,
-                "mean": 1.5141037306642402
+                "mean": 1.4755555555555555
             },
             "week": {
-                "min": 3,
+                "min": 0,
                 "max": 30,
-                "mean": 10.531645569620252
+                "mean": 10.31055900621118
             },
             "month": {
-                "min": 5,
+                "min": 1,
                 "max": 66,
-                "mean": 44.972972972972975
+                "mean": 44.86486486486486
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
                 "max": 13,
-                "mean": 1.5941765241128298
+                "mean": 1.5537777777777777
             },
             "week": {
-                "min": 3,
+                "min": 0,
                 "max": 30,
-                "mean": 11.08860759493671
+                "mean": 10.857142857142858
             },
             "month": {
-                "min": 5,
+                "min": 1,
                 "max": 68,
-                "mean": 47.351351351351354
+                "mean": 47.24324324324324
             }
         }
     },
     "diabetes mellitus, type 2": {
-        "KEYWORDS": {
+        "NO KEYWORDS": {
             "day": {
                 "min": 0,
-                "max": 7,
-                "mean": 0.9763421292083713
+                "max": 146,
+                "mean": 28.133333333333333
             },
             "week": {
                 "min": 0,
-                "max": 14,
-                "mean": 6.791139240506329
+                "max": 282,
+                "mean": 196.583850931677
             },
             "month": {
-                "min": 6,
-                "max": 45,
-                "mean": 29
+                "min": 26,
+                "max": 1021,
+                "mean": 855.4054054054054
             }
         },
-        "SUBHEADINGS": {
+        "KEYWORDS": {
             "day": {
                 "min": 0,
                 "max": 7,
-                "mean": 0.9763421292083713
+                "mean": 0.952
             },
             "week": {
                 "min": 0,
                 "max": 14,
-                "mean": 6.791139240506329
+                "mean": 6.6521739130434785
             },
             "month": {
-                "min": 6,
+                "min": 3,
                 "max": 45,
-                "mean": 29
+                "mean": 28.945945945945947
             }
         },
-        "SITE PROPOSITION": {
+        "SUBHEADINGS": {
             "day": {
                 "min": 0,
-                "max": 10,
-                "mean": 1.520909090909091
+                "max": 7,
+                "mean": 0.952
             },
             "week": {
-                "min": 1,
-                "max": 23,
-                "mean": 10.58860759493671
-            },
-            "month": {
-                "min": 8,
-                "max": 61,
-                "mean": 45.21621621621622
-            }
-        },
-        "PROPOSITION": {
-            "day": {
                 "min": 0,
                 "max": 14,
-                "mean": 2.099090909090909
-            },
-            "week": {
-                "min": 2,
-                "max": 28,
-                "mean": 14.613924050632912
-            },
-            "month": {
-                "min": 10,
-                "max": 84,
-                "mean": 62.4054054054054
-            }
-        }
-    },
-    "ALL": {
-        "KEYWORDS": {
-            "day": {
-                "min": 0,
-                "max": 39,
-                "mean": 6.54
-            },
-            "week": {
-                "min": 8,
-                "max": 69,
-                "mean": 45.53164556962025
-            },
-            "month": {
-                "min": 31,
-                "max": 262,
-                "mean": 194.43243243243242
-            }
-        },
-        "SUBHEADINGS": {
-            "day": {
-                "min": 0,
-                "max": 39,
-                "mean": 6.54
-            },
-            "week": {
-                "min": 8,
-                "max": 69,
-                "mean": 45.53164556962025
+                "mean": 6.6521739130434785
             },
             "month": {
-                "min": 31,
-                "max": 262,
-                "mean": 194.43243243243242
+                "min": 3,
+                "max": 45,
+                "mean": 28.945945945945947
             }
         },
         "SITE PROPOSITION": {
             "day": {
                 "min": 0,
-                "max": 47,
-                "mean": 8.478181818181818
+                "max": 10,
+                "mean": 1.4844444444444445
             },
             "week": {
-                "min": 8,
-                "max": 93,
-                "mean": 59.0253164556962
+                "min": 0,
+                "max": 23,
+                "mean": 10.372670807453416
             },
             "month": {
-                "min": 34,
-                "max": 310,
-                "mean": 252.05405405405406
+                "min": 4,
+                "max": 62,
+                "mean": 45.13513513513514
             }
         },
         "PROPOSITION": {
             "day": {
                 "min": 0,
-                "max": 67,
-                "mean": 11.658181818181818
+                "max": 14,
+                "mean": 2.049777777777778
             },
             "week": {
-                "min": 9,
-                "max": 124,
-                "mean": 81.16455696202532
+                "min": 0,
+                "max": 28,
+                "mean": 14.322981366459627
             },
             "month": {
-                "min": 46,
-                "max": 431,
-                "mean": 346.5945945945946
+                "min": 5,
+                "max": 84,
+                "mean": 62.32432432432432
             }
         }
     }
diff --git a/dataSources/PubMed/data_num_locale.py b/dataSources/PubMed/data_num_locale.py
index 35aad2e52..48c0c2f8c 100644
--- a/dataSources/PubMed/data_num_locale.py
+++ b/dataSources/PubMed/data_num_locale.py
@@ -8,23 +8,6 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"
 
 from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
 
-CATEGORIES = [
-    "KEYWORDS",
-    "SUBHEADINGS",
-    "SITE PROPOSITION",
-    "PROPOSITION"
-]
-
-INTERVALS = [
-    "day",
-    "week",
-    "month"
-]
-
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
-
-file_path = f"{DATA_DIR}/save_3_years.json"
-
 def lower_keywords(mesh_terms):
     res = []
 
@@ -47,33 +30,6 @@ def get_date_indices(date, start_date):
 
     return day_index, week_index, month_index
 
-def match_mesh_terms(article_mesh_terms, ncd, keyword):
-    if ncd in article_mesh_terms:
-        if isinstance(keyword, list):
-            all_in = True
-            for k in keyword:
-                if k not in article_mesh_terms:
-                    all_in = False
-            
-            return all_in
-        else:
-            return keyword in article_mesh_terms
-    else:
-        return False
-
-def init_index(category, counts, ncd, article_date):
-    start_date = datetime(2022, 1, 1)
-    day_index, week_index, month_index = get_date_indices(article_date, start_date)
-
-    if day_index not in counts[ncd][category]["day"]:
-        counts[ncd][category]["day"][day_index] = []
-
-    if week_index not in counts[ncd][category]["week"]:
-        counts[ncd][category]["week"][week_index] = []
-
-    if month_index not in counts[ncd][category]["month"]:
-        counts[ncd][category]["month"][month_index] = []
-
 def add_article(article, category, counts, ncd, article_date):
     start_date = datetime(2022, 1, 1)
     day_index, week_index, month_index = get_date_indices(article_date, start_date)
@@ -87,6 +43,35 @@ def add_article(article, category, counts, ncd, article_date):
     if article["PMID"] not in counts[ncd][category]["month"][month_index]:
         counts[ncd][category]["month"][month_index].append(article["PMID"])
 
+def mesh_term_present(article_mesh_terms, mesh_term):
+    """Tell whether `mesh_term` occurs in `article_mesh_terms`.
+
+    A list `mesh_term` matches only when every one of its parts is present
+    (an empty list therefore matches); any other value is looked up as is.
+    No case folding happens here; the callers lower-case both sides first.
+    """
+    if isinstance(mesh_term, list):
+        return all(part in article_mesh_terms for part in mesh_term)
+    return mesh_term in article_mesh_terms
+
+CATEGORIES = [  # order matters: the article loop uses CATEGORIES[0] and the slices CATEGORIES[1:] .. CATEGORIES[4:]
+    "NO KEYWORDS",  # counted as soon as the NCD term is present, whatever the keywords
+    "KEYWORDS",
+    "SUBHEADINGS",
+    "SITE PROPOSITION",
+    "PROPOSITION"
+]
+
+INTERVALS = [  # bucket granularities; used as keys of counts[...][category]
+    "day",
+    "week",
+    "month"
+]
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))
+
+file_path = f"{TMP_DIR}/save_3_years.json"  # JSON article dump loaded right below
 
 with open(file_path, "r", encoding="utf-8") as file:
     data = json.load(file)
@@ -101,69 +86,23 @@ keywords_proposition_mesh_terms = lower_keywords(KEYWORDS_MESH_PROPOSITION)
 
 counts = {}
 
-for ncd in ncds_mesh_terms:
-    counts[ncd] = {
-        "KEYWORDS" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "SUBHEADINGS" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "SITE PROPOSITION" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "PROPOSITION" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-    }
-
-    start_date = datetime(2022, 1, 1)
-    end_date = datetime(2024, 12, 31)
-    current_date = start_date
+counts["ALL"] = {}
 
-    while(current_date < end_date):
-        day_index, week_index, month_index = get_date_indices(current_date, start_date)
+for category in CATEGORIES:
+    counts["ALL"][category] = {}
+    for interval in INTERVALS:
+        counts["ALL"][category][interval] = {}
 
-        for category in CATEGORIES:
-            counts[ncd][category]["day"][day_index] = []
-            counts[ncd][category]["week"][week_index] = []
-            counts[ncd][category]["month"][month_index] = []
+for ncd in ncds_mesh_terms:
+    counts[ncd] = {}
+    for category in CATEGORIES:
+        counts[ncd][category] = {}
+        for interval in INTERVALS:
+            counts[ncd][category][interval] = {}
 
-        current_date += timedelta(days=1)
-
-counts["ALL"] = {
-    "KEYWORDS" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "SUBHEADINGS" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "SITE PROPOSITION" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "PROPOSITION" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-}
 
 start_date = datetime(2022, 1, 1)
-end_date = datetime(2024, 12, 31)
+end_date = datetime(2025, 1, 30)
 current_date = start_date
 
 while(current_date < end_date):
@@ -174,134 +113,80 @@ while(current_date < end_date):
         counts["ALL"][category]["week"][week_index] = []
         counts["ALL"][category]["month"][month_index] = []
 
+    for ncd in ncds_mesh_terms:  # same empty buckets for every NCD as for "ALL" just above
+        for category in CATEGORIES:
+            counts[ncd][category]["day"][day_index] = []
+            counts[ncd][category]["week"][week_index] = []  # re-assigned for each day of the week; still empty at this stage
+            counts[ncd][category]["month"][month_index] = []
+
     current_date += timedelta(days=1)
 
 for article in data:
 
-    mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
+    article_mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
     article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
 
-    if "ALL" not in counts:
-        counts["ALL"] = {
-            "KEYWORDS" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "SUBHEADINGS" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "SITE PROPOSITION" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "PROPOSITION" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-        }
-
     for ncd in ncds_mesh_terms:
-        if ncd not in counts:
-            counts[ncd] = {
-                "KEYWORDS" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "SUBHEADINGS" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "SITE PROPOSITION" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "PROPOSITION" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-            }
-
-        for keyword in keywords_mesh_terms:
-
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                init_index("KEYWORDS", counts, ncd, article_date)
-                init_index("SUBHEADINGS", counts, ncd, article_date)
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
 
-                add_article(article, "KEYWORDS", counts, ncd, article_date)
-                add_article(article, "SUBHEADINGS", counts, ncd, article_date)
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+        if mesh_term_present(article_mesh_terms, ncd):  # only articles carrying this NCD term are counted for it
 
-                init_index("KEYWORDS", counts, "ALL", article_date)
-                init_index("SUBHEADINGS", counts, "ALL", article_date)
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
-                init_index("PROPOSITION", counts, "ALL", article_date)
+            add_article(article, CATEGORIES[0], counts, ncd, article_date)  # CATEGORIES[0] is "NO KEYWORDS"
+            add_article(article, CATEGORIES[0], counts, "ALL", article_date)
 
-                add_article(article, "KEYWORDS", counts, "ALL", article_date)
-                add_article(article, "SUBHEADINGS", counts, "ALL", article_date)
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+            added = False  # turns True at the first keyword list that matches; the remaining lists are then skipped
 
-        for keyword in keywords_subheading_mesh_terms:
+            for keyword in keywords_mesh_terms:
+                if added:
+                    break
 
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                init_index("SUBHEADINGS", counts, ncd, article_date)
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
+                if mesh_term_present(article_mesh_terms, keyword):
 
-                add_article(article, "SUBHEADINGS", counts, ncd, article_date)
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+                    for category in CATEGORIES[1:]:  # "KEYWORDS" and every category listed after it
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
 
-                init_index("SUBHEADINGS", counts, "ALL", article_date)
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
-                init_index("PROPOSITION", counts, "ALL", article_date)
+                    added = True
 
-                add_article(article, "SUBHEADINGS", counts, "ALL", article_date)
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+            for keyword in keywords_subheading_mesh_terms:
+                if added:
+                    break
 
-        for keyword in keywords_site_proposition_mesh_terms:
+                if mesh_term_present(article_mesh_terms, keyword):
 
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
+                    for category in CATEGORIES[2:]:  # "SUBHEADINGS" and every category listed after it
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
+                    
+                    added = True
 
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+            for keyword in keywords_site_proposition_mesh_terms:
+                if added:
+                    break
 
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
-                init_index("PROPOSITION", counts, "ALL", article_date)
+                if mesh_term_present(article_mesh_terms, keyword):
 
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+                    for category in CATEGORIES[3:]:  # "SITE PROPOSITION" and "PROPOSITION"
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
 
-        for keyword in keywords_proposition_mesh_terms:
+                    added = True
 
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                init_index("PROPOSITION", counts, ncd, article_date)
+            for keyword in keywords_proposition_mesh_terms:
+                if added:
+                    break
 
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+                if mesh_term_present(article_mesh_terms, keyword):
 
-                init_index("PROPOSITION", counts, "ALL", article_date)
+                    for category in CATEGORIES[4:]:  # "PROPOSITION" only
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
 
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+                    added = True
 
 for ncd in ncds_mesh_terms:
     for category in CATEGORIES:
         for interval in INTERVALS:
-            counts[ncd][category][interval] = [len(tmp) for key, tmp in counts[ncd][category][interval].items()]
+            counts[ncd][category][interval] = [len(tmp) for tmp in counts[ncd][category][interval].values()]
 
             counts[ncd][category][interval] = {
                 "min": min(counts[ncd][category][interval]),
diff --git a/dataSources/PubMed/getPubmedData.py b/dataSources/PubMed/getPubmedData.py
index e69de29bb..0ebb541c0 100644
--- a/dataSources/PubMed/getPubmedData.py
+++ b/dataSources/PubMed/getPubmedData.py
@@ -0,0 +1,15 @@
+import sys
+import os
+import json
+
+# Add the directory two levels up (the repository root) to the module search path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))  # tmp/ folder next to this script
+
+data = []  # placeholder; replaced by the parsed JSON content below
+
+with open(f"{TMP_DIR}/save_3_years.json", "r", encoding="utf-8") as file:
+    data = json.load(file)
+
+print(len(data))  # number of top-level entries in the dump
\ No newline at end of file
diff --git a/dataSources/PubMed/pubmedApi.py b/dataSources/PubMed/pubmedApi.py
index 28c194590..c0b969794 100644
--- a/dataSources/PubMed/pubmedApi.py
+++ b/dataSources/PubMed/pubmedApi.py
@@ -62,6 +62,9 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
     # obj = parseXmlFile(f"{TMP_DIR}/{TMP_FILENAME}")
 
     obj = xmltodict.parse(response.text)
+    if "PubmedArticleSet" not in obj:
+        return []
+        
     obj = obj["PubmedArticleSet"]
 
     print()
diff --git a/dataSources/PubMed/store_data_localy.py b/dataSources/PubMed/store_data_localy.py
index 718ff1b6f..64c3d2cff 100644
--- a/dataSources/PubMed/store_data_localy.py
+++ b/dataSources/PubMed/store_data_localy.py
@@ -11,27 +11,38 @@ from dataSources.PubMed.pubmedApi import getPubmedData
 from variables.pubmed import *
 from dataSources.PubMed.util import *
 
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))
 
 ncds_mesh_noexp = get_mesh_noexp_term(NCDS_MESH_TERM)
 
 search_term = url_encode(" OR ".join(ncds_mesh_noexp))
 
-data = []
+data_to_store = []
 
-with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
-    json.dump(data, json_file, indent=4)
+with open(f"{TMP_DIR}/save_3_years.json", "w") as json_file:
+    json.dump(data_to_store, json_file, indent=4)
 
 current_date = datetime(2022, 1, 1)
 
+stored_pmid = set()  # PMIDs already kept; O(1) duplicate check; assumes PMIDs are hashable (str/int) - TODO confirm
+
 while(current_date < datetime(2024, 12, 31)):
-    next_date = current_date + timedelta(weeks=1)
+    next_date = current_date + timedelta(days=4)
+
+    data = getPubmedData(search_term, current_date.strftime("%Y/%m/%d"), next_date.strftime("%Y/%m/%d"))
+
+    if len(data) >= 10000:  # NOTE(review): a full 10000-item result presumably means the API cap truncated this window
+        print("ERROR: MORE THAN 10000 ARTICLES")
+        raise SystemExit(1)  # same exit status as exit(1), without relying on the site-provided builtin
 
-    data += getPubmedData(search_term, current_date.strftime("%Y/%m/%d"), next_date.strftime("%Y/%m/%d"))
+    for article in data:
+        if article["PMID"] not in stored_pmid:
+            data_to_store.append(article)
+            stored_pmid.add(article["PMID"])
 
     current_date = next_date
 
     time.sleep(0.1)
 
-with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
-    json.dump(data, json_file, indent=4)
\ No newline at end of file
+with open(f"{TMP_DIR}/save_3_years.json", "w") as json_file:
+    json.dump(data_to_store, json_file, indent=4)
\ No newline at end of file
-- 
GitLab