Régénération de la sauvegarde locale en enlevant les doublons et en ajoutant...

Régénération de la sauvegarde locale en enlevant les doublons et en ajoutant les potentiels articles loupés. Régénération du calcul du nombre d'articles postés dans PubMed avec le champ SANS KEYWORDS (catégorie "NO KEYWORDS").

Régénération de la sauvegarde locale en enlevant les doublons et en ajoutant...
623e3f6f · Ivan Pavlovich · 02f90f52 · 623e3f6f · 623e3f6f · 623e3f6f
Commit 623e3f6f authored 2 months ago by Ivan Pavlovich
--- a/.gitignore
+++ b/.gitignore
 dataSources/PubMed/tmp/*
-dataSources/PubMed/data/*
 .venv
\ No newline at end of file
--- a/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc
+++ b/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc
--- a/dataSources/PubMed/doc/data_num.json
+++ b/dataSources/PubMed/doc/data_num.json
--- a/dataSources/PubMed/doc/data_num_keyword_no_mesh.json
+++ b/dataSources/PubMed/doc/data_num_keyword_no_mesh.json
--- a/dataSources/PubMed/doc/locale_articles_count.json
+++ b/dataSources/PubMed/doc/locale_articles_count.json
 {
+    "ALL": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 700,
+                "mean": 142.64
+            },
+            "week": {
+                "min": 0,
+                "max": 1436,
+                "mean": 996.7080745341615
+            },
+            "month": {
+                "min": 95,
+                "max": 5131,
+                "mean": 4337.027027027027
+            }
+        },
+        "KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 39,
+                "mean": 6.383111111111111
+            },
+            "week": {
+                "min": 0,
+                "max": 69,
+                "mean": 44.60248447204969
+            },
+            "month": {
+                "min": 9,
+                "max": 263,
+                "mean": 194.0810810810811
+            }
+        },
+        "SUBHEADINGS": {
+            "day": {
+                "min": 0,
+                "max": 39,
+                "mean": 6.383111111111111
+            },
+            "week": {
+                "min": 0,
+                "max": 69,
+                "mean": 44.60248447204969
+            },
+            "month": {
+                "min": 9,
+                "max": 263,
+                "mean": 194.0810810810811
+            }
+        },
+        "SITE PROPOSITION": {
+            "day": {
+                "min": 0,
+                "max": 47,
+                "mean": 8.278222222222222
+            },
+            "week": {
+                "min": 0,
+                "max": 93,
+                "mean": 57.84472049689441
+            },
+            "month": {
+                "min": 10,
+                "max": 311,
+                "mean": 251.7027027027027
+            }
+        },
+        "PROPOSITION": {
+            "day": {
+                "min": 0,
+                "max": 68,
+                "mean": 11.38488888888889
+            },
+            "week": {
+                "min": 0,
+                "max": 124,
+                "mean": 79.5527950310559
+            },
+            "month": {
+                "min": 14,
+                "max": 432,
+                "mean": 346.1621621621622
+            }
+        }
+    },
    "noncommunicable diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 7,
+                "mean": 1.3555555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 23,
+                "mean": 9.472049689440993
+            },
+            "month": {
+                "min": 0,
+                "max": 57,
+                "mean": 41.21621621621622
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 3,
-                "mean": 0.20639269406392693
+                "mean": 0.2008888888888889
            },
            "week": {
                "min": 0,
                "max": 6,
-                "mean": 1.4394904458598725
+                "mean": 1.4037267080745341
            },
            "month": {
-                "min": 2,
+                "min": 0,
                "max": 12,
-                "mean": 6.277777777777778
+                "mean": 6.108108108108108
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 3,
-                "mean": 0.20639269406392693
+                "mean": 0.2008888888888889
            },
            "week": {
                "min": 0,
                "max": 6,
-                "mean": 1.4394904458598725
+                "mean": 1.4037267080745341
            },
            "month": {
-                "min": 2,
+                "min": 0,
                "max": 12,
-                "mean": 6.277777777777778
+                "mean": 6.108108108108108
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 3,
-                "mean": 0.2328767123287671
+                "mean": 0.22666666666666666
            },
            "week": {
                "min": 0,
                "max": 6,
-                "mean": 1.624203821656051
+                "mean": 1.5838509316770186
            },
            "month": {
-                "min": 2,
+                "min": 0,
                "max": 14,
-                "mean": 7.083333333333333
+                "mean": 6.891891891891892
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 4,
-                "mean": 0.34885844748858447
+                "mean": 0.33955555555555555
            },
            "week": {
                "min": 0,
                "max": 8,
-                "mean": 2.43312101910828
+                "mean": 2.372670807453416
            },
            "month": {
-                "min": 4,
+                "min": 0,
                "max": 17,
-                "mean": 10.61111111111111
+                "mean": 10.324324324324325
            }
        }
    },
    "diabetes mellitus": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 58,
+                "mean": 16.045333333333332
+            },
+            "week": {
+                "min": 0,
+                "max": 183,
+                "mean": 112.11801242236025
+            },
+            "month": {
+                "min": 6,
+                "max": 662,
+                "mean": 487.86486486486484
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 5,
-                "mean": 0.7497725204731575
+                "mean": 0.7315555555555555
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 11,
-                "mean": 5.248407643312102
+                "mean": 5.111801242236025
            },
            "month": {
-                "min": 3,
+                "min": 1,
                "max": 31,
-                "mean": 22.27027027027027
+                "mean": 22.243243243243242
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 5,
-                "mean": 0.7497725204731575
+                "mean": 0.7315555555555555
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 11,
-                "mean": 5.248407643312102
+                "mean": 5.111801242236025
            },
            "month": {
-                "min": 3,
+                "min": 1,
                "max": 31,
-                "mean": 22.27027027027027
+                "mean": 22.243243243243242
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 8,
-                "mean": 1.0454959053685169
+                "mean": 1.0222222222222221
            },
            "week": {
-                "min": 2,
+                "min": 0,
                "max": 15,
-                "mean": 7.318471337579618
+                "mean": 7.142857142857143
            },
            "month": {
-                "min": 3,
+                "min": 1,
                "max": 44,
-                "mean": 31.054054054054053
+                "mean": 31.08108108108108
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
-                "max": 10,
+                "max": 11,
-                "mean": 1.4249317561419472
+                "mean": 1.392
            },
            "week": {
-                "min": 3,
+                "min": 0,
-                "max": 20,
+                "max": 19,
-                "mean": 9.97452229299363
+                "mean": 9.726708074534162
            },
            "month": {
-                "min": 5,
+                "min": 2,
                "max": 57,
                "mean": 42.32432432432432
            }
        }
    },
    "neoplasms": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 311,
+                "mean": 58.556444444444445
+            },
+            "week": {
+                "min": 0,
+                "max": 598,
+                "mean": 409.167701863354
+            },
+            "month": {
+                "min": 35,
+                "max": 2108,
+                "mean": 1780.4324324324325
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 16,
-                "mean": 2.692447679708826
+                "mean": 2.6266666666666665
            },
            "week": {
-                "min": 2,
+                "min": 0,
                "max": 33,
-                "mean": 18.727848101265824
+                "mean": 18.354037267080745
            },
            "month": {
-                "min": 14,
+                "min": 4,
                "max": 101,
-                "mean": 79.97297297297297
+                "mean": 79.86486486486487
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 16,
-                "mean": 2.692447679708826
+                "mean": 2.6266666666666665
            },
            "week": {
-                "min": 2,
+                "min": 0,
                "max": 33,
-                "mean": 18.727848101265824
+                "mean": 18.354037267080745
            },
            "month": {
-                "min": 14,
+                "min": 4,
                "max": 101,
-                "mean": 79.97297297297297
+                "mean": 79.86486486486487
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 16,
-                "mean": 2.735213830755232
+                "mean": 2.6684444444444444
            },
            "week": {
-                "min": 2,
+                "min": 0,
                "max": 33,
-                "mean": 19.025316455696203
+                "mean": 18.645962732919255
            },
            "month": {
-                "min": 14,
+                "min": 4,
                "max": 101,
-                "mean": 81.24324324324324
+                "mean": 81.13513513513513
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 21,
-                "mean": 3.5281818181818183
+                "mean": 3.447111111111111
            },
            "week": {
-                "min": 2,
+                "min": 0,
                "max": 43,
-                "mean": 24.563291139240505
+                "mean": 24.08695652173913
            },
            "month": {
-                "min": 17,
+                "min": 4,
                "max": 127,
-                "mean": 104.89189189189189
+                "mean": 104.8108108108108
            }
        }
    },
    "respiratory tract diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 7,
+                "mean": 0.7795555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 13,
+                "mean": 5.447204968944099
+            },
+            "month": {
+                "min": 3,
+                "max": 41,
+                "mean": 23.7027027027027
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
            },
            "week": {
                "min": 0,
                "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
            },
            "month": {
                "min": 0,
                "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
            },
            "week": {
                "min": 0,
                "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
            },
            "month": {
                "min": 0,
                "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 1,
-                "mean": 0.02281021897810219
+                "mean": 0.021333333333333333
            },
            "week": {
                "min": 0,
                "max": 2,
-                "mean": 0.15822784810126583
+                "mean": 0.14906832298136646
            },
            "month": {
                "min": 0,
                "max": 3,
-                "mean": 0.6756756756756757
+                "mean": 0.6486486486486487
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 1,
-                "mean": 0.0364963503649635
+                "mean": 0.034666666666666665
            },
            "week": {
                "min": 0,
                "max": 2,
-                "mean": 0.25316455696202533
+                "mean": 0.2422360248447205
            },
            "month": {
                "min": 0,
                "max": 3,
-                "mean": 1.0810810810810811
+                "mean": 1.054054054054054
            }
        }
    },
    "cardiovascular diseases": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 119,
+                "mean": 21.612444444444446
+            },
+            "week": {
+                "min": 0,
+                "max": 260,
+                "mean": 151.01863354037266
+            },
+            "month": {
+                "min": 20,
+                "max": 790,
+                "mean": 657.1351351351351
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 5,
-                "mean": 0.6584699453551912
+                "mean": 0.64
            },
            "week": {
                "min": 0,
                "max": 13,
-                "mean": 4.575949367088608
+                "mean": 4.472049689440993
            },
            "month": {
-                "min": 3,
+                "min": 0,
                "max": 40,
-                "mean": 19.54054054054054
+                "mean": 19.45945945945946
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 5,
-                "mean": 0.6584699453551912
+                "mean": 0.64
            },
            "week": {
                "min": 0,
                "max": 13,
-                "mean": 4.575949367088608
+                "mean": 4.472049689440993
            },
            "month": {
-                "min": 3,
+                "min": 0,
                "max": 40,
-                "mean": 19.54054054054054
+                "mean": 19.45945945945946
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 5,
-                "mean": 0.6930783242258652
+                "mean": 0.6737777777777778
            },
            "week": {
                "min": 0,
                "max": 13,
-                "mean": 4.8164556962025316
+                "mean": 4.708074534161491
            },
            "month": {
-                "min": 3,
+                "min": 0,
                "max": 42,
-                "mean": 20.56756756756757
+                "mean": 20.486486486486488
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 8,
-                "mean": 1.1474067333939946
+                "mean": 1.1173333333333333
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 24,
-                "mean": 7.981012658227848
+                "mean": 7.807453416149069
            },
            "month": {
-                "min": 5,
+                "min": 1,
                "max": 58,
-                "mean": 34.08108108108108
+                "mean": 33.972972972972975
            }
        }
    },
    "mental health": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 74,
+                "mean": 16.589333333333332
+            },
+            "week": {
+                "min": 0,
+                "max": 213,
+                "mean": 115.9192546583851
+            },
+            "month": {
+                "min": 8,
+                "max": 679,
+                "mean": 504.4054054054054
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 8,
-                "mean": 1.1856232939035487
+                "mean": 1.1564444444444444
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 21,
-                "mean": 8.246835443037975
+                "mean": 8.080745341614907
            },
            "month": {
-                "min": 4,
+                "min": 1,
                "max": 52,
-                "mean": 35.21621621621622
+                "mean": 35.16216216216216
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 8,
-                "mean": 1.1856232939035487
+                "mean": 1.1564444444444444
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 21,
-                "mean": 8.246835443037975
+                "mean": 8.080745341614907
            },
            "month": {
-                "min": 4,
+                "min": 1,
                "max": 52,
-                "mean": 35.21621621621622
+                "mean": 35.16216216216216
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 8,
-                "mean": 1.2438580527752503
+                "mean": 1.2133333333333334
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 22,
-                "mean": 8.651898734177216
+                "mean": 8.478260869565217
            },
            "month": {
-                "min": 4,
+                "min": 1,
                "max": 55,
-                "mean": 36.945945945945944
+                "mean": 36.891891891891895
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 11,
-                "mean": 2.1618181818181816
+                "mean": 2.1093333333333333
            },
            "week": {
-                "min": 1,
+                "min": 0,
                "max": 32,
-                "mean": 15.050632911392405
+                "mean": 14.73913043478261
            },
            "month": {
-                "min": 8,
+                "min": 2,
-                "max": 84,
+                "max": 85,
-                "mean": 64.27027027027027
+                "mean": 64.13513513513513
            }
        }
    },
    "diabetes mellitus, type 1": {
+        "NO KEYWORDS": {
+            "day": {
+                "min": 0,
+                "max": 40,
+                "mean": 7.019555555555556
+            },
+            "week": {
+                "min": 0,
+                "max": 78,
+                "mean": 49.04968944099379
+            },
+            "month": {
+                "min": 2,
+                "max": 257,
+                "mean": 213.43243243243242
+            }
+        },
        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 3,
-                "mean": 0.3072014585232452
+                "mean": 0.29688888888888887
            },
            "week": {
                "min": 0,
                "max": 8,
-                "mean": 2.132911392405063
+                "mean": 2.0745341614906834
            },
            "month": {
-                "min": 3,
+                "min": 1,
                "max": 19,
-                "mean": 9.108108108108109
+                "mean": 9.027027027027026
            }
        },
        "SUBHEADINGS": {
            "day": {
                "min": 0,
                "max": 3,
-                "mean": 0.3072014585232452
+                "mean": 0.29688888888888887
            },
            "week": {
                "min": 0,
                "max": 8,
-                "mean": 2.132911392405063
+                "mean": 2.0745341614906834
            },
            "month": {
-                "min": 3,
+                "min": 1,
                "max": 19,
-                "mean": 9.108108108108109
+                "mean": 9.027027027027026
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
                "max": 13,
-                "mean": 1.5141037306642402
+                "mean": 1.4755555555555555
            },
            "week": {
-                "min": 3,
+                "min": 0,
                "max": 30,
-                "mean": 10.531645569620252
+                "mean": 10.31055900621118
            },
            "month": {
-                "min": 5,
+                "min": 1,
                "max": 66,
-                "mean": 44.972972972972975
+                "mean": 44.86486486486486
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
                "max": 13,
-                "mean": 1.5941765241128298
+                "mean": 1.5537777777777777
            },
            "week": {
-                "min": 3,
+                "min": 0,
                "max": 30,
-                "mean": 11.08860759493671
+                "mean": 10.857142857142858
            },
            "month": {
-                "min": 5,
+                "min": 1,
                "max": 68,
-                "mean": 47.351351351351354
+                "mean": 47.24324324324324
            }
        }
    },
    "diabetes mellitus, type 2": {
-        "KEYWORDS": {
+        "NO KEYWORDS": {
            "day": {
                "min": 0,
-                "max": 7,
+                "max": 146,
-                "mean": 0.9763421292083713
+                "mean": 28.133333333333333
            },
            "week": {
                "min": 0,
-                "max": 14,
+                "max": 282,
-                "mean": 6.791139240506329
+                "mean": 196.583850931677
            },
            "month": {
-                "min": 6,
+                "min": 26,
-                "max": 45,
+                "max": 1021,
-                "mean": 29
+                "mean": 855.4054054054054
            }
        },
-        "SUBHEADINGS": {
+        "KEYWORDS": {
            "day": {
                "min": 0,
                "max": 7,
-                "mean": 0.9763421292083713
+                "mean": 0.952
            },
            "week": {
                "min": 0,
                "max": 14,
-                "mean": 6.791139240506329
+                "mean": 6.6521739130434785
            },
            "month": {
-                "min": 6,
+                "min": 3,
                "max": 45,
-                "mean": 29
+                "mean": 28.945945945945947
            }
        },
-        "SITE PROPOSITION": {
+        "SUBHEADINGS": {
            "day": {
                "min": 0,
-                "max": 10,
+                "max": 7,
-                "mean": 1.520909090909091
+                "mean": 0.952
            },
            "week": {
-                "min": 1,
-                "max": 23,
-                "mean": 10.58860759493671
-            },
-            "month": {
-                "min": 8,
-                "max": 61,
-                "mean": 45.21621621621622
-            }
-        },
-        "PROPOSITION": {
-            "day": {
                "min": 0,
                "max": 14,
-                "mean": 2.099090909090909
+                "mean": 6.6521739130434785
-            },
-            "week": {
-                "min": 2,
-                "max": 28,
-                "mean": 14.613924050632912
-            },
-            "month": {
-                "min": 10,
-                "max": 84,
-                "mean": 62.4054054054054
-            }
-        }
-    },
-    "ALL": {
-        "KEYWORDS": {
-            "day": {
-                "min": 0,
-                "max": 39,
-                "mean": 6.54
-            },
-            "week": {
-                "min": 8,
-                "max": 69,
-                "mean": 45.53164556962025
-            },
-            "month": {
-                "min": 31,
-                "max": 262,
-                "mean": 194.43243243243242
-            }
-        },
-        "SUBHEADINGS": {
-            "day": {
-                "min": 0,
-                "max": 39,
-                "mean": 6.54
-            },
-            "week": {
-                "min": 8,
-                "max": 69,
-                "mean": 45.53164556962025
            },
            "month": {
-                "min": 31,
+                "min": 3,
-                "max": 262,
+                "max": 45,
-                "mean": 194.43243243243242
+                "mean": 28.945945945945947
            }
        },
        "SITE PROPOSITION": {
            "day": {
                "min": 0,
-                "max": 47,
+                "max": 10,
-                "mean": 8.478181818181818
+                "mean": 1.4844444444444445
            },
            "week": {
-                "min": 8,
+                "min": 0,
-                "max": 93,
+                "max": 23,
-                "mean": 59.0253164556962
+                "mean": 10.372670807453416
            },
            "month": {
-                "min": 34,
+                "min": 4,
-                "max": 310,
+                "max": 62,
-                "mean": 252.05405405405406
+                "mean": 45.13513513513514
            }
        },
        "PROPOSITION": {
            "day": {
                "min": 0,
-                "max": 67,
+                "max": 14,
-                "mean": 11.658181818181818
+                "mean": 2.049777777777778
            },
            "week": {
-                "min": 9,
+                "min": 0,
-                "max": 124,
+                "max": 28,
-                "mean": 81.16455696202532
+                "mean": 14.322981366459627
            },
            "month": {
-                "min": 46,
+                "min": 5,
-                "max": 431,
+                "max": 84,
-                "mean": 346.5945945945946
+                "mean": 62.32432432432432
            }
        }
    }

--- a/dataSources/PubMed/data_num_locale.py
+++ b/dataSources/PubMed/data_num_locale.py
@@ -8,23 +8,6 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"
 from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
-CATEGORIES = [
-    "KEYWORDS",
-    "SUBHEADINGS",
-    "SITE PROPOSITION",
-    "PROPOSITION"
-]
-INTERVALS = [
-    "day",
-    "week",
-    "month"
-]
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
-file_path = f"{DATA_DIR}/save_3_years.json"
 def lower_keywords(mesh_terms):
    res = []
@@ -47,33 +30,6 @@ def get_date_indices(date, start_date):
    return day_index, week_index, month_index
-def match_mesh_terms(article_mesh_terms, ncd, keyword):
-    if ncd in article_mesh_terms:
-        if isinstance(keyword, list):
-            all_in = True
-            for k in keyword:
-                if k not in article_mesh_terms:
-                    all_in = False
-            return all_in
-        else:
-            return keyword in article_mesh_terms
-    else:
-        return False
-def init_index(category, counts, ncd, article_date):
-    start_date = datetime(2022, 1, 1)
-    day_index, week_index, month_index = get_date_indices(article_date, start_date)
-    if day_index not in counts[ncd][category]["day"]:
-        counts[ncd][category]["day"][day_index] = []
-    if week_index not in counts[ncd][category]["week"]:
-        counts[ncd][category]["week"][week_index] = []
-    if month_index not in counts[ncd][category]["month"]:
-        counts[ncd][category]["month"][month_index] = []
 def add_article(article, category, counts, ncd, article_date):
    start_date = datetime(2022, 1, 1)
    day_index, week_index, month_index = get_date_indices(article_date, start_date)
@@ -87,6 +43,35 @@ def add_article(article, category, counts, ncd, article_date):
    if article["PMID"] not in counts[ncd][category]["month"][month_index]:
        counts[ncd][category]["month"][month_index].append(article["PMID"])
+def mesh_term_present(article_mesh_terms, mesh_term):
+        if isinstance(mesh_term, list):
+            all_in = True
+            for part in mesh_term:
+                if part not in article_mesh_terms:
+                    all_in = False
+            return all_in
+        else:
+            return mesh_term in article_mesh_terms
+CATEGORIES = [
+    "NO KEYWORDS",
+    "KEYWORDS",
+    "SUBHEADINGS",
+    "SITE PROPOSITION",
+    "PROPOSITION"
+]
+INTERVALS = [
+    "day",
+    "week",
+    "month"
+]
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))
+file_path = f"{TMP_DIR}/save_3_years.json"
 with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)
@@ -101,69 +86,23 @@ keywords_proposition_mesh_terms = lower_keywords(KEYWORDS_MESH_PROPOSITION)
 counts = {}
-for ncd in ncds_mesh_terms:
+counts["ALL"] = {}
-    counts[ncd] = {
-        "KEYWORDS" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "SUBHEADINGS" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "SITE PROPOSITION" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-        "PROPOSITION" : {
-            "day": {},
-            "week": {},
-            "month": {}
-        },
-    }
-    start_date = datetime(2022, 1, 1)
-    end_date = datetime(2024, 12, 31)
-    current_date = start_date
-    while(current_date < end_date):
+for category in CATEGORIES:
-        day_index, week_index, month_index = get_date_indices(current_date, start_date)
+    counts["ALL"][category] = {}
+    for interval in INTERVALS:
+        counts["ALL"][category][interval] = {}
-        for category in CATEGORIES:
+for ncd in ncds_mesh_terms:
-            counts[ncd][category]["day"][day_index] = []
+    counts[ncd] = {}
-            counts[ncd][category]["week"][week_index] = []
+    for category in CATEGORIES:
-            counts[ncd][category]["month"][month_index] = []
+        counts[ncd][category] = {}
+        for interval in INTERVALS:
+            counts[ncd][category][interval] = {}
-        current_date += timedelta(days=1)
-counts["ALL"] = {
-    "KEYWORDS" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "SUBHEADINGS" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "SITE PROPOSITION" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-    "PROPOSITION" : {
-        "day": {},
-        "week": {},
-        "month": {}
-    },
-}
 start_date = datetime(2022, 1, 1)
-end_date = datetime(2024, 12, 31)
+end_date = datetime(2025, 1, 30)
 current_date = start_date
 while(current_date < end_date):
@@ -174,134 +113,80 @@ while(current_date < end_date):
        counts["ALL"][category]["week"][week_index] = []
        counts["ALL"][category]["month"][month_index] = []
+    for ncd in ncds_mesh_terms:
+        for category in CATEGORIES:
+            counts[ncd][category]["day"][day_index] = []
+            counts[ncd][category]["week"][week_index] = []
+            counts[ncd][category]["month"][month_index] = []
    current_date += timedelta(days=1)
 for article in data:
-    mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
+    article_mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
    article_date = datetime(int(article["Date"]["Year"]), int(article["Date"]["Month"]), int(article["Date"]["Day"]))
-    if "ALL" not in counts:
-        counts["ALL"] = {
-            "KEYWORDS" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "SUBHEADINGS" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "SITE PROPOSITION" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-            "PROPOSITION" : {
-                "day": {},
-                "week": {},
-                "month": {}
-            },
-        }
    for ncd in ncds_mesh_terms:
-        if ncd not in counts:
-            counts[ncd] = {
-                "KEYWORDS" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "SUBHEADINGS" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "SITE PROPOSITION" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-                "PROPOSITION" : {
-                    "day": {},
-                    "week": {},
-                    "month": {}
-                },
-            }
-        for keyword in keywords_mesh_terms:
-            if match_mesh_terms(mesh_terms, ncd, keyword):
-                init_index("KEYWORDS", counts, ncd, article_date)
-                init_index("SUBHEADINGS", counts, ncd, article_date)
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
-                add_article(article, "KEYWORDS", counts, ncd, article_date)
+        if mesh_term_present(article_mesh_terms, ncd):
-                add_article(article, "SUBHEADINGS", counts, ncd, article_date)
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
-                init_index("KEYWORDS", counts, "ALL", article_date)
+            add_article(article, CATEGORIES[0], counts, ncd, article_date)
-                init_index("SUBHEADINGS", counts, "ALL", article_date)
+            add_article(article, CATEGORIES[0], counts, "ALL", article_date)
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
-                init_index("PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "KEYWORDS", counts, "ALL", article_date)
+            added = False
-                add_article(article, "SUBHEADINGS", counts, "ALL", article_date)
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
-        for keyword in keywords_subheading_mesh_terms:
+            for keyword in keywords_mesh_terms:
+                if added:
+                    break
-            if match_mesh_terms(mesh_terms, ncd, keyword):
+                if mesh_term_present(article_mesh_terms, keyword):
-                init_index("SUBHEADINGS", counts, ncd, article_date)
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
-                add_article(article, "SUBHEADINGS", counts, ncd, article_date)
+                    for category in CATEGORIES[1:]:
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
+                        add_article(article, category, counts, ncd, article_date)
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
-                init_index("SUBHEADINGS", counts, "ALL", article_date)
+                    added = True
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
-                init_index("PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "SUBHEADINGS", counts, "ALL", article_date)
+            for keyword in keywords_subheading_mesh_terms:
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
+                if added:
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+                    break
-        for keyword in keywords_site_proposition_mesh_terms:
+                if mesh_term_present(article_mesh_terms, keyword):
-            if match_mesh_terms(mesh_terms, ncd, keyword):
+                    for category in CATEGORIES[2:]:
-                init_index("SITE PROPOSITION", counts, ncd, article_date)
+                        add_article(article, category, counts, ncd, article_date)
-                init_index("PROPOSITION", counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
+                    added = True
-                add_article(article, "SITE PROPOSITION", counts, ncd, article_date)
+            for keyword in keywords_site_proposition_mesh_terms:
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+                if added:
+                    break
-                init_index("SITE PROPOSITION", counts, "ALL", article_date)
+                if mesh_term_present(article_mesh_terms, keyword):
-                init_index("PROPOSITION", counts, "ALL", article_date)
-                add_article(article, "SITE PROPOSITION", counts, "ALL", article_date)
+                    for category in CATEGORIES[3:]:
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
-        for keyword in keywords_proposition_mesh_terms:
+                    added = True
-            if match_mesh_terms(mesh_terms, ncd, keyword):
+            for keyword in keywords_proposition_mesh_terms:
-                init_index("PROPOSITION", counts, ncd, article_date)
+                if added:
+                    break
-                add_article(article, "PROPOSITION", counts, ncd, article_date)
+                if mesh_term_present(article_mesh_terms, keyword):
-                init_index("PROPOSITION", counts, "ALL", article_date)
+                    for category in CATEGORIES[4:]:
+                        add_article(article, category, counts, ncd, article_date)
+                        add_article(article, category, counts, "ALL", article_date)
-                add_article(article, "PROPOSITION", counts, "ALL", article_date)
+                    added = True
 for ncd in ncds_mesh_terms:
    for category in CATEGORIES:
        for interval in INTERVALS:
-            counts[ncd][category][interval] = [len(tmp) for key, tmp in counts[ncd][category][interval].items()]
+            counts[ncd][category][interval] = [len(tmp) for _, tmp in counts[ncd][category][interval].items()]
            counts[ncd][category][interval] = {
                "min": min(counts[ncd][category][interval]),

--- a/dataSources/PubMed/getPubmedData.py
+++ b/dataSources/PubMed/getPubmedData.py
+"""Sanity check: print how many entries the local save_3_years.json holds."""
+import sys
+import os
+import json
+# Add the directory two levels above this file to the module search path.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))
+# json.load binds the result directly, so no placeholder initialisation is needed.
+with open(f"{TMP_DIR}/save_3_years.json", "r", encoding="utf-8") as file:
+    data = json.load(file)
+print(len(data))
\ No newline at end of file
--- a/dataSources/PubMed/pubmedApi.py
+++ b/dataSources/PubMed/pubmedApi.py
@@ -62,6 +62,9 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
    # obj = parseXmlFile(f"{TMP_DIR}/{TMP_FILENAME}")
    obj = xmltodict.parse(response.text)
+    if "PubmedArticleSet" not in obj:
+        return []
    obj = obj["PubmedArticleSet"]
    print()

--- a/dataSources/PubMed/store_data_localy.py
+++ b/dataSources/PubMed/store_data_localy.py
@@ -11,27 +11,38 @@ from dataSources.PubMed.pubmedApi import getPubmedData
 from variables.pubmed import *
 from dataSources.PubMed.util import *
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
+TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp"))
 ncds_mesh_noexp = get_mesh_noexp_term(NCDS_MESH_TERM)
 search_term = url_encode(" OR ".join(ncds_mesh_noexp))
-data = []
+data_to_store = []
-with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
+with open(f"{TMP_DIR}/save_3_years.json", "w") as json_file:
-    json.dump(data, json_file, indent=4)
+    json.dump(data_to_store, json_file, indent=4)
 current_date = datetime(2022, 1, 1)
+stored_pmid = []
 while(current_date < datetime(2024, 12, 31)):
-    next_date = current_date + timedelta(weeks=1)
+    next_date = current_date + timedelta(days=4)
+    data = getPubmedData(search_term, current_date.strftime("%Y/%m/%d"), next_date.strftime("%Y/%m/%d"))
+    if len(data) > 10000:
+        print("ERROR: MORE THAN 10000 ARTICLES")
+        exit(1)
-    data += getPubmedData(search_term, current_date.strftime("%Y/%m/%d"), next_date.strftime("%Y/%m/%d"))
+    for article in data:
+        if article["PMID"] not in stored_pmid:
+            data_to_store.append(article)
+            stored_pmid.append(article["PMID"])
    current_date = next_date
    time.sleep(0.1)
-with open(f"{DATA_DIR}/save_3_years.json", "w") as json_file:
+with open(f"{TMP_DIR}/save_3_years.json", "w") as json_file:
-    json.dump(data, json_file, indent=4)
+    json.dump(data_to_store, json_file, indent=4)
\ No newline at end of file