Skip to content
Snippets Groups Projects
Commit 088f2547 authored by Ivan Pavlovich's avatar Ivan Pavlovich
Browse files

Création du script de comptage d'articles PubMed par jour/semaine/mois depuis une sauvegarde locale

parent b57f6cef
No related branches found
No related tags found
No related merge requests found
...@@ -12,6 +12,17 @@ DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data")) ...@@ -12,6 +12,17 @@ DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
file_path = f"{DATA_DIR}/save_3_years.json" file_path = f"{DATA_DIR}/save_3_years.json"
def get_date_indices(date, start_date):
    """Return the day, week and month bucket indices of ``date``.

    All three indices are offsets relative to ``start_date``:

    * day index   -- number of whole days elapsed since ``start_date``;
    * week index  -- number of whole 7-day windows elapsed since
      ``start_date`` (windows are anchored on ``start_date``, they are not
      calendar weeks);
    * month index -- difference in calendar months, computed from the year
      and month fields only, so the day of the month is ignored.

    Dates earlier than ``start_date`` yield negative indices; the week index
    uses floor division, so the day before ``start_date`` is in week ``-1``.

    Args:
        date: The date to locate. Must support subtraction with
            ``start_date`` and expose ``year`` / ``month`` attributes.
        start_date: The origin of all three index scales.

    Returns:
        A ``(day_index, week_index, month_index)`` tuple of ints.
    """
    delta_days = (date - start_date).days
    delta_months = (date.year - start_date.year) * 12 + (date.month - start_date.month)
    return delta_days, delta_days // 7, delta_months
def match_mesh_terms(article_mesh_terms, ncd, keyword): def match_mesh_terms(article_mesh_terms, ncd, keyword):
if ncd in article_mesh_terms: if ncd in article_mesh_terms:
if isinstance(keyword, list): if isinstance(keyword, list):
...@@ -26,47 +37,152 @@ def match_mesh_terms(article_mesh_terms, ncd, keyword): ...@@ -26,47 +37,152 @@ def match_mesh_terms(article_mesh_terms, ncd, keyword):
else: else:
return False return False
def filter_articles(articles, start_date, end_date, ncd, keyword):
    """Return the articles dated in ``[start_date, end_date)`` that match.

    Each article is expected to be a mapping with a ``"Date"`` entry holding
    ``"Year"``, ``"Month"`` and ``"Day"`` values convertible with ``int()``,
    and a ``"MeshTerms"`` entry holding an iterable of strings.

    Args:
        articles: Iterable of article records.
        start_date: Inclusive lower bound of the publication window.
        end_date: Exclusive upper bound of the publication window.
        ncd: MeSH term forwarded to ``match_mesh_terms`` as the disease term.
        keyword: Keyword (or list of keywords) forwarded to
            ``match_mesh_terms``.

    Returns:
        A new list with the matching article records, in input order.
    """
    filtered_articles = []
    for article in articles:
        published = article["Date"]
        article_date = datetime(
            int(published["Year"]),
            int(published["Month"]),
            int(published["Day"]),
        )
        # Half-open window: an article dated exactly ``end_date`` belongs to
        # the next window, so consecutive windows never double count.
        if not (start_date <= article_date < end_date):
            continue

        # MeSH terms are compared case-insensitively, hence the lower-casing.
        mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]
        if match_mesh_terms(mesh_terms, ncd, keyword):
            filtered_articles.append(article)
    return filtered_articles
# Load the locally saved PubMed export (an iterable of article records).
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)

print(len(data))

# Lower-cased MeSH vocabularies, so comparisons are case-insensitive.
ncds_mesh_terms = [mesh_term.lower() for mesh_term in NCDS_MESH_TERM.values()]

# The keyword lists are cumulative: each tier extends the previous one.
keywords_mesh_terms = [mesh_term.lower() for mesh_term in KEYWORDS_MESH_TERM.values()]
keywords_subheading_mesh_terms = keywords_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_SUBHEADING]
keywords_site_proposition_mesh_terms = keywords_subheading_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_SITE_PROPOSITION]
keywords_proposition_mesh_terms = keywords_site_proposition_mesh_terms + [mesh_term.lower() for mesh_term in KEYWORDS_MESH_PROPOSITION]

# Tier label (key used in ``counts``) -> keyword list checked for that tier.
keyword_tiers = {
    "KEYWORDS": keywords_mesh_terms,
    "SUBHEADINGS": keywords_subheading_mesh_terms,
    "SITE PROPOSITION": keywords_site_proposition_mesh_terms,
    "PROPOSITION": keywords_proposition_mesh_terms,
}
granularities = ("day", "week", "month")


def _record_pmid(buckets, index, pmid):
    """Add ``pmid`` to ``buckets[index]``, creating the list and skipping duplicates."""
    pmids = buckets.setdefault(index, [])
    if pmid not in pmids:
        pmids.append(pmid)


# counts[ncd][tier][granularity][index] -> list of distinct PMIDs.
counts = {}
start_date = datetime(2022, 1, 1)

for article in data:
    mesh_terms = [mesh_term.lower() for mesh_term in article["MeshTerms"]]

    # The "ALL" entry is only created; nothing in this script fills it.
    if "ALL" not in counts:
        counts["ALL"] = {}

    # Bucket indices are identical for every NCD/tier of a given article, so
    # they are computed at most once, and only if the article matches
    # something (non-matching articles never have their date parsed).
    indices = None

    for ncd in ncds_mesh_terms:
        if ncd not in counts:
            counts[ncd] = {
                tier: {granularity: {} for granularity in granularities}
                for tier in keyword_tiers
            }

        for tier, keywords in keyword_tiers.items():
            # Fix: the keyword under test is passed to match_mesh_terms.
            # Previously the article's own mesh_terms list was passed as the
            # keyword argument, which left the loop variable unused.
            # One matching keyword is enough: PMIDs are stored de-duplicated.
            if not any(match_mesh_terms(mesh_terms, ncd, keyword) for keyword in keywords):
                continue

            if indices is None:
                published = article["Date"]
                article_date = datetime(
                    int(published["Year"]),
                    int(published["Month"]),
                    int(published["Day"]),
                )
                indices = dict(zip(granularities, get_date_indices(article_date, start_date)))

            for granularity, index in indices.items():
                _record_pmid(counts[ncd][tier][granularity], index, article["PMID"])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment