Commit 8297804c authored by ivan.pavlovic

Progress on model testing and on the number of data points per day/week/month

parent 158da3fd
Showing with 5530 additions and 915 deletions
gptKey
geminiKey
.venv
from requests import get
from datetime import datetime, timedelta
import time

# MeSH terms to query, URL-encoded for the E-utilities query string
TERMS = [
    # '"Noncommunicable+Diseases"',    # NCDs (all)
    # '"Diabetes+Mellitus"',           # Diabetes (type 1 or 2)
    # '"Neoplasms"',                   # Cancer
    # '"Respiratory+Tract+Diseases"',  # Chronic respiratory disease
    # '"Cardiovascular+Diseases"',     # Cardiovascular diseases
    '"Mental+Health"',                 # Mental Health
    '"Diabetes+Mellitus%2C+Type+1"',   # Diabetes type 1
    '"Diabetes+Mellitus%2C+Type+2"'    # Diabetes type 2
]

INTERVALS = [
    "day",
    "week",
    "month"
]

def get_count_for_year(year, term, interval="month"):
    """Return the max/min/avg PubMed result count for `term` over interval-sized windows of `year`."""
    current_date = datetime(year, 1, 1)
    counts = []
    while current_date < datetime(year, 12, 31):
        if interval == "day":
            next_date = current_date + timedelta(days=1)
        elif interval == "week":
            next_date = current_date + timedelta(weeks=1)
        elif interval == "month":
            # First day of the following month (day 28 plus 4 days lands in the next month for any month length)
            next_date = (current_date.replace(day=28) + timedelta(days=4)).replace(day=1)
        # Note: mindate and maxdate are both inclusive, so consecutive windows share their boundary day
        url = (
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
            f"?db=pubmed&term={term}&retmode=json"
            f"&mindate={current_date.strftime('%Y/%m/%d')}"
            f"&maxdate={next_date.strftime('%Y/%m/%d')}&usehistory=y"
        )
        response = get(url)
        search_res = response.json()
        counts.append(int(search_res["esearchresult"]["count"]))
        current_date = next_date
        time.sleep(1)  # stay under the E-utilities rate limit
    return {"max": max(counts), "min": min(counts), "avg": sum(counts) / len(counts)}

data = {}
for term in TERMS:
    data[term] = {}
    mesh = term + "[Mesh]"
    print("TERM: ", mesh)
    for interval in INTERVALS:
        print("INTERVAL: ", interval)
        counts = get_count_for_year(2024, mesh, interval)
        print(counts)
        data[term][interval] = counts

print(data)
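A single windowed query can be tried in isolation; below is a minimal sketch against the same endpoint, with an arbitrarily chosen window. One detail worth noting: esearch returns the count as a JSON string, which is why the script above casts it with int().

from requests import get

url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    '?db=pubmed&term="Mental+Health"[Mesh]&retmode=json'
    "&mindate=2024/01/01&maxdate=2024/01/31"
)
res = get(url).json()
print(res["esearchresult"]["count"])       # the count arrives as a string in the JSON payload
print(int(res["esearchresult"]["count"]))  # cast to int before aggregating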
{
"\"Noncommunicable+Diseases\"": {
"day": {
"max": 8,
"min": 0,
"avg": 2.473972602739726
},
"week": {
"max": 17,
"min": 2,
"avg": 9.471698113207546
},
"month": {
"max": 54,
"min": 20,
"avg": 38.5
}
},
"\"Diabetes+Mellitus\"": {
"day": {
"max": 352,
"min": 0,
"avg": 108.41643835616438
},
"week": {
"max": 583,
"min": 178,
"avg": 430.47169811320754
},
"month": {
"max": 2230,
"min": 1001,
"avg": 1701.75
}
},
"\"Neoplasms\"": {
"day": {
"max": 2135,
"min": 2,
"avg": 689.9424657534247
},
"week": {
"max": 3519,
"min": 1225,
"avg": 2726.6603773584907
},
"month": {
"max": 13160,
"min": 6198,
"avg": 10845.583333333334
}
},
"\"Respiratory+Tract+Diseases\"": {
"day": {
"max": 1109,
"min": 1,
"avg": 356.06575342465754
},
"week": {
"max": 1832,
"min": 624,
"avg": 1411.188679245283
},
"month": {
"max": 6824,
"min": 3360,
"avg": 5595.583333333333
}
},
"\"Cardiovascular+Diseases\"": {
"day": {
"max": 1091,
"min": 0,
"avg": 390.46301369863016
},
"week": {
"max": 2052,
"min": 608,
"avg": 1550.9433962264152
},
"month": {
"max": 7519,
"min": 3345,
"avg": 6134.416666666667
}
},
"\"Mental+Health\"": {
"day": {
"max": 79,
"min": 0,
"avg": 26.953424657534246
},
"week": {
"max": 164,
"min": 49,
"avg": 107.9622641509434
},
"month": {
"max": 532,
"min": 259,
"avg": 424.0
}
},
"\"Diabetes+Mellitus%2C+Type+1\"": {
"day": {
"max": 41,
"min": 0,
"avg": 12.991780821917809
},
"week": {
"max": 89,
"min": 19,
"avg": 51.886792452830186
},
"month": {
"max": 261,
"min": 101,
"avg": 203.5
}
},
"\"Diabetes+Mellitus%2C+Type+2\"": {
"day": {
"max": 168,
"min": 0,
"avg": 50.24109589041096
},
"week": {
"max": 280,
"min": 79,
"avg": 199.26415094339623
},
"month": {
"max": 1059,
"min": 440,
"avg": 789.5833333333334
}
}
}
No preview for this file type
@@ -10,6 +10,14 @@ LABELS = [
     "Diabetes type 2"
 ]
 
+# LABELS = [
+#     "Neoplasms",
+#     "Diabetes Mellitus",
+#     "Male",
+#     "Blood Cells",
+#     "Arthritis, Infectious"
+# ]
+
 MODELS = [
     "facebook/bart-large-mnli",           # https://huggingface.co/facebook/bart-large-mnli
     "MoritzLaurer/bge-m3-zeroshot-v2.0",  # https://huggingface.co/MoritzLaurer/bge-m3-zeroshot-v2.0
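Both checkpoints are zero-shot NLI classifiers; here is a minimal sketch of how they are typically driven, assuming the Hugging Face transformers pipeline API (the abstract text and label subset are invented for illustration):

from transformers import pipeline

# Either of the listed checkpoints can be plugged in here
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

abstract = "We studied glycemic control in adults with type 2 diabetes."  # hypothetical input
result = classifier(
    abstract,
    candidate_labels=["Diabetes type 2", "Cancer", "Mental Health"],
    multi_label=True,  # score each label independently instead of softmaxing across labels
)
print(result["labels"][0], result["scores"][0])  # top-ranked label and its score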
File added
File added
import sys
import os
import json
import time

# Add the parent directory to the module search path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))

from api.pubmedApi import getPubmedData

# Human-readable labels, aligned index-for-index with MESH and MESH_TERMS below
LABELS = [
    "Noncommunicable Diseases",
    "Diabetes",
    "Cancer",
    "Chronic respiratory disease",
    "Cardiovascular diseases",
    "Mental Health",
    "Diabetes type 1",
    "Diabetes type 2"
]

# MeSH headings as they appear in each record's "MeshTerms" field
MESH = [
    "Noncommunicable Diseases",
    "Diabetes Mellitus",
    "Neoplasms",
    "Respiratory Tract Diseases",
    "Cardiovascular Diseases",
    "Mental Health",
    "Diabetes Mellitus, Type 1",
    "Diabetes Mellitus, Type 2"
]

# The same headings, URL-encoded and tagged for PubMed queries
MESH_TERMS = [
    '"Noncommunicable+Diseases"[Mesh]',      # NCDs (all)
    '"Diabetes+Mellitus"[Mesh]',             # Diabetes (type 1 or 2)
    '"Neoplasms"[Mesh]',                     # Cancer
    '"Respiratory+Tract+Diseases"[Mesh]',    # Chronic respiratory disease
    '"Cardiovascular+Diseases"[Mesh]',       # Cardiovascular diseases
    '"Mental+Health"[Mesh]',                 # Mental Health
    '"Diabetes+Mellitus%2C+Type+1"[Mesh]',   # Diabetes type 1
    '"Diabetes+Mellitus%2C+Type+2"[Mesh]'    # Diabetes type 2
]

date_min = "2024/01/01"
date_max = "2024/12/31"

for id_term, mesh_term in enumerate(MESH_TERMS):
    data_list = getPubmedData(mesh_term, date_min, date_max, nb_items=1000)

    # Keep the first 20 records whose MeSH annotations match at least one tracked heading
    data_store = []
    for data in data_list:
        for id_mesh, mesh in enumerate(MESH):
            if mesh in data["MeshTerms"]:
                data["Predictions"].append(LABELS[id_mesh])
        if len(data["Predictions"]) > 0:
            data_store.append(data)
        if len(data_store) >= 20:
            break

    filename = LABELS[id_term].replace(" ", "_").replace(",", "").lower()
    with open(f"./data/{filename}.json", "w") as json_file:
        json.dump(data_store, json_file, indent=4)

    time.sleep(1)  # be polite to the PubMed API between terms
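getPubmedData is the project's own helper (from api/pubmedApi), so only the fields this loop actually touches are certain. A hypothetical record, for orientation:

# Hypothetical record shape, inferred from the fields the loop reads and writes;
# the real helper in api/pubmedApi may return additional fields.
record = {
    "MeshTerms": ["Diabetes Mellitus, Type 2", "Humans"],  # matched against MESH
    "Predictions": []  # starts empty; matching LABELS entries are appended
}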
(12 additional file diffs collapsed)