From e8e74d8f48894aa54c716b13776f7091b89852dd Mon Sep 17 00:00:00 2001 From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch> Date: Wed, 12 Mar 2025 00:59:03 +0100 Subject: [PATCH] Trying to get number of published articles on Pubmed (Problem: URI too long 414, not resolved), started looking at Ollama, started looking at HuggingFace's TextGeneration models and started looking at Tokenizers for price estimations --- .../PubMed/__pycache__/util.cpython-313.pyc | Bin 0 -> 1561 bytes dataSources/PubMed/data_keyword_num.py | 23 +- dataSources/PubMed/data_num.py | 287 +++++++++++++++-- .../PubMed/doc/data_num_keyword_no_mesh.json | 42 +++ .../results/tmp/\"Diabetes+Mellitus\".json" | 288 +++++++++++++++++- dataSources/PubMed/util.py | 28 ++ models/ZeroShotClassifier/HuggingFace/llm.py | 30 ++ .../tests/__pycache__/ollama.cpython-313.pyc | Bin 0 -> 584 bytes models/tests/ollama_test.py | 12 + models/tests/sources.txt | 20 ++ variables/__pycache__/pubmed.cpython-313.pyc | Bin 0 -> 5463 bytes variables/pubmed.py | 215 +++++++++++++ 12 files changed, 905 insertions(+), 40 deletions(-) create mode 100644 dataSources/PubMed/__pycache__/util.cpython-313.pyc create mode 100644 dataSources/PubMed/doc/data_num_keyword_no_mesh.json create mode 100644 dataSources/PubMed/util.py create mode 100644 models/ZeroShotClassifier/HuggingFace/llm.py create mode 100644 models/tests/__pycache__/ollama.cpython-313.pyc create mode 100644 models/tests/ollama_test.py create mode 100644 models/tests/sources.txt create mode 100644 variables/__pycache__/pubmed.cpython-313.pyc create mode 100644 variables/pubmed.py diff --git a/dataSources/PubMed/__pycache__/util.cpython-313.pyc b/dataSources/PubMed/__pycache__/util.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a66a1a424a46ddaa8cbc32cbaaaf6141e51b56cf GIT binary patch literal 1561 zcmey&%ge>Uz`zjGdm()Z69dCz5C?{tpp4IQ3=9lY8G;#t8NC^b7=xLl7>byJnWY$t zn1flQ7>ZcZ88um7f)x8DGlD26W@TVtU}j)o_-qH(9}N{|U<iVl3>RTw2m+}_;)9r> z48e@SOqNV444TY-Rcz6*8k$y0nvAztiWAdPHJNX*l@{gXWG3BWEl4aXPQArdSejpw z8efo8S`1QDl3Gzx404act#JK}{M=Oi%(BEheV6>?(%jU%l4AY5<P_bM#F9ka#Psx{ z)bzxX%=|pv#Jm*U<ebFf;>@(nWU!DvNKJ5lX;E@&v3@{ll5c8?erZW&j$T3KEw0j{ zocPqd<ouM>B2Ykryjv{8z`)SJaEDv4-=@>%I=9kAZlwh#D>5!{D_!6=zrbSt8RR#V zkYb006dNNbr1cr{7(*F?KyC*qMZ%^~wlIT7Dg#3vQ#8mzuvjQlGz*x)z`$S%4POnY zK%xZ$LnxClLl|>0lRiT+lQ4q^Cj&zsb1-8lGao}RlQDAulP@;|12=;LLmo>Wb0}jF z$VE`yFe;cih!e&}(4j1*P<O&;1qR;$1_m5vForTgY(!F>4zW3uQH7zHNr9o5DUUgg zL6gPr7Nb%XyHd1oYH>#FFIELdKNp3Sthd<85_3vZi*Ipd7H8%amn7yTr`}@8$t*6p z#g<r5keZirizO>RGjAp9E!L9MqTJ$J%tfiiw^%YuQgd%HX5L~h$t@@br8NZ*C<6KD z7H@iLNqlZ<aYj5yN0BrG0|Ofa14D5cI3jOw@iaI;5D=ZtGm&Q+UkA$_ZqW%Tb6l=- zD_-PQys52!S;%0O$%4cQffEWRB+n3^Y`4;+!})@c!3`e%ey>ii3FXskCe|!)zAUbB zjYsnVL~w@S43imZ3)nA!1wV2#2+DupW8mR$@cQ|Qn?cwR?85Z|j&^L=(+MB(=_Hec zbYerDbP~*hKc(<71hX15Bc*^4lF~~y*7U;WSH#P}z)-~tNikM=`Kc8Ju~keO3PpS% zaV7=LB7O!2h9Xf=)S{&TVR#AvYlEZ%Sq26MA&}xmBGQ4O>183aO(H8Cq3J;OvatF_ z5l})fgC>L-LNnaw`^@xNA$(az_ky(EilhtThSzwEP?H4j1#!J=Jo*?(!W^0;oS4`& zS#R+`N<KYs!B+(G47m6KC*2~DyH_$4af3VyPTw3hx%nxjIjMF<3JeSkpzK;K&cML% dftit!@iv3-Cl*FVi%)D^jQpP^xET4t8UZxIH(~$) literal 0 HcmV?d00001 diff --git a/dataSources/PubMed/data_keyword_num.py b/dataSources/PubMed/data_keyword_num.py index b3d14db47..2672a80dd 100644 --- a/dataSources/PubMed/data_keyword_num.py +++ b/dataSources/PubMed/data_keyword_num.py @@ -61,24 +61,27 @@ for term in TERMS: ncd_mesh = term + "[Mesh]" print("TERM: ", ncd_mesh) + keywords_term = "" + for keyword in KEYWORDS: - data[term][keyword] = {} keyword_term = keyword.replace(" ", "+").replace("&", "%26").replace("/", "%2F") print("KEYWORD: ", keyword_term) - search_term = ncd_mesh + '+AND+"' + keyword_term + '"' + keywords_term += keyword_term + "+OR+" + + search_term = ncd_mesh + "+AND+(+" + keywords_term + "+)" - print("SEARCH: ", search_term) + print("SEARCH: ", search_term) - for interval in INTERVALS: - print("INTERVAL: ", interval) - counts = get_count_for_year(2024, search_term, interval) - print(counts) - data[term][keyword][interval] = counts + for interval in INTERVALS: + print("INTERVAL: ", interval) + counts = get_count_for_year(2024, search_term, interval) + print(counts) + data[term][interval] = counts - with open(f"{RESULTS_DIR}/{term}.txt", "w+") as file: - print(data[term], file=file) + # with open(f"{RESULTS_DIR}/{term}.txt", "w+") as file: + # print(data[term], file=file) print("DATA: ", data) \ No newline at end of file diff --git a/dataSources/PubMed/data_num.py b/dataSources/PubMed/data_num.py index 31cef23f9..8421e93bc 100644 --- a/dataSources/PubMed/data_num.py +++ b/dataSources/PubMed/data_num.py @@ -1,21 +1,21 @@ +import sys +import os from requests import get from datetime import datetime, timedelta import time +import urllib.parse -TERMS = [ - '"Noncommunicable+Diseases"', # NCDs (All) - '"Diabetes+Mellitus"', # Diabetes (type 1 or 2) - '"Neoplasms"', # Cancer - '"Respiratory+Tract+Diseases"', # Chronic respiratory disease - '"Cardiovascular+Diseases"', # Cardiovascular diseases - '"Mental+Health"', # Mental Health - '"Diabetes+Mellitus%2C+Type+1"', # Diabetes type 1 - '"Diabetes+Mellitus%2C+Type+2"' # Diabetes type 2 -] +# Ajouter le rĂ©pertoire parent au chemin de recherche +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))) + +from variables.pubmed import * +from dataSources.PubMed.util import * + +RESULTS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./results")) INTERVALS = [ - "day", - "week", + # "day", + # "week", "month" ] @@ -32,13 +32,24 @@ def get_count_for_year(year, term, interval = "month"): elif interval == "month": next_date = (current_date.replace(day=28) + timedelta(days=4)).replace(day=1) - url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term={term}&retmode=json&mindate={current_date.strftime("%Y/%m/%d")}&maxdate={next_date.strftime("%Y/%m/%d")}&usehistory=y' - response = get(url) - search_res = response.json() - counts.append(int(search_res["esearchresult"]["count"])) + while (True): + try: + url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&api_key={PUBMED_API_KEY}&term={term}&retmode=json&mindate={current_date.strftime("%Y/%m/%d")}&maxdate={next_date.strftime("%Y/%m/%d")}' + print("Ok 1") + response = get(url) + print(response) + print("Ok 2") + search_res = response.json() + print("Ok 3") + counts.append(int(search_res["esearchresult"]["count"])) - current_date = next_date - time.sleep(1) # si plus de 3 requĂȘtes par seconde sinon adresse IP bann (normalement) + print("Ok 4") + current_date = next_date + break + except Exception as e: + print(e) + + time.sleep(0.2) max_count = max(counts) min_count = min(counts) @@ -46,17 +57,235 @@ def get_count_for_year(year, term, interval = "month"): return {"max": max_count, "min": min_count, "avg": avg_count} -data = {} +interval = "month" +tot = 0 +for ncd in NCDS: + search_term = url_encode(ncd) + print(f"SEARCH_TERM: {search_term}") + counts = get_count_for_year(2024, search_term, interval) + + tot += counts["avg"] + +search_term = url_encode(" OR ".join(NCDS)) +print(f"SEARCH_TERM: {search_term}") +counts = get_count_for_year(2024, search_term, interval) + + +print(tot) +print(counts["avg"]) + +exit(0) + +result = {} + +ncds_mesh = get_mesh_term(NCDS_MESH_TERM) +ncds_mesh_noexp = get_mesh_noexp_term(NCDS_MESH_TERM) + +print(url_encode(" OR ".join(ncds_mesh))) +print(url_encode(" OR ".join(ncds_mesh_noexp))) + +keywords_mesh = get_mesh_term(KEYWORDS_MESH_TERM) +keywords_mesh_noexp = get_mesh_noexp_term(KEYWORDS_MESH_TERM) + +print(url_encode(" OR ".join(keywords_mesh))) +print(url_encode(" OR ".join(keywords_mesh_noexp))) + +keywords_mesh_site_proposition = get_mesh_term(KEYWORDS_MESH_SITE_PROPOSITION) +keywords_mesh_site_proposition_noexp = get_mesh_noexp_term(KEYWORDS_MESH_SITE_PROPOSITION) + +print(url_encode(" OR ".join(keywords_mesh_site_proposition))) +print(url_encode(" OR ".join(keywords_mesh_site_proposition_noexp))) + +keywords_mesh_proposition = get_mesh_term(KEYWORDS_MESH_PROPOSITION) +keywords_mesh_proposition_noexp = get_mesh_noexp_term(KEYWORDS_MESH_PROPOSITION) + +print(url_encode(" OR ".join(keywords_mesh_proposition))) +print(url_encode(" OR ".join(keywords_mesh_proposition_noexp))) + +keywords_hubheading = [ f'"{subheading}"[Subheading]' for subheading in KEYWORDS_MESH_SUBHEADING] +keywords_hubheading_noexp = [ f'"{subheading}"[Subheading:noexp]' for subheading in KEYWORDS_MESH_SUBHEADING] + +print(url_encode(" OR ".join(keywords_hubheading))) +print(url_encode(" OR ".join(keywords_hubheading_noexp))) + +for interval in INTERVALS: + result[interval] = {} + + for ncd in NCDS: + result[interval][ncd] = {} + + search_term = url_encode(ncd) + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd} AND ( " + " OR ".join(KEYWORDS) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd} AND ( " + " OR ".join(keywords_mesh) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd} AND ( " + " OR ".join(keywords_mesh_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd} AND ( " + " OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd} AND ( " + " OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(" OR ".join(NCDS)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd} AND ( {" OR ".join(KEYWORDS)} ) )'] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd} AND ( {" OR ".join(keywords_mesh)} ) )'] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd} AND ( {" OR ".join(keywords_mesh_noexp)} ) )'] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd} AND ( {" OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition)} ) )'] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd} AND ( {" OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp)} ) )'] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS"]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) + + # ------------------------------------ + + for ncd_mesh in ncds_mesh: + result[interval][ncd_mesh] = {} + + search_term = url_encode(ncd_mesh) + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh} AND ( " + " OR ".join(KEYWORDS) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh} AND ( " + " OR ".join(keywords_mesh) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh} AND ( " + " OR ".join(keywords_mesh_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh} AND ( " + " OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh} AND ( " + " OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(" OR ".join(ncds_mesh)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh} AND ( {" OR ".join(KEYWORDS)} ) )' for ncd_mesh in ncds_mesh] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh} AND ( {" OR ".join(keywords_mesh)} ) )' for ncd_mesh in ncds_mesh] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh} AND ( {" OR ".join(keywords_mesh_noexp)} ) )' for ncd_mesh in ncds_mesh] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh} AND ( {" OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition)} ) )' for ncd_mesh in ncds_mesh] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh} AND ( {" OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp)} ) )' for ncd_mesh in ncds_mesh] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH"]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) + + # ---------------------------------------- + + for ncd_mesh_noexp in ncds_mesh_noexp: + result[interval][ncd_mesh_noexp] = {} + + search_term = url_encode(ncd_mesh_noexp) + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh_noexp} AND ( " + " OR ".join(KEYWORDS) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh_noexp} AND ( " + " OR ".join(keywords_mesh) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh_noexp} AND ( " + " OR ".join(keywords_mesh_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh_noexp} AND ( " + " OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(f"{ncd_mesh_noexp} AND ( " + " OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp) + " )") + print(f"SEARCH_TERM: {search_term}") + result[interval][ncd_mesh_noexp]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) + + search_term = url_encode(" OR ".join(ncds_mesh_noexp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["WITHOUT_KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh_noexp} AND ( {" OR ".join(KEYWORDS)} ) )' for ncd_mesh_noexp in ncds_mesh_noexp] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["KEYWORDS"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh_noexp} AND ( {" OR ".join(keywords_mesh)} ) )' for ncd_mesh_noexp in ncds_mesh_noexp] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["KEYWORDS_MESH"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh_noexp} AND ( {" OR ".join(keywords_mesh_noexp)} ) )' for ncd_mesh_noexp in ncds_mesh_noexp] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["KEYWORDS_MESH_NOEXP"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh_noexp} AND ( {" OR ".join(keywords_mesh + keywords_hubheading + keywords_mesh_site_proposition + keywords_mesh_proposition)} ) )' for ncd_mesh_noexp in ncds_mesh_noexp] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["KEYWORDS_MESH_ALL"] = get_count_for_year(2024, search_term, interval) + + tmp = [f'( {ncd_mesh_noexp} AND ( {" OR ".join(keywords_mesh_noexp + keywords_hubheading_noexp + keywords_mesh_site_proposition_noexp + keywords_mesh_proposition_noexp)} ) )' for ncd_mesh_noexp in ncds_mesh_noexp] + search_term = url_encode(" OR ".join(tmp)) + print(f"SEARCH_TERM: {search_term}") + result[interval]["ALL NCDS MESH NOEXP"]["KEYWORDS_MESH_NOEXP_ALL"] = get_count_for_year(2024, search_term, interval) -for term in TERMS: - data[term] = {} - mesh = term + "[Mesh]" - print("TERM: ", mesh) +print(result) - for interval in INTERVALS: - print("INTERVAL: ", interval) - counts = get_count_for_year(2024, mesh, interval) - print(counts) - data[term][interval] = counts +with open(f"{RESULTS_DIR}/results.json", "w+") as json_file: + json.dump(result, json_file, indent=4) -print(data) \ No newline at end of file +print("END") \ No newline at end of file diff --git a/dataSources/PubMed/doc/data_num_keyword_no_mesh.json b/dataSources/PubMed/doc/data_num_keyword_no_mesh.json new file mode 100644 index 000000000..439a04c1e --- /dev/null +++ b/dataSources/PubMed/doc/data_num_keyword_no_mesh.json @@ -0,0 +1,42 @@ +{ + 'Noncommunicable+Diseases': { + 'month': { + 'max': 47, 'min': 17, 'avg': 34.0 + } + }, + 'Diabetes+Mellitus': { + 'month': { + 'max': 1622, 'min': 1015, 'avg': 1324.0833333333333 + } + }, + 'Neoplasms': { + 'month': { + 'max': 8468, 'min': 5817, 'avg': 7558.916666666667 + } + }, + 'Respiratory+Tract+Diseases': { + 'month': { + 'max': 4927, 'min': 3119, 'avg': 4292.583333333333 + } + }, + 'Cardiovascular+Diseases': { + 'month': { + 'max': 4565, 'min': 2981, 'avg': 4083.0833333333335 + } + }, + 'Mental+Health': { + 'month': { + 'max': 421, 'min': 269, 'avg': 358.0833333333333 + } + }, + 'Diabetes+Mellitus%2C+Type+1': { + 'month': { + 'max': 221, 'min': 121, 'avg': 173.33333333333334 + } + }, + 'Diabetes+Mellitus%2C+Type+2': { + 'month': { + 'max': 750, 'min': 447, 'avg': 608.4166666666666 + } + } +} \ No newline at end of file diff --git "a/dataSources/PubMed/results/tmp/\"Diabetes+Mellitus\".json" "b/dataSources/PubMed/results/tmp/\"Diabetes+Mellitus\".json" index 2f1225b2c..f2cd35778 100644 --- "a/dataSources/PubMed/results/tmp/\"Diabetes+Mellitus\".json" +++ "b/dataSources/PubMed/results/tmp/\"Diabetes+Mellitus\".json" @@ -1 +1,287 @@ -{'Availability': {'week': {'max': 18, 'min': 2, 'avg': 6.452830188679245}, 'month': {'max': 39, 'min': 16, 'avg': 24.916666666666668}}, 'Affordability': {'week': {'max': 3, 'min': 0, 'avg': 0.5094339622641509}, 'month': {'max': 6, 'min': 0, 'avg': 2.1666666666666665}}, 'Essential medecins': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Care therapy': {'week': {'max': 1, 'min': 0, 'avg': 0.09433962264150944}, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667}}, 'Care health': {'week': {'max': 3, 'min': 0, 'avg': 0.7924528301886793}, 'month': {'max': 7, 'min': 1, 'avg': 2.9166666666666665}}, 'Health Expenditures': {'week': {'max': 3, 'min': 0, 'avg': 0.8113207547169812}, 'month': {'max': 6, 'min': 0, 'avg': 3.1666666666666665}}, 'Health care costs': {'week': {'max': 4, 'min': 0, 'avg': 1.320754716981132}, 'month': {'max': 8, 'min': 3, 'avg': 5.5}}, 'Market': {'week': {'max': 7, 'min': 0, 'avg': 1.6603773584905661}, 'month': {'max': 16, 'min': 3, 'avg': 6.333333333333333}}, 'Special populations': {'week': {'max': 2, 'min': 0, 'avg': 0.1320754716981132}, 'month': {'max': 3, 'min': 0, 'avg': 0.6666666666666666}}, 'Child Health': {'week': {'max': 15, 'min': 4, 'avg': 8.735849056603774}, 'month': {'max': 41, 'min': 28, 'avg': 33.666666666666664}}, 'Womens Health': {'week': {'max': 8, 'min': 0, 'avg': 2.452830188679245}, 'month': {'max': 17, 'min': 5, 'avg': 9.916666666666666}}, 'Age': {'week': {'max': 157, 'min': 38, 'avg': 103.0754716981132}, 'month': {'max': 532, 'min': 281, 'avg': 408.0}}, 'Minority': {'week': {'max': 7, 'min': 0, 'avg': 2.660377358490566}, 'month': {'max': 16, 'min': 5, 'avg': 10.916666666666666}}, 'Primary Care': {'week': {'max': 27, 'min': 3, 'avg': 14.69811320754717}, 'month': {'max': 73, 'min': 42, 'avg': 57.916666666666664}}, 'Specialty Care': {'week': {'max': 2, 'min': 0, 'avg': 0.16981132075471697}, 'month': {'max': 2, 'min': 0, 'avg': 0.6666666666666666}}, 'Patient acceptance': {'week': {'max': 5, 'min': 0, 'avg': 1.3773584905660377}, 'month': {'max': 10, 'min': 2, 'avg': 5.166666666666667}}, 'Patient centered care': {'week': {'max': 4, 'min': 0, 'avg': 0.9433962264150944}, 'month': {'max': 7, 'min': 2, 'avg': 3.8333333333333335}}, 'Prevention and control': {'week': {'max': 48, 'min': 16, 'avg': 32.0377358490566}, 'month': {'max': 163, 'min': 93, 'avg': 127.25}}, 'Mass screening': {'week': {'max': 9, 'min': 0, 'avg': 4.509433962264151}, 'month': {'max': 27, 'min': 8, 'avg': 17.583333333333332}}, 'Palliative care': {'week': {'max': 5, 'min': 0, 'avg': 0.9433962264150944}, 'month': {'max': 7, 'min': 0, 'avg': 3.75}}, 'Quality': {'week': {'max': 59, 'min': 14, 'avg': 44.0377358490566}, 'month': {'max': 226, 'min': 121, 'avg': 173.25}}, 'Telemedicine': {'week': {'max': 11, 'min': 1, 'avg': 4.09433962264151}, 'month': {'max': 26, 'min': 10, 'avg': 16.166666666666668}}, 'Digital health': {'week': {'max': 5, 'min': 0, 'avg': 2.2452830188679247}, 'month': {'max': 15, 'min': 4, 'avg': 9.083333333333334}}, 'Supplies': {'week': {'max': 4, 'min': 0, 'avg': 0.9811320754716981}, 'month': {'max': 10, 'min': 2, 'avg': 4.0}}, 'Human Resources': {'week': {'max': 2, 'min': 0, 'avg': 0.49056603773584906}, 'month': {'max': 6, 'min': 1, 'avg': 1.9166666666666667}}, 'Enablers/barriers': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Gender equity': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Racial': {'week': {'max': 8, 'min': 0, 'avg': 3.056603773584906}, 'month': {'max': 24, 'min': 4, 'avg': 13.0}}, 'Equity': {'week': {'max': 9, 'min': 1, 'avg': 5.113207547169812}, 'month': {'max': 25, 'min': 11, 'avg': 19.25}}, 'Clinical': {'week': {'max': 273, 'min': 93, 'avg': 195.56603773584905}, 'month': {'max': 985, 'min': 635, 'avg': 765.5}}, 'Health promotion': {'week': {'max': 9, 'min': 1, 'avg': 5.2075471698113205}, 'month': {'max': 29, 'min': 17, 'avg': 21.166666666666668}}, 'Health education': {'week': {'max': 13, 'min': 0, 'avg': 3.3773584905660377}, 'month': {'max': 26, 'min': 8, 'avg': 13.666666666666666}}, 'Research & Innovation': {'week': {'max': 3, 'min': 0, 'avg': 0.7735849056603774}, 'month': {'max': 7, 'min': 0, 'avg': 3.0}}, 'Therapeutic Development': {'week': {'max': 1, 'min': 0, 'avg': 0.11320754716981132}, 'month': {'max': 2, 'min': 0, 'avg': 0.5}}, 'Technological Development': {'week': {'max': 3, 'min': 0, 'avg': 0.6226415094339622}, 'month': {'max': 5, 'min': 1, 'avg': 2.8333333333333335}}, 'Self-management': {'week': {'max': 16, 'min': 3, 'avg': 9.037735849056604}, 'month': {'max': 47, 'min': 28, 'avg': 35.75}}, 'Self-monitoring': {'week': {'max': 39, 'min': 6, 'avg': 15.622641509433961}, 'month': {'max': 86, 'min': 39, 'avg': 60.333333333333336}}, 'Dosing': {'week': {'max': 7, 'min': 0, 'avg': 2.452830188679245}, 'month': {'max': 14, 'min': 6, 'avg': 10.083333333333334}}, 'Injections': {'week': {'max': 20, 'min': 3, 'avg': 9.471698113207546}, 'month': {'max': 44, 'min': 28, 'avg': 37.083333333333336}}, 'Secondary Care': {'week': {'max': 3, 'min': 0, 'avg': 0.49056603773584906}, 'month': {'max': 6, 'min': 0, 'avg': 2.0}}, 'Integrated Care': {'week': {'max': 6, 'min': 0, 'avg': 1.2452830188679245}, 'month': {'max': 9, 'min': 2, 'avg': 4.75}}, 'Treatment management': {'week': {'max': 3, 'min': 0, 'avg': 0.2641509433962264}, 'month': {'max': 4, 'min': 0, 'avg': 1.0}}, 'Immunization': {'week': {'max': 2, 'min': 0, 'avg': 0.41509433962264153}, 'month': {'max': 4, 'min': 0, 'avg': 1.6666666666666667}}, 'Vaccination': {'week': {'max': 4, 'min': 0, 'avg': 1.490566037735849}, 'month': {'max': 11, 'min': 1, 'avg': 5.583333333333333}}, 'Adherence': {'week': {'max': 23, 'min': 6, 'avg': 13.528301886792454}, 'month': {'max': 69, 'min': 41, 'avg': 52.5}}, 'Control': {'week': {'max': 178, 'min': 67, 'avg': 136.9433962264151}, 'month': {'max': 621, 'min': 416, 'avg': 541.0}}, 'Rehabilitation services': {'week': {'max': 1, 'min': 0, 'avg': 0.09433962264150944}, 'month': {'max': 1, 'min': 0, 'avg': 0.3333333333333333}}, 'Clinical guidelines': {'week': {'max': 4, 'min': 0, 'avg': 1.6415094339622642}, 'month': {'max': 11, 'min': 2, 'avg': 6.416666666666667}}, 'Health policy': {'week': {'max': 11, 'min': 0, 'avg': 5.245283018867925}, 'month': {'max': 30, 'min': 12, 'avg': 20.833333333333332}}, 'Healthcare policy': {'week': {'max': 1, 'min': 0, 'avg': 0.22641509433962265}, 'month': {'max': 3, 'min': 0, 'avg': 0.9166666666666666}}, 'National health policy': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Regional health policy': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Health legislation': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Policy evaluation': {'week': {'max': 1, 'min': 0, 'avg': 0.1320754716981132}, 'month': {'max': 1, 'min': 0, 'avg': 0.5}}, 'Policy analysis': {'week': {'max': 2, 'min': 0, 'avg': 0.09433962264150944}, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667}}, 'Policy formulation': {'week': {'max': 1, 'min': 0, 'avg': 0.018867924528301886}, 'month': {'max': 1, 'min': 0, 'avg': 0.08333333333333333}}, 'Regulation': {'week': {'max': 39, 'min': 11, 'avg': 24.60377358490566}, 'month': {'max': 109, 'min': 85, 'avg': 95.83333333333333}}, 'Governance': {'week': {'max': 2, 'min': 0, 'avg': 0.49056603773584906}, 'month': {'max': 5, 'min': 0, 'avg': 1.8333333333333333}}, 'Global initiatives and organizations ': {'week': {'max': 2, 'min': 0, 'avg': 0.2830188679245283}, 'month': {'max': 2, 'min': 0, 'avg': 1.0833333333333333}}, 'Universal Health Care': {'week': {'max': 1, 'min': 0, 'avg': 0.05660377358490566}, 'month': {'max': 1, 'min': 0, 'avg': 0.25}}, 'Expansion': {'week': {'max': 8, 'min': 0, 'avg': 2.0943396226415096}, 'month': {'max': 16, 'min': 2, 'avg': 8.333333333333334}}, 'Health insurance': {'week': {'max': 10, 'min': 0, 'avg': 5.09433962264151}, 'month': {'max': 29, 'min': 13, 'avg': 19.5}}, 'Coverage': {'week': {'max': 6, 'min': 0, 'avg': 2.9245283018867925}, 'month': {'max': 17, 'min': 5, 'avg': 11.166666666666666}}, 'Funding and investment': {'week': {'max': 3, 'min': 0, 'avg': 0.5849056603773585}, 'month': {'max': 5, 'min': 0, 'avg': 2.3333333333333335}}, 'Health planning': {'week': {'max': 1, 'min': 0, 'avg': 0.11320754716981132}, 'month': {'max': 2, 'min': 0, 'avg': 0.5833333333333334}}, 'Health reform': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Policy monitoring': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Public health campaign': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Policy lobbying': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Patient advocacy': {'week': {'max': 2, 'min': 0, 'avg': 0.1320754716981132}, 'month': {'max': 2, 'min': 0, 'avg': 0.5833333333333334}}, 'Justice': {'week': {'max': 2, 'min': 0, 'avg': 0.24528301886792453}, 'month': {'max': 3, 'min': 0, 'avg': 0.9166666666666666}}, 'Awareness campaign': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Education': {'week': {'max': 88, 'min': 33, 'avg': 55.924528301886795}, 'month': {'max': 292, 'min': 175, 'avg': 221.58333333333334}}, 'Corporate accountability': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Social determinants of health': {'week': {'max': 5, 'min': 0, 'avg': 2.69811320754717}, 'month': {'max': 14, 'min': 8, 'avg': 10.75}}, 'Empowerment': {'week': {'max': 5, 'min': 0, 'avg': 0.9622641509433962}, 'month': {'max': 5, 'min': 1, 'avg': 3.3333333333333335}}, 'Community': {'week': {'max': 40, 'min': 8, 'avg': 24.566037735849058}, 'month': {'max': 120, 'min': 71, 'avg': 96.16666666666667}}, 'Peer support': {'week': {'max': 4, 'min': 0, 'avg': 0.8679245283018868}, 'month': {'max': 5, 'min': 1, 'avg': 3.3333333333333335}}, 'Civil society': {'week': {'max': 1, 'min': 0, 'avg': 0.03773584905660377}, 'month': {'max': 1, 'min': 0, 'avg': 0.16666666666666666}}, 'Patient education': {'week': {'max': 11, 'min': 1, 'avg': 4.981132075471698}, 'month': {'max': 28, 'min': 11, 'avg': 19.166666666666668}}, 'Parent education': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Educational materials': {'week': {'max': 1, 'min': 0, 'avg': 0.1509433962264151}, 'month': {'max': 2, 'min': 0, 'avg': 0.6666666666666666}}, 'Community heatlh education': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}, 'Awareness ': {'week': {'max': 14, 'min': 2, 'avg': 6.547169811320755}, 'month': {'max': 38, 'min': 18, 'avg': 25.333333333333332}}, 'Community engagement': {'week': {'max': 2, 'min': 0, 'avg': 0.24528301886792453}, 'month': {'max': 2, 'min': 0, 'avg': 1.0}}, 'Health literacy': {'week': {'max': 5, 'min': 0, 'avg': 1.5471698113207548}, 'month': {'max': 10, 'min': 3, 'avg': 6.25}}, 'Medical education': {'week': {'max': 6, 'min': 0, 'avg': 3.5849056603773586}, 'month': {'max': 20, 'min': 6, 'avg': 14.25}}, 'Training program': {'week': {'max': 5, 'min': 0, 'avg': 1.4150943396226414}, 'month': {'max': 11, 'min': 2, 'avg': 5.833333333333333}}, 'Technology education': {'week': {'max': 1, 'min': 0, 'avg': 0.05660377358490566}, 'month': {'max': 1, 'min': 0, 'avg': 0.25}}, 'Medical devices': {'week': {'max': 5, 'min': 0, 'avg': 1.3962264150943395}, 'month': {'max': 8, 'min': 2, 'avg': 5.416666666666667}}, 'Information Dissemination': {'week': {'max': 2, 'min': 0, 'avg': 0.11320754716981132}, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667}}, 'Behavioral change': {'week': {'max': 1, 'min': 0, 'avg': 0.20754716981132076}, 'month': {'max': 3, 'min': 0, 'avg': 1.0833333333333333}}, 'Nutrition education': {'week': {'max': 3, 'min': 0, 'avg': 0.5660377358490566}, 'month': {'max': 4, 'min': 1, 'avg': 2.3333333333333335}}, 'Risk communication': {'week': {'max': 1, 'min': 0, 'avg': 0.07547169811320754}, 'month': {'max': 1, 'min': 0, 'avg': 0.25}}, 'Sector integration': {'week': {'max': 0, 'min': 0, 'avg': 0.0}, 'month': {'max': 0, 'min': 0, 'avg': 0.0}}} +{'Availability': {'week': {'max': 18, 'min': 2, 'avg': 6.452830188679245 + }, 'month': {'max': 39, 'min': 16, 'avg': 24.916666666666668 + } + }, 'Affordability': {'week': {'max': 3, 'min': 0, 'avg': 0.5094339622641509 + }, 'month': {'max': 6, 'min': 0, 'avg': 2.1666666666666665 + } + }, 'Essential medecins': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Care therapy': {'week': {'max': 1, 'min': 0, 'avg': 0.09433962264150944 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667 + } + }, 'Care health': {'week': {'max': 3, 'min': 0, 'avg': 0.7924528301886793 + }, 'month': {'max': 7, 'min': 1, 'avg': 2.9166666666666665 + } + }, 'Health Expenditures': {'week': {'max': 3, 'min': 0, 'avg': 0.8113207547169812 + }, 'month': {'max': 6, 'min': 0, 'avg': 3.1666666666666665 + } + }, 'Health care costs': {'week': {'max': 4, 'min': 0, 'avg': 1.320754716981132 + }, 'month': {'max': 8, 'min': 3, 'avg': 5.5 + } + }, 'Market': {'week': {'max': 7, 'min': 0, 'avg': 1.6603773584905661 + }, 'month': {'max': 16, 'min': 3, 'avg': 6.333333333333333 + } + }, 'Special populations': {'week': {'max': 2, 'min': 0, 'avg': 0.1320754716981132 + }, 'month': {'max': 3, 'min': 0, 'avg': 0.6666666666666666 + } + }, 'Child Health': {'week': {'max': 15, 'min': 4, 'avg': 8.735849056603774 + }, 'month': {'max': 41, 'min': 28, 'avg': 33.666666666666664 + } + }, 'Womens Health': {'week': {'max': 8, 'min': 0, 'avg': 2.452830188679245 + }, 'month': {'max': 17, 'min': 5, 'avg': 9.916666666666666 + } + }, 'Age': {'week': {'max': 157, 'min': 38, 'avg': 103.0754716981132 + }, 'month': {'max': 532, 'min': 281, 'avg': 408.0 + } + }, 'Minority': {'week': {'max': 7, 'min': 0, 'avg': 2.660377358490566 + }, 'month': {'max': 16, 'min': 5, 'avg': 10.916666666666666 + } + }, 'Primary Care': {'week': {'max': 27, 'min': 3, 'avg': 14.69811320754717 + }, 'month': {'max': 73, 'min': 42, 'avg': 57.916666666666664 + } + }, 'Specialty Care': {'week': {'max': 2, 'min': 0, 'avg': 0.16981132075471697 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.6666666666666666 + } + }, 'Patient acceptance': {'week': {'max': 5, 'min': 0, 'avg': 1.3773584905660377 + }, 'month': {'max': 10, 'min': 2, 'avg': 5.166666666666667 + } + }, 'Patient centered care': {'week': {'max': 4, 'min': 0, 'avg': 0.9433962264150944 + }, 'month': {'max': 7, 'min': 2, 'avg': 3.8333333333333335 + } + }, 'Prevention and control': {'week': {'max': 48, 'min': 16, 'avg': 32.0377358490566 + }, 'month': {'max': 163, 'min': 93, 'avg': 127.25 + } + }, 'Mass screening': {'week': {'max': 9, 'min': 0, 'avg': 4.509433962264151 + }, 'month': {'max': 27, 'min': 8, 'avg': 17.583333333333332 + } + }, 'Palliative care': {'week': {'max': 5, 'min': 0, 'avg': 0.9433962264150944 + }, 'month': {'max': 7, 'min': 0, 'avg': 3.75 + } + }, 'Quality': {'week': {'max': 59, 'min': 14, 'avg': 44.0377358490566 + }, 'month': {'max': 226, 'min': 121, 'avg': 173.25 + } + }, 'Telemedicine': {'week': {'max': 11, 'min': 1, 'avg': 4.09433962264151 + }, 'month': {'max': 26, 'min': 10, 'avg': 16.166666666666668 + } + }, 'Digital health': {'week': {'max': 5, 'min': 0, 'avg': 2.2452830188679247 + }, 'month': {'max': 15, 'min': 4, 'avg': 9.083333333333334 + } + }, 'Supplies': {'week': {'max': 4, 'min': 0, 'avg': 0.9811320754716981 + }, 'month': {'max': 10, 'min': 2, 'avg': 4.0 + } + }, 'Human Resources': {'week': {'max': 2, 'min': 0, 'avg': 0.49056603773584906 + }, 'month': {'max': 6, 'min': 1, 'avg': 1.9166666666666667 + } + }, 'Enablers/barriers': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Gender equity': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Racial': {'week': {'max': 8, 'min': 0, 'avg': 3.056603773584906 + }, 'month': {'max': 24, 'min': 4, 'avg': 13.0 + } + }, 'Equity': {'week': {'max': 9, 'min': 1, 'avg': 5.113207547169812 + }, 'month': {'max': 25, 'min': 11, 'avg': 19.25 + } + }, 'Clinical': {'week': {'max': 273, 'min': 93, 'avg': 195.56603773584905 + }, 'month': {'max': 985, 'min': 635, 'avg': 765.5 + } + }, 'Health promotion': {'week': {'max': 9, 'min': 1, 'avg': 5.2075471698113205 + }, 'month': {'max': 29, 'min': 17, 'avg': 21.166666666666668 + } + }, 'Health education': {'week': {'max': 13, 'min': 0, 'avg': 3.3773584905660377 + }, 'month': {'max': 26, 'min': 8, 'avg': 13.666666666666666 + } + }, 'Research & Innovation': {'week': {'max': 3, 'min': 0, 'avg': 0.7735849056603774 + }, 'month': {'max': 7, 'min': 0, 'avg': 3.0 + } + }, 'Therapeutic Development': {'week': {'max': 1, 'min': 0, 'avg': 0.11320754716981132 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.5 + } + }, 'Technological Development': {'week': {'max': 3, 'min': 0, 'avg': 0.6226415094339622 + }, 'month': {'max': 5, 'min': 1, 'avg': 2.8333333333333335 + } + }, 'Self-management': {'week': {'max': 16, 'min': 3, 'avg': 9.037735849056604 + }, 'month': {'max': 47, 'min': 28, 'avg': 35.75 + } + }, 'Self-monitoring': {'week': {'max': 39, 'min': 6, 'avg': 15.622641509433961 + }, 'month': {'max': 86, 'min': 39, 'avg': 60.333333333333336 + } + }, 'Dosing': {'week': {'max': 7, 'min': 0, 'avg': 2.452830188679245 + }, 'month': {'max': 14, 'min': 6, 'avg': 10.083333333333334 + } + }, 'Injections': {'week': {'max': 20, 'min': 3, 'avg': 9.471698113207546 + }, 'month': {'max': 44, 'min': 28, 'avg': 37.083333333333336 + } + }, 'Secondary Care': {'week': {'max': 3, 'min': 0, 'avg': 0.49056603773584906 + }, 'month': {'max': 6, 'min': 0, 'avg': 2.0 + } + }, 'Integrated Care': {'week': {'max': 6, 'min': 0, 'avg': 1.2452830188679245 + }, 'month': {'max': 9, 'min': 2, 'avg': 4.75 + } + }, 'Treatment management': {'week': {'max': 3, 'min': 0, 'avg': 0.2641509433962264 + }, 'month': {'max': 4, 'min': 0, 'avg': 1.0 + } + }, 'Immunization': {'week': {'max': 2, 'min': 0, 'avg': 0.41509433962264153 + }, 'month': {'max': 4, 'min': 0, 'avg': 1.6666666666666667 + } + }, 'Vaccination': {'week': {'max': 4, 'min': 0, 'avg': 1.490566037735849 + }, 'month': {'max': 11, 'min': 1, 'avg': 5.583333333333333 + } + }, 'Adherence': {'week': {'max': 23, 'min': 6, 'avg': 13.528301886792454 + }, 'month': {'max': 69, 'min': 41, 'avg': 52.5 + } + }, 'Control': {'week': {'max': 178, 'min': 67, 'avg': 136.9433962264151 + }, 'month': {'max': 621, 'min': 416, 'avg': 541.0 + } + }, 'Rehabilitation services': {'week': {'max': 1, 'min': 0, 'avg': 0.09433962264150944 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.3333333333333333 + } + }, 'Clinical guidelines': {'week': {'max': 4, 'min': 0, 'avg': 1.6415094339622642 + }, 'month': {'max': 11, 'min': 2, 'avg': 6.416666666666667 + } + }, 'Health policy': {'week': {'max': 11, 'min': 0, 'avg': 5.245283018867925 + }, 'month': {'max': 30, 'min': 12, 'avg': 20.833333333333332 + } + }, 'Healthcare policy': {'week': {'max': 1, 'min': 0, 'avg': 0.22641509433962265 + }, 'month': {'max': 3, 'min': 0, 'avg': 0.9166666666666666 + } + }, 'National health policy': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Regional health policy': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Health legislation': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Policy evaluation': {'week': {'max': 1, 'min': 0, 'avg': 0.1320754716981132 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.5 + } + }, 'Policy analysis': {'week': {'max': 2, 'min': 0, 'avg': 0.09433962264150944 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667 + } + }, 'Policy formulation': {'week': {'max': 1, 'min': 0, 'avg': 0.018867924528301886 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.08333333333333333 + } + }, 'Regulation': {'week': {'max': 39, 'min': 11, 'avg': 24.60377358490566 + }, 'month': {'max': 109, 'min': 85, 'avg': 95.83333333333333 + } + }, 'Governance': {'week': {'max': 2, 'min': 0, 'avg': 0.49056603773584906 + }, 'month': {'max': 5, 'min': 0, 'avg': 1.8333333333333333 + } + }, 'Global initiatives and organizations ': {'week': {'max': 2, 'min': 0, 'avg': 0.2830188679245283 + }, 'month': {'max': 2, 'min': 0, 'avg': 1.0833333333333333 + } + }, 'Universal Health Care': {'week': {'max': 1, 'min': 0, 'avg': 0.05660377358490566 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.25 + } + }, 'Expansion': {'week': {'max': 8, 'min': 0, 'avg': 2.0943396226415096 + }, 'month': {'max': 16, 'min': 2, 'avg': 8.333333333333334 + } + }, 'Health insurance': {'week': {'max': 10, 'min': 0, 'avg': 5.09433962264151 + }, 'month': {'max': 29, 'min': 13, 'avg': 19.5 + } + }, 'Coverage': {'week': {'max': 6, 'min': 0, 'avg': 2.9245283018867925 + }, 'month': {'max': 17, 'min': 5, 'avg': 11.166666666666666 + } + }, 'Funding and investment': {'week': {'max': 3, 'min': 0, 'avg': 0.5849056603773585 + }, 'month': {'max': 5, 'min': 0, 'avg': 2.3333333333333335 + } + }, 'Health planning': {'week': {'max': 1, 'min': 0, 'avg': 0.11320754716981132 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.5833333333333334 + } + }, 'Health reform': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Policy monitoring': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Public health campaign': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Policy lobbying': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Patient advocacy': {'week': {'max': 2, 'min': 0, 'avg': 0.1320754716981132 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.5833333333333334 + } + }, 'Justice': {'week': {'max': 2, 'min': 0, 'avg': 0.24528301886792453 + }, 'month': {'max': 3, 'min': 0, 'avg': 0.9166666666666666 + } + }, 'Awareness campaign': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Education': {'week': {'max': 88, 'min': 33, 'avg': 55.924528301886795 + }, 'month': {'max': 292, 'min': 175, 'avg': 221.58333333333334 + } + }, 'Corporate accountability': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Social determinants of health': {'week': {'max': 5, 'min': 0, 'avg': 2.69811320754717 + }, 'month': {'max': 14, 'min': 8, 'avg': 10.75 + } + }, 'Empowerment': {'week': {'max': 5, 'min': 0, 'avg': 0.9622641509433962 + }, 'month': {'max': 5, 'min': 1, 'avg': 3.3333333333333335 + } + }, 'Community': {'week': {'max': 40, 'min': 8, 'avg': 24.566037735849058 + }, 'month': {'max': 120, 'min': 71, 'avg': 96.16666666666667 + } + }, 'Peer support': {'week': {'max': 4, 'min': 0, 'avg': 0.8679245283018868 + }, 'month': {'max': 5, 'min': 1, 'avg': 3.3333333333333335 + } + }, 'Civil society': {'week': {'max': 1, 'min': 0, 'avg': 0.03773584905660377 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.16666666666666666 + } + }, 'Patient education': {'week': {'max': 11, 'min': 1, 'avg': 4.981132075471698 + }, 'month': {'max': 28, 'min': 11, 'avg': 19.166666666666668 + } + }, 'Parent education': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Educational materials': {'week': {'max': 1, 'min': 0, 'avg': 0.1509433962264151 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.6666666666666666 + } + }, 'Community heatlh education': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + }, 'Awareness ': {'week': {'max': 14, 'min': 2, 'avg': 6.547169811320755 + }, 'month': {'max': 38, 'min': 18, 'avg': 25.333333333333332 + } + }, 'Community engagement': {'week': {'max': 2, 'min': 0, 'avg': 0.24528301886792453 + }, 'month': {'max': 2, 'min': 0, 'avg': 1.0 + } + }, 'Health literacy': {'week': {'max': 5, 'min': 0, 'avg': 1.5471698113207548 + }, 'month': {'max': 10, 'min': 3, 'avg': 6.25 + } + }, 'Medical education': {'week': {'max': 6, 'min': 0, 'avg': 3.5849056603773586 + }, 'month': {'max': 20, 'min': 6, 'avg': 14.25 + } + }, 'Training program': {'week': {'max': 5, 'min': 0, 'avg': 1.4150943396226414 + }, 'month': {'max': 11, 'min': 2, 'avg': 5.833333333333333 + } + }, 'Technology education': {'week': {'max': 1, 'min': 0, 'avg': 0.05660377358490566 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.25 + } + }, 'Medical devices': {'week': {'max': 5, 'min': 0, 'avg': 1.3962264150943395 + }, 'month': {'max': 8, 'min': 2, 'avg': 5.416666666666667 + } + }, 'Information Dissemination': {'week': {'max': 2, 'min': 0, 'avg': 0.11320754716981132 + }, 'month': {'max': 2, 'min': 0, 'avg': 0.4166666666666667 + } + }, 'Behavioral change': {'week': {'max': 1, 'min': 0, 'avg': 0.20754716981132076 + }, 'month': {'max': 3, 'min': 0, 'avg': 1.0833333333333333 + } + }, 'Nutrition education': {'week': {'max': 3, 'min': 0, 'avg': 0.5660377358490566 + }, 'month': {'max': 4, 'min': 1, 'avg': 2.3333333333333335 + } + }, 'Risk communication': {'week': {'max': 1, 'min': 0, 'avg': 0.07547169811320754 + }, 'month': {'max': 1, 'min': 0, 'avg': 0.25 + } + }, 'Sector integration': {'week': {'max': 0, 'min': 0, 'avg': 0.0 + }, 'month': {'max': 0, 'min': 0, 'avg': 0.0 + } + } +} \ No newline at end of file diff --git a/dataSources/PubMed/util.py b/dataSources/PubMed/util.py new file mode 100644 index 000000000..0d7c8c948 --- /dev/null +++ b/dataSources/PubMed/util.py @@ -0,0 +1,28 @@ +import urllib.parse + +def url_encode(text): + return urllib.parse.quote_plus(text, safe='[]():"') + +def get_mesh_term(terms): + res = [] + + for item in terms.values(): + if isinstance(item, list): + tmp = [f'"{i}"[Mesh]' for i in item] + res.append(" AND ".join(tmp)) + else: + res.append(f'"{item}"[Mesh]') + + return res + +def get_mesh_noexp_term(terms): + res = [] + + for item in terms.values(): + if isinstance(item, list): + tmp = [f'"{i}"[Mesh:noexp]' for i in item] + res.append(f'( {" AND ".join(tmp)} )') + else: + res.append(f'"{item}"[Mesh:noexp]') + + return res diff --git a/models/ZeroShotClassifier/HuggingFace/llm.py b/models/ZeroShotClassifier/HuggingFace/llm.py new file mode 100644 index 000000000..d210bc4bd --- /dev/null +++ b/models/ZeroShotClassifier/HuggingFace/llm.py @@ -0,0 +1,30 @@ +from transformers import pipeline +import torch + +MODELS = [ + "deepseek-ai/DeepSeek-V3" #https://huggingface.co/deepseek-ai/DeepSeek-V3 +] + +def create_generator(model = MODELS[0]): + print(f" CUDA available: {torch.cuda.is_available()}") + print(f"CUDA version: {torch.version.cuda}") + print(f"GPUs number: {torch.cuda.device_count()}") + device = 0 if torch.cuda.is_available() else -1 + return pipeline("text-generation", model=model, device=device) + + +def generate(generator, sequence, debug = False): + results = generator(sequence) + + if debug: + print(f"Sequence: {sequence}") + print(f"Labels: {results['labels']}") + print(f"Scores: {results['scores']}") + + return results + +generator = create_generator() + +res = generate(generator, "Hi, how are you?") + +print(res) diff --git a/models/tests/__pycache__/ollama.cpython-313.pyc b/models/tests/__pycache__/ollama.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44f2937a284ffb97993af5fef33a77c59a10c0d2 GIT binary patch literal 584 zcmey&%ge>Uz`!7K>_WOJBLl-@5C?|Ypp4Ix7#J9)GN>{HGXyhwGk7r;F)D!AOx_G$ zOhrrz3^9yK48hF7EWxb7Y<f)kjKS=dOhwEwOff9M9O?`V3^A-q3^B~XoU9BC48dF= z5@Z+1bOoqf5D$olii7x2`5+!bUpluY&r6VyCgUxZ<c!1;O~zY1&KZd%L8-+B`FX{u zRUA1viMff!dPcWcN{dsAszkywDitz|6-qKv6^gSf6_RpFQ|&dGZm|^Q=cL|ZPtMOP zNzE(KWV*$go1c=JbBiN4wYWGjJ+=52M-kM_TkKG=B6bD_20u-XTWtAYTZ)(&7#NCJ zKm;f}Z}G>+C+6klmn4>C=I0g1$KPTtD9X$$DdGmH0O?)H@EPRzTfzDn`MIh3nPrK2 z`Y!p&rManjCB^!A$tk)ii6x1;iRtM@sp()-bQAMZbdz%ui;FYUGLykV`e26^>zAY! zmlW$mT%uP{d5gm)H$SB`C)KV<h=GBDk%57sIE;aT;R7=xBjY^=@w*K24<%(6cs<|| zy1}pdk(HU5<pVRT71KusRw?ENjt-sA46KaIA9)xU*%~rBDlf80&d|KbrgDWr^&tam bL&kMBv5RbC*V*JQvdLX#kS`KoU|;|Mv0svq literal 0 HcmV?d00001 diff --git a/models/tests/ollama_test.py b/models/tests/ollama_test.py new file mode 100644 index 000000000..2c0bdf318 --- /dev/null +++ b/models/tests/ollama_test.py @@ -0,0 +1,12 @@ +from ollama import chat +from ollama import ChatResponse + +response: ChatResponse = chat(model='llama3.2', messages=[ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, +]) +print(response['message']['content']) +# or access fields directly from the response object +print(response.message.content) \ No newline at end of file diff --git a/models/tests/sources.txt b/models/tests/sources.txt new file mode 100644 index 000000000..d868a5037 --- /dev/null +++ b/models/tests/sources.txt @@ -0,0 +1,20 @@ +- https://github.com/ollama/ollama-python +- https://github.com/ollama/ollama + +Check Bedrock: +- https://aws.amazon.com/bedrock/ + +Check Tokenizers: +- https://huggingface.co/docs/tokenizers/quicktour + +Fine-tunning Zero-shot classifier: +- https://stackoverflow.com/questions/76213873/how-to-finetune-a-zero-shot-model-for-text-classification + + +TODO: +- Store data from Pubmed localy for NCDs +- Calculate data published +- Better testing script +- Get avrage token number for pubmed article +- Make prica estimations for LLMs +- Look and teste new LLMs (local and cloud) \ No newline at end of file diff --git a/variables/__pycache__/pubmed.cpython-313.pyc b/variables/__pycache__/pubmed.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f342258b8039b2dfbad3a7041bbe0bce7765fc20 GIT binary patch literal 5463 zcmey&%ge>Uz`$^<@Itzr6a&L!5C?|&pp4It7#SF*G6XXeG3YY{GX^piF$OaQGY7K- zvj(#Tvj=koa|Sc&aTGD>GX!%5G8Qo#Fa&c4bH_6V^91w8GY0bo^T#s=3j_<sGX@I< z3&%4Civ)|rGX{$Wi^Veriw8@@GX_frOT{w=O9#uuGX~2B%f&MW%LmKHGX^UJE5tJf zD+VjZGX^UKE5$PgD+jB@GX|>$tHv`1s|BkEYXoZsYXxfu>jdit>jmov8|ZPlG8VA} z8wML0Fcz@}8wZ;Nn+BT&n+K}|s{~sFTLxPNTL;?&%LLo%@fNWeFa+BL+s88oJAhs0 z80-|!80;ME63-ay8tfL&80;SG5ziRx2^RMX_Ks%^_6hclXAJfW_K#-_4hRm6XABMs z4vuFG4has8XABMt4v%LHjtGv7XAF)Cjt-8|XNqSGjt!2BXEb04jt@?VXADjZPKsv? zP7Y3qXADja4i8QPt4R;e2vz~JGJ~_?8H2NfbK)6;bAv778H4kJ^WzzV3xW&d8H0<0 zi{lxCOM**-%fPD3gDc`0gDZop;u(XhgL8swz@oLmR`HC%b;0%VjINBq4Z)hhjlr7w zOu_!aO~K7zR!eZJK2va;9=j`J5qo;OX2&fRGvgFv!?Z-Rv}ALWMAIZQW1|#fQ!@ht z6SI^wb5nB@3nK#yO^zx_zx+I1=ltB<(!9*%#H5^51((d?)WqV{;#(XpnTbiMC8@=? z*qjsdl2ePSq@6R0^7As26^c@e3o?rmOY(~<6;dGTtHhlXi&8T4%My!|OLG#7VB*EU zczsjzN)mGvJW>;LN;0bWVa6(yR2HNv7@`XpRY~~e=OtrvQk5WFuWxEjPG(7I@hwij z)ck^+#NyoIDybkOXN43cCYQiHgW)=uFN;_i7#OM~G0fFb2!Xf`r<9Rq{4E~Gvc$}s z#H7re%#zAmypCyU`9&#EZk3R0adB#1NoHb>LT+kGYI0^?@h=|d#G+J%l8n@%#DdCS z++bb?I9z@SL&8VFwW1(3FD0|2v?#UsmmpLy8Kf;azqq9M7MpKkQFdy{FX7;V)MSw1 z1^ES~If*5i`FX`vJkA-JIVrH9<_*u!P0cHYa&Iv^rl;QG@XgH2FUl;b{KXScl$o1Y zRH*=RMHL^^xDp8emry`rNoHzZi9%v>a%w>dC;|Kug^4An=9Q!trKTu=-1SQ=peVHr z<kS2-g~YrRh2;FalA`>aUwpoa#l;H6$wjHDd6{|XzxV?Zb8<2hOESw+!5VL|2bLy+ zLyjjTH77MUH6=4SGcWZQpG#(XCMd-~Lgp4naA`q7PG)NHFMf~G+{8SEpw#00(xT+l z;wnMcJW!%7D%MX*EGo)OEh_%S>z<mIl3JvYT38B-6t<v5P$=DEbA|9ZoO3ckNj>M6 z05oh1it=;wLBR(XNlhtD1_$Ub(V*1g)Wo9X3<Wg>&%C_+GKjEv2sj2)OG`476<kuw zQgiYPa#Qn4eo2O;CTHa3=j5knCMV{gNbv`!=A`N7Cgvrkr-Ic&1@iMUOY(~{^U`mz zx#SmvXfDsZtkh(Xi;92o1*azG=cT}+=@-9eUP)?tQDRAI3OL?=iG&oTCYFE<R6v-0 zi^mh3qpQHqy~Q1tn4FxM2jOr!revfRrRF84-ePx##*}zaY6c|Pg4HM#rxulECZ`tv z5{8AALV9UtN@`AKUTX0#URc28=VT^V!qO-xDJT>`gv9*7<|O7QK%x*X9+aA%nV*Lt zE(A3(CpA5@7?PHM2?l`8Qb;XJ%qayM_lqAYl$e*8Q(2r@{7VQbkd|MR3(W(!xPnsC z;cWN(vecqHP~xppanH$5O3YEn%*!l+B*J2FYRxZ7Pt41NgjDe_(a^livecqtczFd5 z<y)Mt6$OcT#hLke@I;cCS6o^IGWiyVGsu?2^weKsZl!rCnR)48gERBWQj5Vc^NSzq zzk-~^JaATk1z}Mt$mvxuzvjXd!!NOb(xjZsWLN+uC*~F;W~S%C!yqR=DX9{as{~+~ zGBKqrKRGeE@)o;SX>mzra_TQ3$MVFY)V$Q<Vg;D~Tb!=2#9bxfoL^Ls4@yg*OrBqw zR{}``mA_<z^Ff(LAtkjWwJ0|;FEOvASRp?R>X=*HuDJ#I<*7xWRCkLLT6>gKR`CR+ zrWPp_mlhP{7nS_tb<Qly%uy)LPtHs&sr)4fb0;Eq2?T(gSE7Ir5rw-BRB9%cq!wi+ z<`n;ug4+RdLP<^rLdz{qxF@Sb5Xw^X(i36%3>x@3nI)-3iJ%1Q3o2<6a}b975(p_u z%ml@ZLP1e}dQoETFA;c7u0&GD57U~G3dtano_U}c1gB3>ZBv{I3NBFI{Uzv>nvqzR znO~HcqmZ1Dn3taVOW3cpq$m?4i)5xyP-bzq0=&Wl$yNykrzV%=7b#>yb9-ifUJ*9~ z149uHsJepH3ZQ~p!5LI56bXaGVdb-~Gpu|DsVEW!$w3O^U|3;XBnA>hE|I}XB|vsa zf~4SuFeLPfWI%$jvK3O67Ab(lV8v(vtQaj)L>7ieXpt&N7*go@!b`y-4UmnRAVLR3 z=z<6%kV;q<fn@O_8<3cwCoI$HKr6K>Sk?-FX00lCesF~5hawM<R#=jDK}*t5?|Fgj z4FoBK8R!E|E>%KsFX$*hqrWJefq_AjtH=(d%MnDlf(VdBMV=rQ+_<735H}b^fb<lJ zfUFQJz*=EeiG$-3TzG(33XafnuF3@BN>BnTNCanCSS=1#99Ei>ms$j>2o(a58Uy0N zrNt$Qncx}{xhkrXb;`-lPf>8sDNW8VPE`O`Ouq1<r%D`_Q$a~W0TEYKypXt1@J-Cl z%uBBlMXqEN0w5WuxQY*2ctA^`DxrX))V#!!#2f{9Kvs#tnTa_Hh@7U$Tf_khDo}r= zNCm`F0}<LF!WcxDfCvi^;SM5vK!hKN@COkAAc7qf!s1Su@JblDjaI}7QUdBIRSCnI zKhUrRdA5j`fq~%`XD+yfm6?}b#1Arz%P~Dw!M!NIw4k_103^T<4}GYpAjni9kdz`U z>>T0ECQ!kNY=<~VogB33qu^K!twvxe8eHKNNr4oJf$|x+VF8Y2STm?d8tlsO{G#l% z{G#O4B3Y1PUPwa+=1@71n2Za!GRrKf)KLJL1)@Fkl5<KyHCd57NQqQpZW5@v1U2Qs zRd`xrGN?J5nOa<=1hR(%)&we2261_inm)x<V(vxx<s}(l=OML&inKsRE4!rTfb6eC z3jrMkL?fn152RAbvA8(3xENHQfYd^A8K@vsfHzi(^g#;wpe+`dFAYFW;zMn{6d8h4 z3cJG_EDDLm3L*IgnaM?_AQ_SLR8SFLkdc|JkdqH-=oJ^4fu(#94G@GbbC4x$kfu$M zB`B(`K#Ikn=^W}L9R;`iqTIxsB5RN&qTz&Sf)v?<WaI*hQd283i%UT51m}EEQ$I65 zPXX43C~^SV#tCZm<>zK57vJJWv?Gd(oIobPT1%i5s}KZkK@>TIBzOyu+YCi6Ad@&? z4TT~%kjuS6>Li0wi!)O|O+W?bj6_gJDYXdXpyc8zDOmc2XHa-J+~O*MHEt_!aU?-2 zw<?i5Se=%bqfnfj2{N|$7E4iParP}<lv+)by~r2jh7b@D3L?TlL<ES41QAgT3=Dpn z9JlxaLY;hFUE&=BJmbAxBX6<zIlBbk;scTKzOKO@@gc53zPC8MT_eN&gIt1d3Bg!M zvJ$9b!J$qbu8uCAe(tyA&}2MAT;l_R`~&=hJwrVG{ccI3$|FgwWcUmkNVyfDpOK%N zs-Ibwn5XZOpIiz`x5fH-$tk)ii6x1;iRtM@sp;S*fo@`6if(dFVsUY1S|%i^>6axI zWrE7;V*P^Bq}<dLy@JYH95%W6DWy57c16Vu3=E763=GA!%nS@4m>C%vZwQ(<@O@(8 z<X~!G`5?idWOGA6`39fl4Q2Zq@)kElRc?qI-4HkWD99<q)WG*iEP;`Sse$8z3$u(E z(+w8M3(^K3K+Fd$;uj=!KCsG&F@0cTV3oNbYx02|#Nl9I6}=#)_JI?`;bLGFyCAOd zfg8l(VPF-W;P-(S#O8yl;s<dA7+6In#D5S3v4t2|#V<%`eGmq5M4%i|5JwDZmN<wb z!N4jwLFa=ch%E&*OB%$Hfmk97V#_hG3Qe&6AP-_IK<rQiv6Z0iRR(cX7$kLWh-=)C z(7GWgb3@hThOh#NA+B*lQul_U%_nmiF{TR)LLclHSY<EBnSEepl3}{QAn}2VK}`RK zbl?p|s~h5GH)Kq2NNU`W)4d_Cc0<bWhN$Wd37s3VrZ<#KK1%Z`F<oGg`=FpD%XEW* z?}C8l2WBl<rUxLV;s;hOSx^8n@?GFp`M?h1a4;}(c4&X#1hKgo7}*+XK5&EBJPeH7 z7kFep@Pase42=921XMrpgE#^VjG`CBEI$Z>I6@4JTo<^tKL~?3B49l#A4EYMF$PAV z3DzIPL2L<-CHfyEL2M}oM$QXd8Xu%V92u|<jSsRQjvND{#0;qq@*uVX1EbUozY97p z7o=T3D1t<k7#M{v2wQwq260pv7<n%68h%g(an!(Wkp7?!;%G21N?nlF{-6osXfZH~ zU63%jVCH>6+~<QfNJxi)QSySc;RgE)QqCWAK_Yr!|LA<s2XPD-7)2&Td@uyDjTjh3 zC**%H2C+>T7<n)7seLd7am>IR<qzf{js@5YIv*@S94iJ!$r;uwcrQqqeXs@z*f1~( zTo6?KU<=~dfn@bR*n`*(jG*xGn2~isSo4D;BS^@J5$X|VFxQ0<>`9G}u3)YkBZHXH z4H5YplBx@;FGv{Q5Z1mSDt|*lZv*!Qahn_BvNP&0i0OX{1}VPCAX!w%z`y_i+MyZB literal 0 HcmV?d00001 diff --git a/variables/pubmed.py b/variables/pubmed.py new file mode 100644 index 000000000..8f8b298fe --- /dev/null +++ b/variables/pubmed.py @@ -0,0 +1,215 @@ +PUBMED_API_KEY = "63d31fa6fc74a5b632d3560046df75748208" + +NCDS = [ + "Non-Communicable Diseases", + "Diabetes", + "Cancer", + "Chronic respiratory disease", + "Cardiovascular diseases", + "Mental Health", + "Diabetes type 1", + "Diabetes type 2" +] + +NCDS_MESH_TERM = { + 'Non-Communicable Diseases': "Noncommunicable Diseases", + 'Diabetes': "Diabetes Mellitus", + 'Cancer': "Neoplasms", + 'Chronic respiratory disease': "Respiratory Tract Diseases", + 'Cardiovascular diseases': "Cardiovascular Diseases", + 'Mental Health': "Mental Health", + 'Diabetes type 1': "Diabetes Mellitus, Type 1", + 'Diabetes type 2': "Diabetes Mellitus, Type 2" +} + +KEYWORDS = [ + "Availability", + "Affordability", + "Essential medecins", + "Care therapy", + "Care health", + "Health Expenditures", + "Health care costs", + "Market", + "Special populations", + "Child Health", + "Womens Health", + "Age", + "Minority", + "Primary Care", + "Specialty Care", + "Patient acceptance", + "Patient centered care", + "Prevention and control", + "Mass screening", + "Palliative care", + "Quality", + "Telemedicine", + "Digital health", + "Supplies", + "Human Resources", + "Enablers/barriers", + "Gender equity", + "Racial", + "Equity", + "Clinical", + "Health promotion", + "Health education", + "Research & Innovation", + "Therapeutic Development", + "Technological Development", + "Self-management", + "Self-monitoring", + "Dosing", + "Injections", + "Secondary Care", + "Integrated Care", + "Treatment management", + "Immunization", + "Vaccination", + "Adherence", + "Control", + "Rehabilitation services", + "Clinical guidelines", + "Health policy", + "Healthcare policy", + "National health policy", + "Regional health policy", + "Health legislation", + "Policy evaluation", + "Policy analysis", + "Policy formulation", + "Regulation", + "Governance", + "Global initiatives and organizations", + "Universal Health Care", + "Expansion", + "Health insurance", + "Coverage", + "Funding and investment", + "Health planning", + "Health reform", + "Policy monitoring", + "Public health campaign", + "Policy lobbying", + "Patient advocacy", + "Justice", + "Awareness campaign", + "Education", + "Corporate accountability", + "Social determinants of health", + "Empowerment", + "Community", + "Peer support", + "Civil society", + "Patient education", + "Parent education", + "Educational materials", + "Community heatlh education", + "Awareness", + "Community engagement", + "Health literacy", + "Medical education", + "Training program", + "Technology education", + "Medical devices", + "Information Dissemination", + "Behavioral change", + "Nutrition education", + "Risk communication", + "Sector integration" +] + +KEYWORDS_MESH_TERM = { + 'Health Expenditures': "Health Expenditures", + 'Health care costs': "Health Care Costs", + 'Patient centered care': "Patient-Centered Care", + 'Mass screening': "Mass Screening", + 'Palliative care': "Palliative Care", + 'Telemedicine': "Telemedicine", + 'Digital health': "Digital Health", + 'Gender equity': "Gender Equity", + 'Health promotion': "Health Promotion", + 'Health education': "Health Education", + 'Self-management': "Self-Management", + 'Injections': "Injections", + 'Secondary Care': "Secondary Care", + 'Immunization': "Immunization", + 'Vaccination': "Vaccination", + 'Health policy': "Health Policy", + 'Universal Health Care': "Universal Health Care", + 'Health insurance': "Insurance, Health", + 'Health planning': "Health Planning", + 'Patient advocacy': "Patient Advocacy", + 'Education': "Education", + 'Social determinants of health': "Social Determinants of Health", + 'Empowerment': "Empowerment", + 'Awareness': "Awareness", + 'Health literacy': "Health Literacy", + 'Medical education': "Education, Medical", + 'Information Dissemination': "Information Dissemination" +} + +KEYWORDS_MESH_SUBHEADING = { + 'Prevention and control': "prevention and control" +} + +KEYWORDS_MESH_SITE_PROPOSITION = { + 'Affordability': "Costs and Cost Analysis", + 'Special populations': "Health Disparate Minority and Vulnerable Populations", + 'Technological Development': "Sustainable Development", + 'Self-monitoring': "Blood Glucose Self-Monitoring", + 'Treatment management': "Patient Care Management", + 'Healthcare policy': "Health Policy", + 'National health policy': "Health Policy", + 'Policy analysis': "Policy Making", + 'Awareness campaign': "Immunization Programs", + 'Civil society': "Social Justice", + 'Parent education': "Prenatal Education", + 'Educational materials': "Educational Technology", + 'Community heatlh education': "Health Education" +} + +KEYWORDS_MESH_PROPOSITION = { +'Availability': "Biological Availability", +'Care therapy': "Palliative Care", +'Care health': "Primary Health Care", +'Market': "marketing", +'Age': "Age Groups", +'Minority': "Minority Groups", +'Primary Care': "Primary Health Care", +'Patient acceptance': "Patient Acceptance of Health Care", +'Quality': "Quality Assurance, Health Care", +'Supplies': "Equipment and Supplies", +'Human Resources': "Workforce", +'Racial': "Racial Groups", +'Equity': "Diversity, Equity, Inclusion", +'Clinical': "ambulatory care facilities", +'Research & Innovation': "Research", +'Therapeutic Development': ["Therapeutics", "Growth and Development"], +'Integrated Care': "Delivery of Health Care, Integrated", +'Adherence': "Assessment of Medication Adherence", +'Control': "Control Groups", +'Rehabilitation services': "Rehabilitation", +'Clinical guidelines': ["ambulatory care facilities", "Guidelines as Topic"], +'Regional health policy': ["geographic locations", "Health Policy"], +'Health legislation': "Legislation as Topic", +'Policy evaluation': "policy", +'Policy formulation': "Policy Making", +'Regulation': "Social Control, Formal", +'Governance': "Clinical Governance", +'Coverage': "Preexisting Condition Coverage", +'Funding and investment': ["economics", "investments"], +'Health reform': "Health Care Reform", +'Public health campaign': "public health", +'Policy lobbying': ["policy", "lobbying"], +'Justice': "Social Justice", +'Community': "Residence Characteristics", +'Patient education': "Patient Education as Topic", +'Training program': "Education", +'Technology education': ["technology", "Education"], +'Medical devices': "Equipment and Supplies", +'Behavioral change': "behavior", +'Nutrition education': ["nutritional sciences", "Education"], +'Risk communication': ["risk", "communication"] +} \ No newline at end of file -- GitLab