From 95236f09b45488f41c05ade635cc6d6d8ebe9a03 Mon Sep 17 00:00:00 2001 From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch> Date: Fri, 11 Apr 2025 18:18:10 +0200 Subject: [PATCH] Script for antoine. --- dataSources/PubMed/antoine_ex.py | 52 +++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/dataSources/PubMed/antoine_ex.py b/dataSources/PubMed/antoine_ex.py index b0ed7139..ad871542 100644 --- a/dataSources/PubMed/antoine_ex.py +++ b/dataSources/PubMed/antoine_ex.py @@ -13,6 +13,21 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../" from variables.pubmed import * # ------------------- +CATEGORIES = { + "Access" : ["Availability", "Affordability", "Essential medecins", "Care therapy ", "Care health", "Health Expenditures", "Health care costs", "Market", "Special populations", "Child Health", "Womens Health", "Age", "Minority", "Primary Care", "Specialty Care", "Patient acceptance", "Patient centered care", "Prevention and control", "Mass screening", "Palliative care", "Quality", "Telemedicine", "Digital health", "Supplies", "Human Resources", "Enablers/barriers", "Gender equity", "Racial", "Equity"], + "Care/Management" : ["Clinical", "Health promotion", "Health education", "Research & Innovation", "Therapeutic Development", "Technological Development", "Self-management", "Self-monitoring", "Dosing", "Injections", "Primary Care", "Secondary Care", "Integrated Care", "Treatment management", "Immunization", "Vaccination", "Prevention and control", "Mass screening", "Palliative care", "Adherence", "Control", "Rehabilitation services", "Clinical guidelines"], + "Policy" : ["Health policy", "Healthcare policy", "National health policy", "Regional health policy", "Health legislation", "Policy evaluation", "Policy analysis", "Policy formulation", "Regulation", "Governance", "Global initiatives and organizations ", "Universal Health Care", "Expansion", "Health insurance", "Coverage", "Funding and investment", "Health planning", "Health reform", "Policy monitoring"], + "Advocacy" : ["Public health campaign", "Policy lobbying", "Patient advocacy", "Justice", "Awareness campaign", "Education", "Corporate accountability", "Social determinants of health", "Empowerment", "Community", "Peer support", "Civil society"], + "Education" : ["Patient education", "Parent education", "Educational materials", "Community heatlh education", "Awareness ", "Community engagement", "Health literacy", "Medical education", "Training program", "Technology education", "Medical devices", "Information Dissemination", "Digital health", "Behavioral change", "Nutrition education", "Risk communication", "Sector integration"] +} + +KEYWORDS_GROUPS = { + "exact match" : KEYWORDS_MESH_TERM, + "site proposition" : KEYWORDS_MESH_SITE_PROPOSITION, + "personal proposition" : KEYWORDS_MESH_PROPOSITION, + "subheading" : KEYWORDS_MESH_SUBHEADING +} + def getPubmedData(term, date_min, date_max): print("--------------------") @@ -39,13 +54,13 @@ def getPubmedData(term, date_min, date_max): except Exception as e: print(e) - print(response) - search_res = response.json() query_key = search_res["esearchresult"]["querykey"] webenv = search_res["esearchresult"]["webenv"] + print(f"Count : {search_res["esearchresult"]["count"]}") + print("--------------------") print(f"Query key: {query_key}") print(f"Web env: {webenv}") @@ -165,6 +180,8 @@ def get_subheadings(terms): def main(): + SAVE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp")) + ncds_mesh_terms = get_mesh_terms(NCDS_MESH_TERM) keywords_mesh_terms = get_mesh_terms(KEYWORDS_MESH_TERM) keywords_site_proposition_mesh_terms = get_mesh_terms(KEYWORDS_MESH_SITE_PROPOSITION) @@ -189,6 +206,7 @@ def main(): print(f"Search term : {search_term}") + # Article PubMed publication date end_date = date.today() start_date = end_date - timedelta(days=1) @@ -201,14 +219,40 @@ def main(): stored_pmids.append(article["PMID"]) - article_mesh_terms = [mesh.lower() for mesh in atricle["MeshTerms"]] + article_mesh_terms = [mesh.lower() for mesh in article["MeshTerms"]] article["NCDs"] = [] - for ncd, ncd_mesh in NCDS_MESH_TERM: + article["Categories"] = [] + + # Get NCDs + for ncd, ncd_mesh in NCDS_MESH_TERM.items(): if ncd_mesh.lower() in article_mesh_terms: article["NCDs"].append(ncd) + # Get categories + for group in KEYWORDS_GROUPS.values(): + for keyword, mesh in group.items(): + present = True + if isinstance(mesh, list): + for tmp in mesh: + if tmp.lower() not in article_mesh_terms: + present = False + elif mesh.lower() not in article_mesh_terms: + present = False + + if present: + for category in CATEGORIES.keys(): + if keyword in CATEGORIES[category]: + if category not in article["Categories"]: + article["Categories"].append(category) + stored_articles.append(article) + print(stored_articles) + + # store article in json file + # with open(f"{SAVE_DIR}/articles.json", "w") as json_file: + # json.dump(stored_articles, json_file, indent=4) + if __name__ == "__main__": main() \ No newline at end of file -- GitLab