diff --git a/dataSources/PubMed/antoine_ex.py b/dataSources/PubMed/antoine_ex.py index b0ed71399cc83b7cfe9fbaab449cca3486bba1bd..ad871542f25d90f7534afac29681f1f71e4fdd54 100644 --- a/dataSources/PubMed/antoine_ex.py +++ b/dataSources/PubMed/antoine_ex.py @@ -13,6 +13,21 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../" from variables.pubmed import * # ------------------- +CATEGORIES = { + "Access" : ["Availability", "Affordability", "Essential medecins", "Care therapy ", "Care health", "Health Expenditures", "Health care costs", "Market", "Special populations", "Child Health", "Womens Health", "Age", "Minority", "Primary Care", "Specialty Care", "Patient acceptance", "Patient centered care", "Prevention and control", "Mass screening", "Palliative care", "Quality", "Telemedicine", "Digital health", "Supplies", "Human Resources", "Enablers/barriers", "Gender equity", "Racial", "Equity"], + "Care/Management" : ["Clinical", "Health promotion", "Health education", "Research & Innovation", "Therapeutic Development", "Technological Development", "Self-management", "Self-monitoring", "Dosing", "Injections", "Primary Care", "Secondary Care", "Integrated Care", "Treatment management", "Immunization", "Vaccination", "Prevention and control", "Mass screening", "Palliative care", "Adherence", "Control", "Rehabilitation services", "Clinical guidelines"], + "Policy" : ["Health policy", "Healthcare policy", "National health policy", "Regional health policy", "Health legislation", "Policy evaluation", "Policy analysis", "Policy formulation", "Regulation", "Governance", "Global initiatives and organizations ", "Universal Health Care", "Expansion", "Health insurance", "Coverage", "Funding and investment", "Health planning", "Health reform", "Policy monitoring"], + "Advocacy" : ["Public health campaign", "Policy lobbying", "Patient advocacy", "Justice", "Awareness campaign", "Education", "Corporate accountability", "Social determinants of health", 
"Empowerment", "Community", "Peer support", "Civil society"], + "Education" : ["Patient education", "Parent education", "Educational materials", "Community heatlh education", "Awareness ", "Community engagement", "Health literacy", "Medical education", "Training program", "Technology education", "Medical devices", "Information Dissemination", "Digital health", "Behavioral change", "Nutrition education", "Risk communication", "Sector integration"] +} + +KEYWORDS_GROUPS = { + "exact match" : KEYWORDS_MESH_TERM, + "site proposition" : KEYWORDS_MESH_SITE_PROPOSITION, + "personal proposition" : KEYWORDS_MESH_PROPOSITION, + "subheading" : KEYWORDS_MESH_SUBHEADING +} + def getPubmedData(term, date_min, date_max): print("--------------------") @@ -39,13 +54,13 @@ def getPubmedData(term, date_min, date_max): except Exception as e: print(e) - print(response) - search_res = response.json() query_key = search_res["esearchresult"]["querykey"] webenv = search_res["esearchresult"]["webenv"] + print(f"Count : {search_res['esearchresult']['count']}") + print("--------------------") print(f"Query key: {query_key}") print(f"Web env: {webenv}") @@ -165,6 +180,8 @@ def get_subheadings(terms): def main(): + SAVE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./tmp")) + ncds_mesh_terms = get_mesh_terms(NCDS_MESH_TERM) keywords_mesh_terms = get_mesh_terms(KEYWORDS_MESH_TERM) keywords_site_proposition_mesh_terms = get_mesh_terms(KEYWORDS_MESH_SITE_PROPOSITION) @@ -189,6 +206,7 @@ def main(): print(f"Search term : {search_term}") + # Article PubMed publication date end_date = date.today() start_date = end_date - timedelta(days=1) @@ -201,14 +219,40 @@ def main(): stored_pmids.append(article["PMID"]) - article_mesh_terms = [mesh.lower() for mesh in atricle["MeshTerms"]] + article_mesh_terms = [mesh.lower() for mesh in article["MeshTerms"]] article["NCDs"] = [] - for ncd, ncd_mesh in NCDS_MESH_TERM: + article["Categories"] = [] + + # Get NCDs + for ncd, ncd_mesh in 
NCDS_MESH_TERM.items(): if ncd_mesh.lower() in article_mesh_terms: article["NCDs"].append(ncd) + # Get categories + for group in KEYWORDS_GROUPS.values(): + for keyword, mesh in group.items(): + present = True + if isinstance(mesh, list): + for tmp in mesh: + if tmp.lower() not in article_mesh_terms: + present = False + elif mesh.lower() not in article_mesh_terms: + present = False + + if present: + for category in CATEGORIES.keys(): + if keyword in CATEGORIES[category]: + if category not in article["Categories"]: + article["Categories"].append(category) + stored_articles.append(article) + print(stored_articles) + + # store article in json file + # with open(f"{SAVE_DIR}/articles.json", "w") as json_file: + # json.dump(stored_articles, json_file, indent=4) + if __name__ == "__main__": main() \ No newline at end of file