From eed594171675eb2182f57fce7b853a23beac3def Mon Sep 17 00:00:00 2001
From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch>
Date: Wed, 12 Mar 2025 19:25:14 +0100
Subject: [PATCH] Add Ollama wrapper to test Ollama models

---
 .../__pycache__/pubmedApi.cpython-313.pyc     | Bin 6208 -> 6743 bytes
 dataSources/PubMed/pubmedApi.py               |   6 +--
 .../Ollama/__pycache__/ollama.cpython-313.pyc | Bin 0 -> 2623 bytes
 models/LLM/Ollama/ollama_wrapper.py           |  48 +++++++++++++++++++
 testModel/test.py                             |   2 +-
 variables/models.py                           |  10 +++-
 6 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 models/LLM/Ollama/__pycache__/ollama.cpython-313.pyc
 create mode 100644 models/LLM/Ollama/ollama_wrapper.py

diff --git a/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc b/dataSources/PubMed/__pycache__/pubmedApi.cpython-313.pyc
index 4e650d570dcc84a349d496c81cb5c4b7a99fc2a2..9d5fa7641db53dafd2f09601a95d99a1e374299b 100644
GIT binary patch
delta 882
[base85-encoded binary delta for the compiled __pycache__ file omitted]
delta 506
[base85-encoded binary delta for the compiled __pycache__ file omitted]

diff --git a/dataSources/PubMed/pubmedApi.py b/dataSources/PubMed/pubmedApi.py
index df29e7449..f694ee56e 100644
--- a/dataSources/PubMed/pubmedApi.py
+++ b/dataSources/PubMed/pubmedApi.py
@@ -85,7 +85,7 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
             for part in entrie["MedlineCitation"]["Article"]["Journal"]["Title"]:
                 if "#text" in part:
                     data["Title"] += part["#text"]
-        elif isinstance(entrie["MedlineCitation"]["Article"]["Journal"]["Title"], object):
+        elif not isinstance(entrie["MedlineCitation"]["Article"]["Journal"]["Title"], str):
             data["Title"] = entrie["MedlineCitation"]["Article"]["Journal"]["Title"]["#text"]
         else:
             data["Title"] = entrie["MedlineCitation"]["Article"]["Journal"]["Title"]
@@ -95,7 +95,7 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
             for part in entrie["MedlineCitation"]["Article"]["ArticleTitle"]:
                 if "#text" in part:
                     data["ArticleTitle"] += part["#text"]
-        elif isinstance(entrie["MedlineCitation"]["Article"]["ArticleTitle"], object):
+        elif not isinstance(entrie["MedlineCitation"]["Article"]["ArticleTitle"], str):
             data["ArticleTitle"] = entrie["MedlineCitation"]["Article"]["ArticleTitle"]["#text"]
         else:
             data["ArticleTitle"] = entrie["MedlineCitation"]["Article"]["ArticleTitle"]
@@ -106,7 +106,7 @@ def getPubmedData(term, date_min, date_max, nb_items = -1, debug = False, store
             for part in entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]:
                 if "#text" in part:
                     data["Abstract"] += part["#text"]
-        elif isinstance(entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], object):
+        elif not isinstance(entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"], str):
             data["Abstract"] = entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]["#text"]
         else:
             data["Abstract"] = entrie["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
diff --git a/models/LLM/Ollama/__pycache__/ollama.cpython-313.pyc b/models/LLM/Ollama/__pycache__/ollama.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..00224bcf9f3e9bfd7270dd9731d100dede3da8f5
GIT binary patch
literal 2623
[base85-encoded binary literal for the compiled __pycache__ file omitted]
zuk)K-<Tu+;d70nwI=}lxe)r4#o(&!k*!i!sOI&1^m?3(dUH&4w{0(908ICiOCfIdw zJQ0zY?lsA4M#W_jjRxN*ESw#>S6L)JgA#pW3=0E8NJeT=VqSi6NoKM_N>OQgu|iR5 zK~ZXPYF>##VxB^3Zfa3_W?s5NL1IY;C~2pp=46(o7AfTCfzu}_Mly?274p**3W`#b zGmA6x^AvJZQ!<k?^HTK`Jo3v^%TkMU6q52wN^(;3Qj@bm7UU*o=7Ex7az<iNVsc4p zQDzm$juIrdlq43Vr<N4!C}gIj=9OfoRiZ1%Nli>qNY2kK$S=)HDb`T{<(`tn%$)op zbd_Ll=_!QdE994@7A5CHvT$)KvZHhq%2Po;0Q(!1Tad%WJvA>sH&qAX;l!d6kZ(b1 zGD>q3^Ays-vI^xH`NgRUsTH8W1BU`An<u3zl$K=XWLBl7fC4W$KQ}2e4_ty2B^IQX zf&v}n?v%{LbZ8tT78K+_lU}i<E9_Gl>c*pkgE^HL)l!GcR2?DX};;MIoaysVFl= z0qh7}M^A-<%!1S$i0Pn0sVKj=ShpxOEwu=ghl&*fN|St3Qy`vjPt8kJa78%PKQ}Kk zskB%JRC&3l`niVaD1>CBDma6yEd_}86&y=)5{n_hoRO;F=jox~o1c>ksjNJ66Tztj z<QeDuypp2C;t~bNbWkF4Ni0bOIT#$mAQz{nDrDxRWR_*7lqTkYLNB!#lor7L0F{l& zsYS&Kpuz!MO{ON7KnpN<GKELFo`N%qFF|1iiWkhdgP0fO=UALtr~u0K3TZ|8x#0Am zU}&yxU|;|VM}?Bi;^I<}gCMCcGZj?EBXS5dRf0nSH35Q3Q-!j`qRjl#VyIck;NXKM z&BUCX{PJQ2q!bEH;GlGYNcF{_L{gNRlUkOTR{{+Sh2qkrg8YKg9B^?|tfvqTPGz84 z0*7Bq@)gn&lQVNNOA<@KX-y#|wJbF!zW`M5fzobXYPkZ$Cs?dcN-Zf#1vNjwNitPO zp)@ZgwWzoxF%Oh@ic%9xAbBk}KQFT+zephw?CaE$N|4DYk*HV20WK+x^^7#xZ?Qp& zO>pf{1ZqMQ2{AA*++y)_b_oU-pSM^*mDx&$B3T9oh9YT@B!^9IeoARhs$G#1sObkP zU-cLm7(OsFGBVy}5WmYHf182(6ALE?Qv=H<F%d?|4>}ASX@U(b4;eT*q$Y&SFuBU0 H05%T*zTqId literal 0 HcmV?d00001 diff --git a/models/LLM/Ollama/ollama_wrapper.py b/models/LLM/Ollama/ollama_wrapper.py new file mode 100644 index 000000000..ad23ab557 --- /dev/null +++ b/models/LLM/Ollama/ollama_wrapper.py @@ -0,0 +1,47 @@ +from ollama import chat +from ollama import ChatResponse +import json + +NCDS = [ + "Non-Communicable Diseases", + "Diabetes", + "Cancer", + "Chronic respiratory disease", + "Cardiovascular diseases", + "Mental Health", + "Diabetes type 1", + "Diabetes type 2" +] + +def classify(model, sequence, labels): + prompt = f'I need you to give me the labels that could be given to the text (keep in mind that u can put multiple labels and select only the labels that i give you):\ + text: {sequence}\ + labels: {labels}\ + Give the response in json format "labels": [] with no text at all' + + response: ChatResponse = chat(model=model, messages=[ + { + 'role': 'user', + 'content': prompt, + }, + ]) + + json_str = response.message.content.strip().lstrip('```json').strip() + + if json_str.endswith("```"): + json_str = json_str[0:-3] + + responce_json = json.loads(json_str) + + print(responce_json) + + res = {} + + for label in labels: + res[label] = label in responce_json["labels"] + + return + +text = "Theranostic drugs represent an emerging path to deliver on the promise of precision medicine. However, bottlenecks remain in characterizing theranostic targets, identifying theranostic lead compounds, and tailoring theranostic drugs. To overcome these bottlenecks, we present the Theranostic Genome, the part of the human genome whose expression can be utilized to combine therapeutic and diagnostic applications. Using a deep learning-based hybrid human-AI pipeline that cross-references PubMed, the Gene Expression Omnibus, DisGeNET, The Cancer Genome Atlas and the NIH Molecular Imaging and Contrast Agent Database, we bridge individual genes in human cancers with respective theranostic compounds. Cross-referencing the Theranostic Genome with RNAseq data from over 17'000 human tissues identifies theranostic targets and lead compounds for various human cancers, and allows tailoring targeted theranostics to relevant cancer subpopulations. 
diff --git a/testModel/test.py b/testModel/test.py
index 4fd378194..1a09d4f62 100644
--- a/testModel/test.py
+++ b/testModel/test.py
@@ -108,7 +108,7 @@ for disease_label in DISEASES_LABELS:
         if MODELS[model]["isHuggingFace"]:
             predictions = MODELS[model]["predict"](pipline, title+abstract, DISEASES_LABELS, data["treshold"])
         else:
-            predictions = MODELS[model]["predict"](title+abstract, DISEASES_LABELS)
+            predictions = MODELS[model]["predict"](model, title+abstract, DISEASES_LABELS)
 
         end = time.time()
 
diff --git a/variables/models.py b/variables/models.py
index ded4df5fd..9a3e18880 100644
--- a/variables/models.py
+++ b/variables/models.py
@@ -2,10 +2,16 @@ import sys
 import os
 
 # Ajouter le répertoire parent au chemin de recherche
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
 
 from models.ZeroShotClassifier.HuggingFace.zero_shot_classification import create_classifier, classify
+import models.LLM.Ollama.ollama_wrapper as ollama
 
 MODELS = {
-    'facebook/bart-large-mnli': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier}
+    'facebook/bart-large-mnli': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier},
+    'MoritzLaurer/bge-m3-zeroshot-v2.0': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier},
+    'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier},
+    'MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier},
+    'MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli': {'predict': classify, 'isHuggingFace': True, 'pipline': create_classifier},
+    'llama3.2': {'predict': ollama.classify, 'isHuggingFace': False, 'pipline': None}
 }
\ No newline at end of file
-- 
GitLab
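
For a quick sanity check of the new 'llama3.2' entry, a minimal usage sketch (not part of the patch): it assumes a local Ollama server with the llama3.2 model already pulled, and the abstract string below is a made-up example, not data from the repository.

    # Illustrative sketch only -- mirrors the non-HuggingFace branch that testModel/test.py takes.
    from models.LLM.Ollama.ollama_wrapper import classify, NCDS

    abstract = "We report a rising incidence of type 2 diabetes and cardiovascular disease."  # made-up input
    predictions = classify('llama3.2', abstract, NCDS)  # returns {label: True/False} for each NCD label
    print([label for label, hit in predictions.items() if hit])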