From 47ad197101f3d9c95be25ac1336c6e51ab7f0a95 Mon Sep 17 00:00:00 2001
From: "vincent.steinman" <vincent.steinmann@etu.hesge.ch>
Date: Tue, 14 Nov 2023 15:46:20 +0100
Subject: [PATCH] meh

---
 SchmidtSteinmann.zip                    | Bin 0 -> 6850 bytes
 SchmidtSteinmann/student-data-test.csv  | 101 +++++++++++++++
 SchmidtSteinmann/student-data-train.csv | 101 +++++++++++++++
 SchmidtSteinmann/tree.py                | 155 +++++++++++++++++++++++
 SchmidtSteinmann/tree_v2.py             | 158 ++++++++++++++++++++++++
 tree.py                                 |  17 ++-
 tree_v2.py                              | 158 ++++++++++++++++++++++++
 7 files changed, 681 insertions(+), 9 deletions(-)
 create mode 100644 SchmidtSteinmann.zip
 create mode 100644 SchmidtSteinmann/student-data-test.csv
 create mode 100644 SchmidtSteinmann/student-data-train.csv
 create mode 100644 SchmidtSteinmann/tree.py
 create mode 100644 SchmidtSteinmann/tree_v2.py
 create mode 100644 tree_v2.py

diff --git a/SchmidtSteinmann.zip b/SchmidtSteinmann.zip
new file mode 100644
index 0000000000000000000000000000000000000000..79dea5ffd2f7bd1c1fe23382d914701b68eea390
GIT binary patch
[binary literal omitted: 6850-byte zip archive SchmidtSteinmann.zip]
z%SlTDCJ21Jr~NT&*7nfpx(Y|P&((V${BV+_rChdd)T(!F3%C3_sgye1t^JgZ<bsnw zcKM%ca*R8+&^&E-<{ImQ*i3_H<CK~N&U(u&6Kj&&f5jP2i@dF{`r}Ur_2rHqb!Xgr zaMGsY<-`)+<ikc>au3~EG?!hE{4k?Da93D-c)W#h>qapfo%pca0Cu+(Gb*bY&+6Ce z?Rv@Z+atv|y31!zs*+<DV|Itcxic<rgii7--19?B)44KrN{GeIwY=FkW}TIE(UEy^ zIrrC`bq*(P+}(X>f#1aSsvmwyZ~5|;FNeAIp3vf|8)apgG8T7anw_+3?asaByMC*3 zC%3_KZw9AF{;R_7EvRoRYGU#==X#US-apU&+f<#{XmOS#-j*%)|5pYjKG?I`@*aQD zMyHOi#ktZZ?aG4u-yWGQWIh?4pESX9Yuwq#?_@V5FPJGeYf38niA~L)SYLDO_B!fy zXTzh(5)(D|72U0Ex<4UY-MVvsgLw5jO`hPLZx|LVc<@Z=ncd?>aYdcU7kHBkc(rAP zRlmwSkC-0w{%zrXza!gw=egcKz2&<``JzVNOKZM_Zv52R=g)Qc)6tU9-~2*<m~Z*A zO^Q?gW27T*_4lzoD0f@eJ#o%tW?%s2;Q((&CJ|=b(-N>LiocE^78#Qi5aVIf6v#sn zplJ$_p&-1a(Vu~V0mFDACoN!}0NDZ>)<ABgfQB_dHh}P!##Rm-wh%YSfou_|&4XM{ zg4#SFOF(!_<3#~17LnBgLbelB0wc#Bs00St2f|w#w~G+8li2b&z?+o~WGE{GD}y)_ K1H)7a5Dx&<d@id1 literal 0 HcmV?d00001 diff --git a/SchmidtSteinmann/student-data-test.csv b/SchmidtSteinmann/student-data-test.csv new file mode 100644 index 0000000..3a0d6af --- /dev/null +++ b/SchmidtSteinmann/student-data-test.csv @@ -0,0 +1,101 @@ +success,grade_1,grade_2 +0,39.1963,78.5303 +1,40.4485,86.8395 +0,65.5719,44.3035 +1,79.6481,70.8066 +0,66.2602,41.6727 +1,97.6637,68.3249 +0,30.5488,57.3185 +1,89.4732,85.9468 +0,50.9309,34.2358 +1,39.7929,83.4247 +0,47.4544,43.4024 +1,69.975,84.4084 +0,66.5791,42.1357 +1,85.0587,54.3103 +0,66.5045,46.5154 +1,75.6727,93.7901 +0,30.5896,71.5884 +1,43.2175,83.5596 +0,58.0402,39.4724 +1,40.158,94.2887 +0,65.4079,39.872 +1,58.2539,64.9645 +0,90.0515,34.031 +1,72.2487,90.1078 +0,32.7323,98.4927 +1,74.0641,66.9625 +0,30.0749,56.5131 +1,87.572,68.1501 +0,54.562,49.5424 +1,78.309,72.2327 +0,57.8703,48.5142 +1,91.3575,85.6202 +0,32.8994,68.8984 +1,75.9627,73.3708 +0,49.7378,59.1349 +1,73.5545,66.0414 +0,34.2051,72.6251 +1,54.4923,75.5097 +0,48.5071,47.746 +1,92.3877,76.8295 +0,39.8972,62.0987 +1,75.7688,43.6375 +0,32.9389,75.696 +1,44.5334,86.442 +0,51.2656,60.1213 +1,70.7878,84.2462 +0,28.9464,39.5992 +1,47.5371,73.6289 +0,49.0241,48.504 +1,78.3707,93.9148 +0,48.807,62.2066 +1,72.0392,88.5636 +0,31.2363,96.3053 +1,51.5616,89.1555 +0,65.09,39.4882 +1,81.7598,47.952 +0,46.467,43.1749 +1,64.496,82.2082 +0,65.5995,42.7966 +1,50.6678,64.2266 +0,30.6653,42.7069 +1,76.6023,65.6216 +0,60.3982,38.5427 +1,80.7499,47.9425 +0,81.8373,39.6295 +1,76.6719,73.004 +0,31.7026,73.4485 +1,89.7585,65.1794 +0,31.1113,77.9068 +1,56.3601,68.8154 +0,47.3655,59.2683 +1,81.997,55.4778 +0,73.1963,28.3999 +1,50.2859,85.686 +0,30.5329,77.174 +1,66.6274,65.141 +0,30.5638,44.1596 +1,69.3048,90.1573 +0,40.631,61.4716 +1,67.5189,76.709 +0,33.6945,43.962 +1,54.6194,73.6004 +0,29.9562,91.6003 +1,59.5618,81.8905 +0,29.0975,92.016 +1,87.7544,65.2841 +0,79.147,40.1185 +1,74.4849,92.3425 +0,26.3324,44.9552 +1,54.3469,58.4329 +0,29.9471,93.0608 +1,96.3263,64.8035 +0,29.8645,73.1155 +1,62.2263,57.8496 +0,35.2611,72.8553 +1,47.3407,69.4123 +0,63.1953,36.9634 +1,59.4646,72.4025 +0,60.0839,42.4864 +1,57.453,73.6793 diff --git a/SchmidtSteinmann/student-data-train.csv b/SchmidtSteinmann/student-data-train.csv new file mode 100644 index 0000000..87954be --- /dev/null +++ b/SchmidtSteinmann/student-data-train.csv @@ -0,0 +1,101 @@ +success,grade_1,grade_2 +0,34.6237,78.0247 +0,30.2867,43.895 +0,35.8474,72.9022 +1,60.1826,86.3086 +1,79.0327,75.3444 +0,45.0833,56.3164 +1,61.1067,96.5114 +1,75.0247,46.554 +1,76.0988,87.4206 +1,84.4328,43.5334 +0,95.8616,38.2253 +0,75.0137,30.6033 +1,82.3071,76.482 +1,69.3646,97.7187 +0,39.5383,76.0368 +1,53.9711,89.2074 +1,69.0701,52.7405 +0,67.9469,46.6786 +1,70.6615,92.9271 
diff --git a/SchmidtSteinmann/tree.py b/SchmidtSteinmann/tree.py
new file mode 100644
index 0000000..5452c3b
--- /dev/null
+++ b/SchmidtSteinmann/tree.py
@@ -0,0 +1,155 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+# Logistic sigmoid; equivalent to scipy.special.expit.
+ACTIVATION = lambda x: 1 / (1 + np.exp(-x))
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w <- w + ETA * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    # Separating line of a two-input neuron, expressed as slope and intercept.
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x)
+    pass
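+
+# The stub above names the batch gradient -sum((t - o) * o * (1 - o) * x) but
+# never computes it. A minimal sketch of what it could return, assuming `data`
+# has the success/grade_1/grade_2 columns of the CSV files and `weights` is the
+# [w1, w2, bias] triplet used elsewhere in this file; illustrative, never called.
+def gradientSketch(data: pd.DataFrame, weights: list[float], activation) -> np.ndarray:
+    grad = np.zeros(3)
+    for _, row in data.iterrows():
+        x = np.array([row["grade_1"], row["grade_2"], 1.0])  # two inputs plus bias input
+        o = activation(np.dot(weights, x))                   # neuron output
+        grad += -(row["success"] - o) * o * (1 - o) * x      # per-sample contribution
+    return grad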
+
+def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
+    predicted = data["success"].tolist()
+    # The "hidden" layer aliases the output weights, so both passes share one
+    # weight triplet; tree_v2.py replaces this with a real hidden layer.
+    weights_hidden = weights
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            pente, origine = getSlope(weights)
+            predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
+
+            neuron_values = (row["grade_1"], row["grade_2"])
+            hidden_sum = np.sum(np.multiply(neuron_values, weights_hidden[:-1])) + weights_hidden[-1]
+            out_hidden = activation(hidden_sum)
+
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
+            out = activation(neuron_sum)
+
+            for i in range(0, len(weights)-1):
+                weights[i] = getNewWeight(weights[i], out, neuron_values[i], row["success"])
+            weights[2] = getNewWeight(weights[2], out, 1, row["success"])
+        # RMSE of the threshold predictions over this epoch.
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))
+
+    return weights
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Z-score each grade column in place (mean 0, standard deviation 1).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx, name] = ((item - m) / e)
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Separating line")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    # TODO: expects a sklearn-Bunch-like object (.data/.target/.target_names),
+    # not the CSV DataFrame loaded in __main__.
+    x, y = data.data, data.target
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)  # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
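+
+# decisionTree() needs Bunch-style attributes. A minimal adapter sketch for the
+# CSVs in this repository, assuming the success/grade_1/grade_2 layout and the
+# hypothetical label names "failed"/"passed"; illustrative, never called.
+class StudentDataBunch:
+    def __init__(self, df: pd.DataFrame):
+        self.data = df[["grade_1", "grade_2"]].to_numpy()
+        self.target = df["success"].to_numpy()
+        self.target_names = ["failed", "passed"]
+
+# Example: decisionTree(StudentDataBunch(pd.read_csv("./student-data-train.csv")))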
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print("  ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
+    pente = -new_weights[0]/new_weights[1]
+    origine = -new_weights[2]/new_weights[1]
+
+    # Count the points that fall on the wrong side of the separating line.
+    misses = 0
+    for point in set1:
+        if point[1] < pente*point[0]+origine:
+            misses += 1
+
+    for point in set2:
+        if point[1] > pente*point[0]+origine:
+            misses += 1
+
+    misses_percent = misses / (len(set1) + len(set2)) * 100
+    print(f"Slope: {pente}, Intercept: {origine}, Accuracy: {100 - misses_percent}%")
+
+    abline(pente, origine)
+    # df is the module-level DataFrame loaded in __main__.
+    plt.xlim(float(min(df["grade_1"])) - 0.2, max(df["grade_1"]) + 0.2)
+    plt.ylim(float(min(df["grade_2"])) - 0.2, max(df["grade_2"]) + 0.2)
+    plt.title("Multilayer Perceptron")
+    plt.xlabel("Grade 1 (normalized)")
+    plt.ylabel("Grade 2 (normalized)")
+    plt.legend(loc='upper center', shadow=True, fontsize='x-large')
+    plt.show()
+
+
+if __name__ == '__main__':
+    df = pd.read_csv("./student-data-train.csv")
+    nb_set = len(df)
+    weights = [rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)]
+
+    updateDfToNormalized(df)
+    new_weights = perceptron(df, weights, ACTIVATION)
+
+    x = np.arange(0, nb_set)
+    set1, set2 = split(df)
+    showData(set1, set2, new_weights)
+    #decisionTree(df)
diff --git a/SchmidtSteinmann/tree_v2.py b/SchmidtSteinmann/tree_v2.py
new file mode 100644
index 0000000..5729782
--- /dev/null
+++ b/SchmidtSteinmann/tree_v2.py
@@ -0,0 +1,158 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+# Logistic sigmoid; equivalent to scipy.special.expit.
+ACTIVATION = lambda x: 1 / (1 + np.exp(-x))
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w <- w + ETA * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    # Separating line of a two-input neuron, expressed as slope and intercept.
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x)
+    pass
+
+def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
+    predicted = data["success"].tolist()
+    # One weight triplet per hidden neuron: two inputs plus a bias.
+    weights_hidden = []
+    for __ in range(NBNEURON):
+        weights_hidden.append([rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)])
+    weights_hidden = np.array(weights_hidden)
+
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            # Track the separating line of the first hidden neuron for the error report.
+            pente, origine = getSlope(weights_hidden[0])
+            predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
+
+            neuron_values = np.array([row["grade_1"], row["grade_2"]])
+            # Hidden layer: one weighted sum of the two inputs plus bias per neuron.
+            hidden_sum = weights_hidden[:, :2] @ neuron_values + weights_hidden[:, 2]
+            out_hidden = activation(hidden_sum)
+
+            # Output neuron: weighted sum of the hidden activations plus bias.
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
+            out = activation(neuron_sum)
+
+            # Delta-rule update of the output layer only; the hidden layer stays fixed.
+            for i in range(len(weights) - 1):
+                weights[i] = getNewWeight(weights[i], out, out_hidden[i], row["success"])
+            weights[-1] = getNewWeight(weights[-1], out, 1, row["success"])
+        # RMSE of the threshold predictions over this epoch.
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))
+
+    return weights
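+
+# The training loop above adjusts only the output layer. A minimal sketch of the
+# matching hidden-layer step under full backpropagation, assuming the shapes used
+# above (weights_hidden is (NBNEURON, 3), out_hidden has NBNEURON entries, out is
+# the network output); illustrative, never called by this script.
+def updateHiddenSketch(weights_hidden, weights, out, out_hidden, neuron_values, expected_value):
+    delta_out = (expected_value - out) * out * (1 - out)        # output-layer error term
+    delta_hidden = delta_out * np.asarray(weights[:-1]) * out_hidden * (1 - out_hidden)
+    x = np.append(neuron_values, 1.0)                           # two inputs plus bias input
+    weights_hidden += ETA * np.outer(delta_hidden, x)           # one weight row per neuron
+    return weights_hidden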
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Z-score each grade column in place (mean 0, standard deviation 1).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx, name] = ((item - m) / e)
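+
+# An equivalent vectorized form of the helper above, assuming the same
+# success/grade_* column layout; kept separate so the original's behavior is
+# untouched. ddof=0 matches np.std in the loop-based version; illustrative only.
+def updateDfToNormalizedVectorized(df: pd.DataFrame) -> None:
+    cols = [c for c in df.columns if c != "success"]
+    df[cols] = (df[cols] - df[cols].mean()) / df[cols].std(ddof=0)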
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Separating line")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    # TODO: expects a sklearn-Bunch-like object (.data/.target/.target_names),
+    # not the CSV DataFrame loaded in __main__.
+    x, y = data.data, data.target
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)  # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print("  ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
+    # Only the first three output-layer weights define this line, so for the
+    # multilayer version the plot is a rough diagnostic, not the true boundary.
+    pente = -new_weights[0]/new_weights[1]
+    origine = -new_weights[2]/new_weights[1]
+
+    # Count the points that fall on the wrong side of the separating line.
+    misses = 0
+    for point in set1:
+        if point[1] < pente*point[0]+origine:
+            misses += 1
+
+    for point in set2:
+        if point[1] > pente*point[0]+origine:
+            misses += 1
+
+    misses_percent = misses / (len(set1) + len(set2)) * 100
+    print(f"Slope: {pente}, Intercept: {origine}, Accuracy: {100 - misses_percent}%")
+
+    abline(pente, origine)
+    # df is the module-level DataFrame loaded in __main__.
+    plt.xlim(float(min(df["grade_1"])) - 0.2, max(df["grade_1"]) + 0.2)
+    plt.ylim(float(min(df["grade_2"])) - 0.2, max(df["grade_2"]) + 0.2)
+    plt.title("Multilayer Perceptron")
+    plt.xlabel("Grade 1 (normalized)")
+    plt.ylabel("Grade 2 (normalized)")
+    plt.legend(loc='upper center', shadow=True, fontsize='x-large')
+    plt.show()
+
+
+if __name__ == '__main__':
+    df = pd.read_csv("./student-data-train.csv")
+    nb_set = len(df)
+    # NBNEURON hidden activations feed the output neuron, plus one bias weight.
+    weights = np.random.uniform(-0.5, 0.5, NBNEURON + 1)
+
+    updateDfToNormalized(df)
+    new_weights = perceptron(df, weights, ACTIVATION)
+
+    x = np.arange(0, nb_set)
+    set1, set2 = split(df)
+    showData(set1, set2, new_weights)
+    #decisionTree(df)
diff --git a/tree.py b/tree.py
index a1c37e7..5452c3b 100644
--- a/tree.py
+++ b/tree.py
@@ -25,23 +25,23 @@ def getSlope(weights: list[float]) -> Tuple[float, float]:
 
     return pente, origine
 
-def neuron(data: pd.DataFrame, weights: list[float], activation):
-    for i in range(NBNEURON):
-        perceptron(data, weights, activation)
-
 def gradient():
-    # sum((t - o) * o * (1 - o) * x)
+    # -sum((t - o) * o * (1 - o) * x)
     pass
 
 def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
     predicted = data["success"].tolist()
-    for __ in range(0, 2000):
+    # The "hidden" layer aliases the output weights, so both passes share one
+    # weight triplet; tree_v2.py replaces this with a real hidden layer.
+    weights_hidden = weights
+    for __ in range(2000):
         for idx, row in data.iterrows():
             pente, origine = getSlope(weights)
             predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
 
             neuron_values = (row["grade_1"], row["grade_2"])
-            neuron_sum = np.sum(np.multiply(neuron_values, weights[:-1])) + weights[-1]
+            hidden_sum = np.sum(np.multiply(neuron_values, weights_hidden[:-1])) + weights_hidden[-1]
+            out_hidden = activation(hidden_sum)
+
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
             out = activation(neuron_sum)
 
             for i in range(0, len(weights)-1):
@@ -134,7 +134,7 @@ def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weigh
     abline(pente, origine)
     plt.xlim(float(min(df["grade_1"])) - 0.2, max(df["grade_1"]) + 0.2)
     plt.ylim(float(min(df["grade_2"])) - 0.2, max(df["grade_2"]) + 0.2)
-    plt.title("Perceptron")
+    plt.title("Multilayer Perceptron")
     plt.xlabel("Grade 1 (normalized)")
     plt.ylabel("Grade 2 (normalized)")
     plt.legend(loc='upper center', shadow=True, fontsize='x-large')
@@ -147,7 +147,6 @@ if __name__ == '__main__':
     weights = [rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)]
 
     updateDfToNormalized(df)
-
     new_weights = perceptron(df, weights, ACTIVATION)
 
     x = np.arange(0, nb_set)
diff --git a/tree_v2.py b/tree_v2.py
new file mode 100644
index 0000000..5729782
--- /dev/null
+++ b/tree_v2.py
@@ -0,0 +1,158 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+# Logistic sigmoid; equivalent to scipy.special.expit.
+ACTIVATION = lambda x: 1 / (1 + np.exp(-x))
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w <- w + ETA * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    # Separating line of a two-input neuron, expressed as slope and intercept.
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x)
+    pass
+
+def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
+    predicted = data["success"].tolist()
+    # One weight triplet per hidden neuron: two inputs plus a bias.
+    weights_hidden = []
+    for __ in range(NBNEURON):
+        weights_hidden.append([rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)])
+    weights_hidden = np.array(weights_hidden)
+
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            # Track the separating line of the first hidden neuron for the error report.
+            pente, origine = getSlope(weights_hidden[0])
+            predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
+
+            neuron_values = np.array([row["grade_1"], row["grade_2"]])
+            # Hidden layer: one weighted sum of the two inputs plus bias per neuron.
+            hidden_sum = weights_hidden[:, :2] @ neuron_values + weights_hidden[:, 2]
+            out_hidden = activation(hidden_sum)
+
+            # Output neuron: weighted sum of the hidden activations plus bias.
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
+            out = activation(neuron_sum)
+
+            # Delta-rule update of the output layer only; the hidden layer stays fixed.
+            for i in range(len(weights) - 1):
+                weights[i] = getNewWeight(weights[i], out, out_hidden[i], row["success"])
+            weights[-1] = getNewWeight(weights[-1], out, 1, row["success"])
+        # RMSE of the threshold predictions over this epoch.
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))
+
+    return weights
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Z-score each grade column in place (mean 0, standard deviation 1).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx, name] = ((item - m) / e)
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Separating line")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    # TODO: expects a sklearn-Bunch-like object (.data/.target/.target_names),
+    # not the CSV DataFrame loaded in __main__.
+    x, y = data.data, data.target
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)  # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print("  ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
+    # Only the first three output-layer weights define this line, so for the
+    # multilayer version the plot is a rough diagnostic, not the true boundary.
+    pente = -new_weights[0]/new_weights[1]
+    origine = -new_weights[2]/new_weights[1]
+
+    # Count the points that fall on the wrong side of the separating line.
+    misses = 0
+    for point in set1:
+        if point[1] < pente*point[0]+origine:
+            misses += 1
+
+    for point in set2:
+        if point[1] > pente*point[0]+origine:
+            misses += 1
+
+    misses_percent = misses / (len(set1) + len(set2)) * 100
+    print(f"Slope: {pente}, Intercept: {origine}, Accuracy: {100 - misses_percent}%")
+
+    abline(pente, origine)
+    # df is the module-level DataFrame loaded in __main__.
+    plt.xlim(float(min(df["grade_1"])) - 0.2, max(df["grade_1"]) + 0.2)
+    plt.ylim(float(min(df["grade_2"])) - 0.2, max(df["grade_2"]) + 0.2)
+    plt.title("Multilayer Perceptron")
+    plt.xlabel("Grade 1 (normalized)")
+    plt.ylabel("Grade 2 (normalized)")
+    plt.legend(loc='upper center', shadow=True, fontsize='x-large')
+    plt.show()
+
+
+if __name__ == '__main__':
+    df = pd.read_csv("./student-data-train.csv")
+    nb_set = len(df)
+    # NBNEURON hidden activations feed the output neuron, plus one bias weight.
+    weights = np.random.uniform(-0.5, 0.5, NBNEURON + 1)
+
+    updateDfToNormalized(df)
+    new_weights = perceptron(df, weights, ACTIVATION)
+
+    x = np.arange(0, nb_set)
+    set1, set2 = split(df)
+    showData(set1, set2, new_weights)
+    #decisionTree(df)
-- 
GitLab