diff --git a/SchmidtSteinmann.zip b/SchmidtSteinmann.zip new file mode 100644 index 0000000000000000000000000000000000000000..79dea5ffd2f7bd1c1fe23382d914701b68eea390 Binary files /dev/null and b/SchmidtSteinmann.zip differ diff --git a/SchmidtSteinmann/student-data-test.csv b/SchmidtSteinmann/student-data-test.csv new file mode 100644 index 0000000000000000000000000000000000000000..3a0d6af377841e4e1bf0fb631b00648e922e5498 --- /dev/null +++ b/SchmidtSteinmann/student-data-test.csv @@ -0,0 +1,101 @@ +success,grade_1,grade_2 +0,39.1963,78.5303 +1,40.4485,86.8395 +0,65.5719,44.3035 +1,79.6481,70.8066 +0,66.2602,41.6727 +1,97.6637,68.3249 +0,30.5488,57.3185 +1,89.4732,85.9468 +0,50.9309,34.2358 +1,39.7929,83.4247 +0,47.4544,43.4024 +1,69.975,84.4084 +0,66.5791,42.1357 +1,85.0587,54.3103 +0,66.5045,46.5154 +1,75.6727,93.7901 +0,30.5896,71.5884 +1,43.2175,83.5596 +0,58.0402,39.4724 +1,40.158,94.2887 +0,65.4079,39.872 +1,58.2539,64.9645 +0,90.0515,34.031 +1,72.2487,90.1078 +0,32.7323,98.4927 +1,74.0641,66.9625 +0,30.0749,56.5131 +1,87.572,68.1501 +0,54.562,49.5424 +1,78.309,72.2327 +0,57.8703,48.5142 +1,91.3575,85.6202 +0,32.8994,68.8984 +1,75.9627,73.3708 +0,49.7378,59.1349 +1,73.5545,66.0414 +0,34.2051,72.6251 +1,54.4923,75.5097 +0,48.5071,47.746 +1,92.3877,76.8295 +0,39.8972,62.0987 +1,75.7688,43.6375 +0,32.9389,75.696 +1,44.5334,86.442 +0,51.2656,60.1213 +1,70.7878,84.2462 +0,28.9464,39.5992 +1,47.5371,73.6289 +0,49.0241,48.504 +1,78.3707,93.9148 +0,48.807,62.2066 +1,72.0392,88.5636 +0,31.2363,96.3053 +1,51.5616,89.1555 +0,65.09,39.4882 +1,81.7598,47.952 +0,46.467,43.1749 +1,64.496,82.2082 +0,65.5995,42.7966 +1,50.6678,64.2266 +0,30.6653,42.7069 +1,76.6023,65.6216 +0,60.3982,38.5427 +1,80.7499,47.9425 +0,81.8373,39.6295 +1,76.6719,73.004 +0,31.7026,73.4485 +1,89.7585,65.1794 +0,31.1113,77.9068 +1,56.3601,68.8154 +0,47.3655,59.2683 +1,81.997,55.4778 +0,73.1963,28.3999 +1,50.2859,85.686 +0,30.5329,77.174 +1,66.6274,65.141 +0,30.5638,44.1596 +1,69.3048,90.1573 +0,40.631,61.4716 +1,67.5189,76.709 +0,33.6945,43.962 +1,54.6194,73.6004 +0,29.9562,91.6003 +1,59.5618,81.8905 +0,29.0975,92.016 +1,87.7544,65.2841 +0,79.147,40.1185 +1,74.4849,92.3425 +0,26.3324,44.9552 +1,54.3469,58.4329 +0,29.9471,93.0608 +1,96.3263,64.8035 +0,29.8645,73.1155 +1,62.2263,57.8496 +0,35.2611,72.8553 +1,47.3407,69.4123 +0,63.1953,36.9634 +1,59.4646,72.4025 +0,60.0839,42.4864 +1,57.453,73.6793 diff --git a/SchmidtSteinmann/student-data-train.csv b/SchmidtSteinmann/student-data-train.csv new file mode 100644 index 0000000000000000000000000000000000000000..87954be0a44c33cdc434e85215ef57e59786b701 --- /dev/null +++ b/SchmidtSteinmann/student-data-train.csv @@ -0,0 +1,101 @@ +success,grade_1,grade_2 +0,34.6237,78.0247 +0,30.2867,43.895 +0,35.8474,72.9022 +1,60.1826,86.3086 +1,79.0327,75.3444 +0,45.0833,56.3164 +1,61.1067,96.5114 +1,75.0247,46.554 +1,76.0988,87.4206 +1,84.4328,43.5334 +0,95.8616,38.2253 +0,75.0137,30.6033 +1,82.3071,76.482 +1,69.3646,97.7187 +0,39.5383,76.0368 +1,53.9711,89.2074 +1,69.0701,52.7405 +0,67.9469,46.6786 +1,70.6615,92.9271 +1,76.9788,47.576 +0,67.372,42.8384 +1,89.6768,65.7994 +0,50.5348,48.8558 +0,34.2121,44.2095 +1,77.9241,68.9724 +1,62.271,69.9545 +1,80.1902,44.8216 +0,93.1144,38.8007 +0,61.8302,50.2561 +0,38.7858,64.9957 +1,61.3793,72.8079 +1,85.4045,57.052 +0,52.108,63.1276 +1,52.0454,69.4329 +0,40.2369,71.1677 +0,54.6351,52.2139 +0,33.9155,98.8694 +1,64.177,80.9081 +0,74.7893,41.5734 +0,34.1836,75.2377 +1,83.9024,56.308 +0,51.5477,46.8563 +1,94.4434,65.5689 +0,82.3688,40.6183 +0,51.0478,45.8227 
+0,62.2227,52.061
+1,77.193,70.4582
+1,97.7716,86.7278
+1,62.0731,96.7688
+1,91.565,88.6963
+1,79.9448,74.1631
+1,99.2725,60.999
+1,90.5467,43.3906
+0,34.5245,60.3963
+0,50.2865,49.8045
+0,49.5867,59.809
+1,97.6456,68.8616
+0,32.5772,95.5985
+1,74.2487,69.8246
+1,71.7965,78.4536
+1,75.3956,85.7599
+0,35.2861,47.0205
+0,56.2538,39.2615
+0,30.0588,49.593
+0,44.6683,66.4501
+0,66.5609,41.0921
+1,40.4576,97.5352
+0,49.0726,51.8832
+1,80.2796,92.1161
+1,66.7467,60.9914
+0,32.7228,43.3072
+1,64.0393,78.0317
+1,72.3465,96.2276
+1,60.4579,73.095
+1,58.841,75.8584
+1,99.8279,72.3693
+1,47.2643,88.4759
+1,50.4582,75.8099
+0,60.4556,42.5084
+0,82.2267,42.7199
+1,88.9139,69.8038
+1,94.8345,45.6943
+1,67.3193,66.5894
+1,57.2387,59.5143
+1,80.3668,90.9601
+1,68.4685,85.5943
+0,42.0755,78.8448
+1,75.4777,90.4245
+1,78.6354,96.6474
+0,52.348,60.7695
+1,94.0943,77.1591
+1,90.4486,87.5088
+0,55.4822,35.5707
+1,74.4927,84.8451
+1,89.8458,45.3583
+1,83.4892,48.3803
+1,42.2617,87.1039
+1,99.315,68.7754
+1,55.34,64.9319
+1,74.7759,89.5298
diff --git a/SchmidtSteinmann/tree.py b/SchmidtSteinmann/tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..5452c3b7050852dfd93bf3bc9125931cabf8de11
--- /dev/null
+++ b/SchmidtSteinmann/tree.py
@@ -0,0 +1,155 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+ACTIVATION = lambda x : (1/(1 + np.exp(-x)))  # sigmoid, same as scipy's expit imported above
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w + eta * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x) -- placeholder, never called
+    pass
+
+def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
+    predicted = data["success"].tolist()
+    weights_hidden = weights.copy()  # copy, so the hidden layer is not aliased to the output weights
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            pente, origine = getSlope(weights)
+            predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
+
+            neuron_values = (row["grade_1"], row["grade_2"])
+            hidden_sum = np.sum(np.multiply(neuron_values, weights_hidden[:-1])) + weights_hidden[-1]
+            out_hidden = activation(hidden_sum)
+
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
+            out = activation(neuron_sum)
+
+            # The update still feeds on the raw inputs, so only the output layer
+            # learns; the hidden pass is reworked properly in tree_v2.py.
+            for i in range(0, len(weights)-1):
+                weights[i] = getNewWeight(weights[i], out, neuron_values[i], row["success"])
+            weights[2] = getNewWeight(weights[2], out, 1, row["success"])
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))  # RMSE for this epoch
+
+    return weights
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Standardise every grade column in place (zero mean, unit variance).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx,name] = ((item - m) / e)
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Droite de séparation")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    x,y = data.data, data.target  # TODO: expects a sklearn Bunch (e.g. load_iris()), not the student DataFrame
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42) # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)  # no trailing comma: keep the prediction array, not a tuple
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]  # assumes three classes, as in the iris dataset
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print(" ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+    print("Accuracy:", accuracy)
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
+    pente = -new_weights[0]/new_weights[1]
+    origine = -new_weights[2]/new_weights[1]
+
+    misses = 0
+    for point in set1:
+        if point[1] < pente*point[0]+origine:
+            misses += 1
+
+    for point in set2:
+        if point[1] > pente*point[0]+origine:
+            misses += 1
+
+    misses_percent = misses / (len(set1) + len(set2)) * 100  # share of misclassified points, in percent
+    print(f"Pente : {pente}, Origine : {origine}, Accuracy : {100-misses_percent}%")
+
+    abline(pente, origine)
+    plt.xlim(float(min(df["grade_1"]))-0.2, max(df["grade_1"]) + 0.2)
+    plt.ylim(float(min(df["grade_2"]))-0.2, max(df["grade_2"]) + 0.2)
+    plt.title("Multilayer Perceptron")
+    plt.xlabel("Grade 1 - Normalisé")
+    plt.ylabel("Grade 2 - Normalisé")
+    plt.legend(loc='upper center', shadow=True, fontsize='x-large')
+    plt.show()
+
+
+if __name__ == '__main__':
+    df = pd.read_csv("./student-data-train.csv")
+    nb_set = len(df)
+    weights = [rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)]
+
+    updateDfToNormalized(df)
+    new_weights = perceptron(df, weights, ACTIVATION)
+
+    x = np.arange(0, nb_set)
+    set1, set2 = split(df)
+    showData(set1, set2, new_weights)
+    #decisionTree(df)
diff --git a/SchmidtSteinmann/tree_v2.py b/SchmidtSteinmann/tree_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..5729782f31abcc53de2e58918397533154e6375d
--- /dev/null
+++ b/SchmidtSteinmann/tree_v2.py
@@ -0,0 +1,158 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+ACTIVATION = lambda x : (1/(1 + np.exp(-x)))  # sigmoid, same as scipy's expit imported above
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w + eta * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x) -- placeholder, never called
+    pass
+
+def perceptron(data: pd.DataFrame, weights: np.ndarray, activation) -> np.ndarray:
+    predicted = data["success"].tolist()
+    # One (w1, w2, bias) row per hidden neuron. The hidden weights stay fixed
+    # (random features); only the output layer is trained below.
+    weights_hidden = np.random.uniform(-0.5, 0.5, (NBNEURON, 3))
+
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            neuron_values = np.array([row["grade_1"], row["grade_2"]])
+
+            # Forward pass: one weighted sum + sigmoid per hidden neuron...
+            hidden_sum = weights_hidden[:, :2] @ neuron_values + weights_hidden[:, 2]
+            out_hidden = activation(hidden_sum)
+
+            # ...then a single output neuron over the NBNEURON hidden activations.
+            neuron_sum = np.dot(out_hidden, weights[:-1]) + weights[-1]
+            out = activation(neuron_sum)
+            predicted[idx] = 1 if out > 0.5 else 0
+
+            # Delta rule on the output layer, fed by the hidden activations
+            # (not the raw inputs).
+            for i in range(len(weights)-1):
+                weights[i] = getNewWeight(weights[i], out, out_hidden[i], row["success"])
+            weights[-1] = getNewWeight(weights[-1], out, 1, row["success"])
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))  # RMSE for this epoch
+
+    return weights
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Standardise every grade column in place (zero mean, unit variance).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx,name] = ((item - m) / e)
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Droite de séparation")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    x,y = data.data, data.target  # TODO: expects a sklearn Bunch (e.g. load_iris()), not the student DataFrame
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42) # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)  # no trailing comma: keep the prediction array, not a tuple
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]  # assumes three classes, as in the iris dataset
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print(" ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+    print("Accuracy:", accuracy)
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
+    # NB: with a hidden layer the network's decision boundary is no longer a
+    # straight line; this line is kept only for comparison with tree.py.
+    pente = -new_weights[0]/new_weights[1]
+    origine = -new_weights[2]/new_weights[1]
+
+    misses = 0
+    for point in set1:
+        if point[1] < pente*point[0]+origine:
+            misses += 1
+
+    for point in set2:
+        if point[1] > pente*point[0]+origine:
+            misses += 1
+
+    misses_percent = misses / (len(set1) + len(set2)) * 100  # share of misclassified points, in percent
+    print(f"Pente : {pente}, Origine : {origine}, Accuracy : {100-misses_percent}%")
+
+    abline(pente, origine)
+    plt.xlim(float(min(df["grade_1"]))-0.2, max(df["grade_1"]) + 0.2)
+    plt.ylim(float(min(df["grade_2"]))-0.2, max(df["grade_2"]) + 0.2)
+    plt.title("Multilayer Perceptron")
+    plt.xlabel("Grade 1 - Normalisé")
+    plt.ylabel("Grade 2 - Normalisé")
+    plt.legend(loc='upper center', shadow=True, fontsize='x-large')
+    plt.show()
+
+
+if __name__ == '__main__':
+    df = pd.read_csv("./student-data-train.csv")
+    nb_set = len(df)
+    weights = np.random.uniform(-0.5, 0.5, NBNEURON + 1)  # one weight per hidden neuron, plus the output bias
+
+    updateDfToNormalized(df)
+    new_weights = perceptron(df, weights, ACTIVATION)
+
+    x = np.arange(0, nb_set)
+    set1, set2 = split(df)
+    showData(set1, set2, new_weights)
+    #decisionTree(df)
diff --git a/tree.py b/tree.py
index a1c37e79e7dbf1bdd01df7cc5cef431b2fe04d59..5452c3b7050852dfd93bf3bc9125931cabf8de11 100644
--- a/tree.py
+++ b/tree.py
@@ -25,23 +25,23 @@ def getSlope(weights: list[float]) -> Tuple[float, float]:
 
     return pente, origine
 
-def neuron(data: pd.DataFrame, weights: list[float], activation):
-    for i in range(NBNEURON):
-        perceptron(data, weights, activation)
-
 def gradient():
-    #somme (t-o)o(1-o)x
+    # -sum((t - o) * o * (1 - o) * x) -- placeholder, never called
    pass
 
 def perceptron(data: pd.DataFrame, weights: list[float], activation) -> list[float]:
     predicted = data["success"].tolist()
-    for __ in range(0, 2000):
+    weights_hidden = weights.copy()  # copy, so the hidden layer is not aliased to the output weights
+    for __ in range(2000):
         for idx, row in data.iterrows():
             pente, origine = getSlope(weights)
             predicted[idx] = 1 if row["grade_2"] > pente * row["grade_1"] + origine else 0
 
             neuron_values = (row["grade_1"], row["grade_2"])
-            neuron_sum = np.sum(np.multiply(neuron_values, weights[:-1])) + weights[-1]
+            hidden_sum = np.sum(np.multiply(neuron_values, weights_hidden[:-1])) + weights_hidden[-1]
+            out_hidden = activation(hidden_sum)
+
+            neuron_sum = np.sum(np.multiply(out_hidden, weights[:-1])) + weights[-1]
             out = activation(neuron_sum)
 
             for i in range(0, len(weights)-1):
@@ -134,7 +134,7 @@ def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weigh
     abline(pente, origine)
     plt.xlim(float(min(df["grade_1"]))-0.2, max(df["grade_1"]) + 0.2)
     plt.ylim(float(min(df["grade_2"]))-0.2, max(df["grade_2"]) + 0.2)
-    plt.title("Perceptron")
+    plt.title("Multilayer Perceptron")
     plt.xlabel("Grade 1 - Normalisé")
     plt.ylabel("Grade 2 - Normalisé")
     plt.legend(loc='upper center', shadow=True, fontsize='x-large')
@@ -147,7 +147,6 @@ if __name__ == '__main__':
     weights = [rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5), rd.uniform(-0.5, 0.5)]
 
     updateDfToNormalized(df)
-
     new_weights = perceptron(df, weights, ACTIVATION)
 
     x = np.arange(0, nb_set)
diff --git a/tree_v2.py b/tree_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..5729782f31abcc53de2e58918397533154e6375d
--- /dev/null
+++ b/tree_v2.py
@@ -0,0 +1,158 @@
+import math
+import numpy as np
+import pandas as pd
+import random as rd
+import matplotlib.pyplot as plt
+
+from typing import Tuple
+from sklearn import tree
+from sklearn import metrics
+from scipy.special import expit
+from sklearn.model_selection import train_test_split
+
+ETA = 0.5
+NBNEURON = 10
+ACTIVATION = lambda x : (1/(1 + np.exp(-x)))  # sigmoid, same as scipy's expit imported above
+
+per_lst = []
+
+def getNewWeight(weight: float, result: float, neuron_value: float, expected_value: int) -> float:
+    # Delta rule: w + eta * (t - o) * o * (1 - o) * x
+    return weight + ETA * (expected_value - result) * result * (1 - result) * neuron_value
+
+def getSlope(weights: list[float]) -> Tuple[float, float]:
+    pente = -weights[0]/weights[1]
+    origine = -weights[2]/weights[1]
+
+    return pente, origine
+
+def gradient():
+    # -sum((t - o) * o * (1 - o) * x) -- placeholder, never called
+    pass
+
+def perceptron(data: pd.DataFrame, weights: np.ndarray, activation) -> np.ndarray:
+    predicted = data["success"].tolist()
+    # One (w1, w2, bias) row per hidden neuron. The hidden weights stay fixed
+    # (random features); only the output layer is trained below.
+    weights_hidden = np.random.uniform(-0.5, 0.5, (NBNEURON, 3))
+
+    for __ in range(2000):
+        for idx, row in data.iterrows():
+            neuron_values = np.array([row["grade_1"], row["grade_2"]])
+
+            # Forward pass: one weighted sum + sigmoid per hidden neuron...
+            hidden_sum = weights_hidden[:, :2] @ neuron_values + weights_hidden[:, 2]
+            out_hidden = activation(hidden_sum)
+
+            # ...then a single output neuron over the NBNEURON hidden activations.
+            neuron_sum = np.dot(out_hidden, weights[:-1]) + weights[-1]
+            out = activation(neuron_sum)
+            predicted[idx] = 1 if out > 0.5 else 0
+
+            # Delta rule on the output layer, fed by the hidden activations
+            # (not the raw inputs).
+            for i in range(len(weights)-1):
+                weights[i] = getNewWeight(weights[i], out, out_hidden[i], row["success"])
+            weights[-1] = getNewWeight(weights[-1], out, 1, row["success"])
+        print(math.sqrt(np.square(np.subtract(data["success"], predicted)).mean()))  # RMSE for this epoch
+
+    return weights
+
+def updateDfToNormalized(df: pd.DataFrame) -> None:
+    # Standardise every grade column in place (zero mean, unit variance).
+    for name, vals in df.items():
+        if name == "success":
+            continue
+        m = np.mean(vals)
+        e = np.std(vals)
+        for idx, item in vals.items():
+            df.at[idx,name] = ((item - m) / e)
+
+def abline(slope: float, intercept: float) -> None:
+    axes = plt.gca()
+    x_vals = np.array(axes.get_xlim())
+    y_vals = intercept + slope * x_vals
+    plt.plot(x_vals, y_vals, c="red", label="Droite de séparation")
+
+def split(df: pd.DataFrame) -> Tuple[list[Tuple[int, int]], list[Tuple[int, int]]]:
+    set1 = []
+    set2 = []
+    for _, row in df.iterrows():
+        if row["success"]:
+            set1.append((row["grade_1"], row["grade_2"]))
+        else:
+            set2.append((row["grade_1"], row["grade_2"]))
+    return set1, set2
+
+def decisionTree(data):
+    x,y = data.data, data.target  # TODO: expects a sklearn Bunch (e.g. load_iris()), not the student DataFrame
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42) # 50% training and 50% test
+
+    entro = tree.DecisionTreeClassifier(criterion="entropy", max_depth=100, min_samples_split=2, min_samples_leaf=1).fit(X_train, y_train)
+    y_pred_entro = entro.predict(X_test)  # no trailing comma: keep the prediction array, not a tuple
+    accuracy_entro = metrics.accuracy_score(y_test, y_pred_entro)
+    confus = metrics.confusion_matrix(y_test, y_pred_entro)
+    printResTree(y_test, y_pred_entro, accuracy_entro, confus, data)
+
+def printResTree(y_test, y_pred, accuracy, confus, data):
+    correct = [0, 0, 0]  # assumes three classes, as in the iris dataset
+    wrong = [0, 0, 0]
+    total = np.bincount(y_test)
+
+    print("Real - Pred")
+    for i in range(len(y_test)):
+        res = ""
+        if y_test[i] == y_pred[i]:
+            res = "O"
+            correct[y_test[i]] += 1
+        else:
+            res = "X"
+            wrong[y_pred[i]] += 1
+
+        print(" ", y_test[i], " - ", y_pred[i], " -> ", res)
+    print("")
+
+    print("Res:")
+    for j in range(len(data.target_names)):
+        print(j, "-", data.target_names[j], ":", correct[j], "/", total[j], " correct val -", wrong[j], "wrong val")
+    print("")
+    print("Accuracy:", accuracy)
+
+    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confus, display_labels=data.target_names)
+    cm_display.plot()
+    plt.show()
+
+def showData(set1: list[Tuple[int, int]], set2: list[Tuple[int, int]], new_weights: list[float]) -> None:
+    plt.scatter(*zip(*set1), c='skyblue', marker='d', label="Passed")
+    plt.scatter(*zip(*set2), c='k', marker='o', label="Failed")
label="Failed") + pente = -new_weights[0]/new_weights[1] + origine = -new_weights[2]/new_weights[1] + + misses = 0 + for point in set1: + if point[1] < pente*point[0]+origine: + misses += 1 + + for point in set2: + if point[1] > pente*point[0]+origine: + misses += 1 + + misses_percent = misses / 100 * (len(set1) + len(set2)) + print(f"Pente : {pente}, Origine : {origine}, Accuracy : {100-misses_percent}%") + + abline(pente, origine) + plt.xlim(float(min(df["grade_1"]))-0.2, max(df["grade_1"]) + 0.2) + plt.ylim(float(min(df["grade_2"]))-0.2, max(df["grade_2"]) + 0.2) + plt.title("Multilayer Perceptron") + plt.xlabel("Grade 1 - Normalisé") + plt.ylabel("Grade 2 - Normalisé") + plt.legend(loc='upper center', shadow=True, fontsize='x-large') + plt.show() + + +if __name__ == '__main__': + df = pd.read_csv("./student-data-train.csv") + nb_set = len(df) + weights = np.random.uniform(-0.5, 0.5, NBNEURON) + + updateDfToNormalized(df) + new_weights = perceptron(df, weights, ACTIVATION) + + x = np.arange(0, nb_set) + set1, set2 = split(df) + showData(set1, set2, new_weights) + #decisionTree(df)