Commit 6a39a168 authored by Kevin Bonga

adding course 6 & 7 pdf files

parent 21eef806
File added
File added
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Specify the column names
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# Load the Iris data without a header row and apply the column names
data = pd.read_csv('resources/datas/iris.csv', header=None, names=column_names)

# Separate features and labels
X = data.drop(columns=['species'])
y = data['species']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Parameters to test
parameters = {
    'min_samples_leaf': [1, 2, 4, 6, 8, 10],
    'max_depth': [None, 5, 10, 15, 20]
}

if __name__ == "__main__":
    best_clf = None
    best_score = 0
    # Perform cross-validation and measure classification accuracy for each parameter combination
    for min_samples_leaf in parameters['min_samples_leaf']:
        for max_depth in parameters['max_depth']:
            clf = DecisionTreeClassifier(min_samples_leaf=min_samples_leaf, max_depth=max_depth, random_state=42)
            clf.fit(X_train, y_train)
            # 5-fold cross-validation on the training and test subsets
            train_scores = cross_val_score(clf, X_train, y_train, cv=5)
            test_scores = cross_val_score(clf, X_test, y_test, cv=5)
            print(f"min_samples_leaf: {min_samples_leaf}, max_depth: {max_depth}")
            print(f"Train accuracy: {train_scores.mean():.2f}, Test accuracy: {test_scores.mean():.2f}\n")
            # Keep track of the best classifier
            if test_scores.mean() > best_score:
                best_score = test_scores.mean()
                best_clf = clf
    # Visualize the best decision tree
    if best_clf:
        plt.figure(figsize=(20, 10))
        plot_tree(best_clf, filled=True, feature_names=X.columns, class_names=y.unique())
        plt.show()
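
The loop above is a manual grid search over min_samples_leaf and max_depth using cross_val_score. For reference, the same sweep can be written more compactly with scikit-learn's GridSearchCV; the sketch below is only an illustration of that alternative (it is not part of this commit) and reuses the same iris.csv path, column names, and split as the script above.

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Same data loading and split as in the script above
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
data = pd.read_csv('resources/datas/iris.csv', header=None, names=column_names)
X, y = data.drop(columns=['species']), data['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

param_grid = {
    'min_samples_leaf': [1, 2, 4, 6, 8, 10],
    'max_depth': [None, 5, 10, 15, 20]
}

# 5-fold cross-validated grid search fitted on the training set only
search = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, cv=5)
search.fit(X_train, y_train)

print("Best parameters:", search.best_params_)
print(f"Best cross-validation accuracy: {search.best_score_:.2f}")
print(f"Held-out test accuracy: {search.score(X_test, y_test):.2f}")

Unlike the manual loop, GridSearchCV refits the best estimator on the full training set and keeps the test split purely for the final evaluation.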