From b0da2dcda6f975e77d92340bf3b406d8e47a2e25 Mon Sep 17 00:00:00 2001 From: "dario.genga" <dario.genga@etu.hesge.ch> Date: Tue, 7 Jun 2022 15:11:19 +0200 Subject: [PATCH] Add export of data from universe Added a 'dimensions' property for the universe in order to know the size of each data point. This is required for saving each dimension value of a point. Added the signature of the 'remove_string_trailing_newline' method in the 'files_utils.h' file. Updated the 'open_file' method by adding a mode parameter. Renamed some variables for clarity. --- files_utils.c | 19 ++++++++----------- files_utils.h | 7 ++++++- kmeans.c | 43 ++++++++++++++++++++++++++++++++++++++----- kmeans.h | 4 ++++ main.c | 18 ++++++++++++++++++ output_data.txt | 10 ++++++++++ 6 files changed, 84 insertions(+), 17 deletions(-) create mode 100644 output_data.txt diff --git a/files_utils.c b/files_utils.c index 9a8738f..567e56b 100644 --- a/files_utils.c +++ b/files_utils.c @@ -4,38 +4,35 @@ #include "files_utils.h" -FILE* open_file(char *file_path) { +FILE* open_file(char *file_path, char *mode) { // Open the file - FILE *fp = fopen(file_path, "r"); - if (fp == NULL) + FILE *file = fopen(file_path, mode); + if (file == NULL) { perror("Error while opening the file.\n"); exit(EXIT_FAILURE); } - return fp; + return file; } int count_file_lines(char *file_path) { - FILE *fp = open_file(file_path); + FILE *file = open_file(file_path, "r"); int current_line = 0; char *line = NULL; - size_t len = 0; + size_t buffer_size = 0; - while(getline(&line, &len, fp) != -1) { + while(getline(&line, &buffer_size, file) != -1) { // Count the lines that are not empty if (strcmp(line, "\n") != 0) { current_line++; } } - fclose(fp); + fclose(file); free(line); return current_line; } -/// Remove the newline character in the string. The char must be at the end of the line. -/// \param str The string to modify. 
-/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/ void remove_string_trailing_newline(char *str) { if (str == NULL) return; diff --git a/files_utils.h b/files_utils.h index 3d78c57..45ffb24 100644 --- a/files_utils.h +++ b/files_utils.h @@ -12,11 +12,16 @@ /// Open a file. /// \param file_path The path to the file. /// \return The pointer of the FILE object. -FILE *open_file(char *file_path); +FILE *open_file(char *file_path, char *mode); /// Count the number of lines in the file. /// \param file_path The path to the file. /// \return The number of line in the file. int count_file_lines(char *file_path); +/// Remove the newline character in the string. The char must be at the end of the line. +/// \param str The string to modify. +/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/ +void remove_string_trailing_newline(char *str); + #endif diff --git a/kmeans.c b/kmeans.c index fcf9aa7..3d2547f 100644 --- a/kmeans.c +++ b/kmeans.c @@ -8,6 +8,7 @@ kmeans* kmeans_create_empty() { universe->points_array = NULL; universe->nb_points = 0; universe->k = 0; + universe->dimensions = 0; universe->clusters_array = NULL; return universe; @@ -101,26 +102,27 @@ void init_from_cmd_arguments(kmeans *universe) { void read_data_source(kmeans* universe, char* source_file) { char *line = NULL; - size_t len = 0; + size_t buffer_size = 0; ssize_t line_length; int current_line = 0; int dimensions = 0; int point_index = 0; - // Count the number of points + // Count the number of data (points) int nb_data = count_file_lines(source_file) - 2; // The two first line are not for the data universe->nb_points = nb_data; initialize_points_array(universe); // Open the file and read it, line by line - FILE *fp = open_file(source_file); - while((line_length = getline(&line, &len, fp)) != -1) { + FILE *file = open_file(source_file, "r"); + while((line_length = getline(&line, &buffer_size, file)) != -1) { // 
Remove newline char at end of line remove_string_trailing_newline(line); // Get the number of dimensions for each data if (current_line == LINE_INDEX_DIMENSIONS) { dimensions = (int)strtol(line, NULL, 10); + universe->dimensions = dimensions; } // Get the number of cluster if (current_line == LINE_INDEX_CLUSTER) { @@ -140,12 +142,43 @@ void read_data_source(kmeans* universe, char* source_file) { current_line++; } - fclose(fp); + // Close the file and free the memory + fclose(file); free(line); } void write_data_output(kmeans *universe, char* output_file) { + // Open the file + FILE *file = open_file(output_file, "w"); + // Write the number of dimensions + fprintf(file, "%d\n", universe->dimensions); + // Write the number of clusters + fprintf(file, "%d\n", universe->k); + + // Start writing the data of each cluster + for (int i = 0; i < universe->k; i++) { + // Prints the cluster symbol + fprintf(file, CLUSTER_SYMBOL); + fprintf(file, "\n"); + + for (int j = 0; j < universe->nb_points; j++) { + // Prints the point of the cluster + if (universe->points_array[j]->cluster == universe->clusters_array[i]) { + for (int d = 0; d < universe->dimensions; d++) { + fprintf(file, "%.2f", universe->points_array[j]->value[d]); + if (d < universe->dimensions - 1) { + fprintf(file, SEPARATOR); + } else { + fprintf(file, "\n"); + } + } + } + } + } + + // Close and save the file + fclose(file); } void init_clusters(kmeans *universe) { diff --git a/kmeans.h b/kmeans.h index f415495..69b86f9 100644 --- a/kmeans.h +++ b/kmeans.h @@ -14,6 +14,8 @@ #define LINE_INDEX_DIMENSIONS 0 #define LINE_INDEX_CLUSTER 1 #define LINE_INDEX_CONTENT 2 +#define CLUSTER_SYMBOL "*" +#define SEPARATOR ";" /// A group who contains points. typedef struct _cluster { @@ -39,6 +41,8 @@ typedef struct _point { typedef struct _kmeans { /// The number of clusters in the universe. int k; + /// The number of dimensions for each data in the universe. 
+ int dimensions; /// A 'k' size array of clusters, who contains the data. struct _cluster** clusters_array; /// An array of points, representing the data in the universe. diff --git a/main.c b/main.c index e66bf46..eacdf12 100644 --- a/main.c +++ b/main.c @@ -7,9 +7,27 @@ int main() { char* path = "./source_data.txt"; + char* output = "./output_data.txt"; kmeans *universe = kmeans_create_empty(); read_data_source(universe, path); + // Custom clustering for testing + for (int i = 0; i < universe->nb_points; i++) { + int c = 0; + if (i % 2 == 0) { + c = 1; + } + else if (i % 3 == 0) { + c = 2; + } + else { + c = 0; + } + universe->points_array[i]->cluster = universe->clusters_array[c]; + } + + write_data_output(universe, output); + destroy_universe(universe); return EXIT_SUCCESS; } diff --git a/output_data.txt b/output_data.txt new file mode 100644 index 0000000..1c8a7f5 --- /dev/null +++ b/output_data.txt @@ -0,0 +1,10 @@ +2 +3 +* +2.30;33.65 +* +1.00;24.00 +3.00;4.00 +-1.00;5.00 +* +5.00;34.00 -- GitLab