diff --git a/files_utils.c b/files_utils.c index 9a8738f8f3439c74f711d7fa0f6cf59e4187c0e3..567e56b2780cbac353f9f4fc7b861f79cc227980 100644 --- a/files_utils.c +++ b/files_utils.c @@ -4,38 +4,35 @@ #include "files_utils.h" -FILE* open_file(char *file_path) { +FILE* open_file(char *file_path, char *mode) { // Open the file - FILE *fp = fopen(file_path, "r"); - if (fp == NULL) + FILE *file = fopen(file_path, mode); + if (file == NULL) { perror("Error while opening the file.\n"); exit(EXIT_FAILURE); } - return fp; + return file; } int count_file_lines(char *file_path) { - FILE *fp = open_file(file_path); + FILE *file = open_file(file_path, "r"); int current_line = 0; char *line = NULL; - size_t len = 0; + size_t buffer_size = 0; - while(getline(&line, &len, fp) != -1) { + while(getline(&line, &buffer_size, file) != -1) { // Count the lines that are not empty if (strcmp(line, "\n") != 0) { current_line++; } } - fclose(fp); + fclose(file); free(line); return current_line; } -/// Remove the newline character in the string. The char must be at the end of the line. -/// \param str The string to modify. -/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/ void remove_string_trailing_newline(char *str) { if (str == NULL) return; diff --git a/files_utils.h b/files_utils.h index 3d78c5704291e3216de1bcaec9d6f0b1cfc130e4..45ffb24f302a554394c5a4cc1ddfff3c8adb13cb 100644 --- a/files_utils.h +++ b/files_utils.h @@ -12,11 +12,16 @@ /// Open a file. /// \param file_path The path to the file. /// \return The pointer of the FILE object. -FILE *open_file(char *file_path); +FILE *open_file(char *file_path, char *mode); /// Count the number of lines in the file. /// \param file_path The path to the file. /// \return The number of line in the file. int count_file_lines(char *file_path); +/// Remove the newline character in the string. The char must be at the end of the line. +/// \param str The string to modify. +/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/ +void remove_string_trailing_newline(char *str); + #endif diff --git a/kmeans.c b/kmeans.c index fcf9aa76b66dffdb92347c7b7f9957b7e94a0371..3d2547fe62b57976904d3228a04424e9deb6ce23 100644 --- a/kmeans.c +++ b/kmeans.c @@ -8,6 +8,7 @@ kmeans* kmeans_create_empty() { universe->points_array = NULL; universe->nb_points = 0; universe->k = 0; + universe->dimensions = 0; universe->clusters_array = NULL; return universe; @@ -101,26 +102,27 @@ void init_from_cmd_arguments(kmeans *universe) { void read_data_source(kmeans* universe, char* source_file) { char *line = NULL; - size_t len = 0; + size_t buffer_size = 0; ssize_t line_length; int current_line = 0; int dimensions = 0; int point_index = 0; - // Count the number of points + // Count the number of data (points) int nb_data = count_file_lines(source_file) - 2; // The two first line are not for the data universe->nb_points = nb_data; initialize_points_array(universe); // Open the file and read it, line by line - FILE *fp = open_file(source_file); - while((line_length = getline(&line, &len, fp)) != -1) { + FILE *file = open_file(source_file, "r"); + while((line_length = getline(&line, &buffer_size, file)) != -1) { // Remove newline char at end of line remove_string_trailing_newline(line); // Get the number of dimensions for each data if (current_line == LINE_INDEX_DIMENSIONS) { dimensions = (int)strtol(line, NULL, 10); + universe->dimensions = dimensions; } // Get the number of cluster if (current_line == LINE_INDEX_CLUSTER) { @@ -140,12 +142,43 @@ void read_data_source(kmeans* universe, char* source_file) { current_line++; } - fclose(fp); + // Close the file and free the memory + fclose(file); free(line); } void write_data_output(kmeans *universe, char* output_file) { + // Open the file + FILE *file = open_file(output_file, "w"); + // Write the number of dimensions + fprintf(file, "%d\n", universe->dimensions); + // Write the number of clusters + fprintf(file, "%d\n", universe->k); + + // Start writing the data of each cluster + for (int i = 0; i < universe->k; i++) { + // Prints the cluster symbol + fprintf(file, CLUSTER_SYMBOL); + fprintf(file, "\n"); + + for (int j = 0; j < universe->nb_points; j++) { + // Prints the point of the cluster + if (universe->points_array[j]->cluster == universe->clusters_array[i]) { + for (int d = 0; d < universe->dimensions; d++) { + fprintf(file, "%.2f", universe->points_array[j]->value[d]); + if (d < universe->dimensions - 1) { + fprintf(file, SEPARATOR); + } else { + fprintf(file, "\n"); + } + } + } + } + } + + // Close and save the file + fclose(file); } void init_clusters(kmeans *universe) { diff --git a/kmeans.h b/kmeans.h index f41549582190bc3946125913c78162b3f93faffc..69b86f9cdb1c45d76142c826a3cfd824664c7dad 100644 --- a/kmeans.h +++ b/kmeans.h @@ -14,6 +14,8 @@ #define LINE_INDEX_DIMENSIONS 0 #define LINE_INDEX_CLUSTER 1 #define LINE_INDEX_CONTENT 2 +#define CLUSTER_SYMBOL "*" +#define SEPARATOR ";" /// A group who contains points. typedef struct _cluster { @@ -39,6 +41,8 @@ typedef struct _point { typedef struct _kmeans { /// The number of clusters in the universe. int k; + /// The number of dimensions for each data in the universe. + int dimensions; /// A 'k' size array of clusters, who contains the data. struct _cluster** clusters_array; /// An array of points, representing the data in the universe. diff --git a/main.c b/main.c index e66bf4624231c5f4965d2f46e8729ce3b4d667f7..eacdf12e16f1a0565aaf0a70541c4236a4800a09 100644 --- a/main.c +++ b/main.c @@ -7,9 +7,27 @@ int main() { char* path = "./source_data.txt"; + char* output = "./output_data.txt"; kmeans *universe = kmeans_create_empty(); read_data_source(universe, path); + // Custom clustering for testing + for (int i = 0; i < universe->nb_points; i++) { + int c = 0; + if (i % 2 == 0) { + c = 1; + } + else if (i % 3 == 0) { + c = 2; + } + else { + c = 0; + } + universe->points_array[i]->cluster = universe->clusters_array[c]; + } + + write_data_output(universe, output); + destroy_universe(universe); return EXIT_SUCCESS; } diff --git a/output_data.txt b/output_data.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c8a7f531c01f116fc51362eed89915b0c9cbb4c --- /dev/null +++ b/output_data.txt @@ -0,0 +1,10 @@ +2 +3 +* +2.30;33.65 +* +1.00;24.00 +3.00;4.00 +-1.00;5.00 +* +5.00;34.00