From b0da2dcda6f975e77d92340bf3b406d8e47a2e25 Mon Sep 17 00:00:00 2001
From: "dario.genga" <dario.genga@etu.hesge.ch>
Date: Tue, 7 Jun 2022 15:11:19 +0200
Subject: [PATCH] Add export of data from universe

Added a 'dimensions' field to the universe so that the number of values
in each data point is known. This is required to write every coordinate
of a point when exporting the data.

Added the prototype of the 'remove_string_trailing_newline' function to
the 'files_utils.h' file and moved its documentation comment there from
'files_utils.c'.

Updated the 'open_file' function by adding a mode parameter that is
passed on to fopen.

Renamed some variables for clarity ('fp' to 'file', 'len' to
'buffer_size').
---
 files_utils.c   | 19 ++++++++-----------
 files_utils.h   |  7 ++++++-
 kmeans.c        | 43 ++++++++++++++++++++++++++++++++++++++-----
 kmeans.h        |  4 ++++
 main.c          | 18 ++++++++++++++++++
 output_data.txt | 10 ++++++++++
 6 files changed, 84 insertions(+), 17 deletions(-)
 create mode 100644 output_data.txt

diff --git a/files_utils.c b/files_utils.c
index 9a8738f..567e56b 100644
--- a/files_utils.c
+++ b/files_utils.c
@@ -4,38 +4,35 @@
 #include "files_utils.h"
 
 
-FILE* open_file(char *file_path) {
+FILE* open_file(char *file_path, char *mode) {
     // Open the file
-    FILE *fp = fopen(file_path, "r");
-    if (fp == NULL)
+    FILE *file = fopen(file_path, mode);
+    if (file == NULL)
     {
         perror("Error while opening the file.\n");
         exit(EXIT_FAILURE);
     }
-    return fp;
+    return file;
 }
 
 int count_file_lines(char *file_path) {
-    FILE *fp = open_file(file_path);
+    FILE *file = open_file(file_path, "r");
     int current_line = 0;
     char *line = NULL;
-    size_t len = 0;
+    size_t buffer_size = 0;
 
-    while(getline(&line, &len, fp) != -1) {
+    while(getline(&line, &buffer_size, file) != -1) {
         // Count the lines that are not empty
         if (strcmp(line, "\n") != 0) {
             current_line++;
         }
     }
 
-    fclose(fp);
+    fclose(file);
     free(line);
     return current_line;
 }
 
-/// Remove the newline character in the string. The char must be at the end of the line.
-/// \param str The string to modify.
-/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/
 void remove_string_trailing_newline(char *str) {
     if (str == NULL)
         return;
diff --git a/files_utils.h b/files_utils.h
index 3d78c57..45ffb24 100644
--- a/files_utils.h
+++ b/files_utils.h
@@ -12,11 +12,16 @@
 /// Open a file.
 /// \param file_path The path to the file.
 /// \return The pointer of the FILE object.
-FILE *open_file(char *file_path);
+FILE *open_file(char *file_path, char *mode);
 
 /// Count the number of lines in the file.
 /// \param file_path The path to the file.
 /// \return The number of line in the file.
 int count_file_lines(char *file_path);
 
+/// Remove the newline character in the string. The char must be at the end of the line.
+/// \param str The string to modify.
+/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/
+void remove_string_trailing_newline(char *str);
+
 #endif
diff --git a/kmeans.c b/kmeans.c
index fcf9aa7..3d2547f 100644
--- a/kmeans.c
+++ b/kmeans.c
@@ -8,6 +8,7 @@ kmeans* kmeans_create_empty() {
     universe->points_array = NULL;
     universe->nb_points = 0;
     universe->k = 0;
+    universe->dimensions = 0;
     universe->clusters_array = NULL;
 
     return universe;
@@ -101,26 +102,27 @@ void init_from_cmd_arguments(kmeans *universe) {
 
 void read_data_source(kmeans* universe, char* source_file) {
     char *line = NULL;
-    size_t len = 0;
+    size_t buffer_size = 0;
     ssize_t line_length;
     int current_line = 0;
     int dimensions = 0;
     int point_index = 0;
 
-    // Count the number of points
+    // Count the number of data (points)
     int nb_data = count_file_lines(source_file) - 2; // The two first line are not for the data
     universe->nb_points = nb_data;
     initialize_points_array(universe);
 
     // Open the file and read it, line by line
-    FILE *fp = open_file(source_file);
-    while((line_length = getline(&line, &len, fp)) != -1) {
+    FILE *file = open_file(source_file, "r");
+    while((line_length = getline(&line, &buffer_size, file)) != -1) {
         // Remove newline char at end of line
         remove_string_trailing_newline(line);
 
         // Get the number of dimensions for each data
         if (current_line == LINE_INDEX_DIMENSIONS) {
             dimensions = (int)strtol(line, NULL, 10);
+            universe->dimensions = dimensions;
         }
         // Get the number of cluster
         if (current_line == LINE_INDEX_CLUSTER) {
@@ -140,12 +142,43 @@ void read_data_source(kmeans* universe, char* source_file) {
         current_line++;
     }
 
-    fclose(fp);
+    // Close the file and free the memory
+    fclose(file);
     free(line);
 }
 
 void write_data_output(kmeans *universe, char* output_file) {
+    // Open the file
+    FILE *file = open_file(output_file, "w");
 
+    // Write the number of dimensions
+    fprintf(file, "%d\n", universe->dimensions);
+    // Write the number of clusters
+    fprintf(file, "%d\n", universe->k);
+
+    // Start writing the data of each cluster
+    for (int i = 0; i < universe->k; i++) {
+        // Prints the cluster symbol
+        fprintf(file, CLUSTER_SYMBOL);
+        fprintf(file, "\n");
+
+        for (int j = 0; j < universe->nb_points; j++) {
+            // Prints the point of the cluster
+            if (universe->points_array[j]->cluster == universe->clusters_array[i]) {
+                for (int d = 0; d < universe->dimensions; d++) {
+                    fprintf(file, "%.2f", universe->points_array[j]->value[d]);
+                    if (d < universe->dimensions - 1) {
+                        fprintf(file, SEPARATOR);
+                    } else {
+                        fprintf(file, "\n");
+                    }
+                }
+            }
+        }
+    }
+
+    // Close and save the file
+    fclose(file);
 }
 
 void init_clusters(kmeans *universe) {
diff --git a/kmeans.h b/kmeans.h
index f415495..69b86f9 100644
--- a/kmeans.h
+++ b/kmeans.h
@@ -14,6 +14,8 @@
 #define LINE_INDEX_DIMENSIONS 0
 #define LINE_INDEX_CLUSTER 1
 #define LINE_INDEX_CONTENT 2
+#define CLUSTER_SYMBOL "*"
+#define SEPARATOR ";"
 
 /// A group who contains points.
 typedef struct _cluster {
@@ -39,6 +41,8 @@ typedef struct _point {
 typedef struct _kmeans {
     /// The number of clusters in the universe.
     int k;
+    /// The number of dimensions for each data in the universe.
+    int dimensions;
     /// A 'k' size array of clusters, who contains the data.
     struct _cluster** clusters_array;
     /// An array of points, representing the data in the universe.
diff --git a/main.c b/main.c
index e66bf46..eacdf12 100644
--- a/main.c
+++ b/main.c
@@ -7,9 +7,27 @@
 
 int main() {
     char* path = "./source_data.txt";
+    char* output = "./output_data.txt";
     kmeans *universe = kmeans_create_empty();
     read_data_source(universe, path);
 
+    // Custom clustering for testing
+    for (int i = 0; i < universe->nb_points; i++) {
+        int c = 0;
+        if (i % 2 == 0) {
+            c = 1;
+        }
+        else if (i % 3 == 0) {
+            c = 2;
+        }
+        else {
+            c = 0;
+        }
+        universe->points_array[i]->cluster = universe->clusters_array[c];
+    }
+
+    write_data_output(universe, output);
+
     destroy_universe(universe);
     return EXIT_SUCCESS;
 }
diff --git a/output_data.txt b/output_data.txt
new file mode 100644
index 0000000..1c8a7f5
--- /dev/null
+++ b/output_data.txt
@@ -0,0 +1,10 @@
+2
+3
+*
+2.30;33.65
+*
+1.00;24.00
+3.00;4.00
+-1.00;5.00
+*
+5.00;34.00
-- 
GitLab