diff --git a/kmeans.c b/kmeans.c
index ee695671bceff3dc71d53e37569612753ad6ee9d..9bb1d3b3ba5edd590865f2c42ee7cfcf4b5267a9 100644
--- a/kmeans.c
+++ b/kmeans.c
@@ -73,11 +73,12 @@ kmeans* kmeans_create_empty() {
     return universe;
 }
 
-kmeans* kmeans_create(int k, point** data, int nb_points) {
+kmeans* kmeans_create(int k, int dimensions, point** data, int nb_points) {
     kmeans* universe = malloc(sizeof(kmeans));
     universe->points_array = data;
     universe->nb_points = nb_points;
     universe->k = k;
+    universe->dimensions = dimensions;
     universe->clusters_array = NULL;
 
     initialize_clusters_array(universe);
@@ -99,11 +100,54 @@ cluster* cluster_create(point* centroid) {
     return c;
 }
 
-void init_from_cmd_arguments(kmeans *universe) {
-    universe = universe;
+point** read_data(char* source_file, int dimensions, int* data_length) {
+    char *line = NULL;
+    size_t buffer_size = 0;
+    int point_index = 0;
+
+    // Nothing is loaded yet; the real count is stored once the file has been read
+    *data_length = 0;
+
+    // Count the lines to size the array (at most one point per line)
+    int nb_data = count_file_lines(source_file);
+    if (nb_data <= 0) {
+        return NULL;
+    }
+
+    // The array stores pointers to the points, not the points themselves
+    point **data = malloc(sizeof *data * (size_t)nb_data);
+    if (data == NULL) {
+        return NULL;
+    }
+
+    // Open the file and read it, line by line
+    FILE *file = open_file(source_file, "r");
+    if (file == NULL) {
+        free(data);
+        return NULL;
+    }
+
+    // Stop at nb_data so the array is never overrun, whatever the file contains
+    while (point_index < nb_data && getline(&line, &buffer_size, file) != -1) {
+        point* p = create_point_from_string(line, dimensions);
+
+        // Keep only the lines for which a point could be created
+        if (p != NULL) {
+            data[point_index] = p;
+            point_index++;
+        }
+    }
+
+    // Close the file and free the memory
+    fclose(file);
+    free(line);
+
+    // Report the points actually stored: a skipped line leaves no slot behind
+    *data_length = point_index;
+    return data;
 }
 
-void read_data_source(kmeans* universe, char* source_file) {
+void read_custom_data_source(kmeans* universe, char* source_file) {
     char *line = NULL;
     size_t buffer_size = 0;
     ssize_t line_length;
diff --git a/kmeans.h b/kmeans.h
index 0013411957df1a5b486feb0134dca1c17d0331eb..7947c1583e2c046329fad7679e8790f87374c08b 100644
--- a/kmeans.h
+++ b/kmeans.h
@@ -11,6 +11,8 @@
 #include "files_utils.h"
 #include "values_utils.h"
 
+#define REQUIRED_FULL_ARGS 4
+#define ONLY_SOURCE_PATH_ARG 2
 #define LINE_INDEX_DIMENSIONS 0
 #define LINE_INDEX_CLUSTER 1
 #define LINE_INDEX_CONTENT 2
@@ -59,10 +61,11 @@ kmeans* kmeans_create_empty();
 
 /// Create the kmeans universe.
 /// \param k The number of clusters in the universe.
+/// \param dimensions The number of dimensions for the data in the universe.
 /// \param data The array of points
 /// \param nb_points The number of points in the universe.
 /// \return The kmeans object of the universe.
-kmeans* kmeans_create(int k, point** data, int nb_points);
+kmeans* kmeans_create(int k, int dimensions, point** data, int nb_points);
 
 /// Create a point.
 /// \param value The coordinates of the point.
@@ -75,15 +78,18 @@ point* point_create(float* value, int dimensions);
 /// \return The cluster object.
 cluster* cluster_create(point* centroid);
 
-/// Initialize the universe with the file specified in argument when starting the program. If no arguments has been
-/// specified, use the standard input instead.
-/// \param universe The universe to initialize.
-void init_from_cmd_arguments(kmeans *universe);
+/// Load the data (points) from the source_file. The lines for which no point could be created are skipped.
+/// \param source_file The path to the file to be read.
+/// \param dimensions The number of dimensions of each data.
+/// \param data_length Receives the number of points stored in the returned array (0 when NULL is returned).
+/// \return The array of points, allocated with malloc, or NULL when no line was counted in the file or when the
+/// file or the array could not be obtained.
+point** read_data(char* source_file, int dimensions, int* data_length);
 
 /// Load the universe with the data in the file source.
 /// \param universe The universe that will contains the data.
 /// \param source_file The path to the file to be read.
-void read_data_source(kmeans *universe, char* source_file);
+void read_custom_data_source(kmeans *universe, char* source_file);
 
 /// Save the universe and its data in the file specified.
 /// \param universe The universe to save.
diff --git a/main.c b/main.c
index 30334a0a88f1ab38614a5a79ef992dd2c69f208e..7493511b3ab2ce65d0d4a6c1d47fd02210f85d50 100644
--- a/main.c
+++ b/main.c
@@ -6,16 +6,103 @@
 #include <time.h>
 #include "kmeans.h"
 
-int main() {
+#define REQUIRED_FULL_ARGS 4
+#define ONLY_SOURCE_PATH_ARG 2
+#define FILE_OUTPUT "./output_data.txt"
+
+/// Create the universe from the command line arguments, or from the settings typed by the user when the
+/// program is started without arguments. The program exits when the settings cannot be used.
+/// \param argc The number of command line arguments.
+/// \param argv The command line arguments.
+/// \return The universe initialized with the data.
+kmeans *init_frm_cmd_arguments(int argc, char *argv[]) {
+    kmeans *universe = NULL;
+    int k = 0;
+    int dimensions = 0;
+    int data_length = 0;
+    char typed_path[256];
+    char *data_path = typed_path;
+
+    // No arguments, ask the user the settings and data
+    if (argc < 2) {
+        int fields_read = 0;
+
+        printf("Number of clusters (k) : ");
+        fields_read += scanf("%d", &k) == 1;
+        printf("Number of dimensions for the data : ");
+        fields_read += scanf("%d", &dimensions) == 1;
+        printf("Path to data (comma separator) : ");
+        // The field width keeps the path inside typed_path (255 characters + terminator)
+        fields_read += scanf("%255s", typed_path) == 1;
+
+        if (fields_read != 3) {
+            fprintf(stderr, "Unable to read the settings\n");
+            exit(EXIT_FAILURE);
+        }
+    }
+    // Create the universe with only our file
+    else if (argc == ONLY_SOURCE_PATH_ARG) {
+        // The argument is used in place: no copy, so no limit on the length of the path
+        data_path = argv[1];
+        universe = kmeans_create_empty();
+        read_custom_data_source(universe, data_path);
+    }
+    // Set the number of dimensions, clusters and path to data
+    else if (argc == REQUIRED_FULL_ARGS) {
+        dimensions = atoi(argv[1]);
+        k = atoi(argv[2]);
+        data_path = argv[3];
+    }
+    // Bad number of arguments
+    else {
+        if (argc < REQUIRED_FULL_ARGS)
+            printf("Missing arguments, ");
+        if (argc > REQUIRED_FULL_ARGS)
+            printf("Too much arguments, ");
+
+        // Display instructions
+        printf("you must start the program without arguments or with one of the following format :\n");
+        printf("main <number of dimensions> <number of cluster> <path_to_data_source>\n");
+        printf("or\n");
+        printf("main <path_to_custom_data_source>\n");
+
+        // A wrong invocation must be reported as a failure to the caller of the program
+        exit(EXIT_FAILURE);
+    }
+
+    // Create the universe
+    if (argc != ONLY_SOURCE_PATH_ARG) {
+        // atoi returns 0 for a text that is not a number, so this also rejects such settings
+        if (k <= 0 || dimensions <= 0) {
+            fprintf(stderr, "The number of clusters and dimensions must be positive\n");
+            exit(EXIT_FAILURE);
+        }
+
+        point** data = read_data(data_path, dimensions, &data_length);
+        if (data == NULL || data_length <= 0) {
+            fprintf(stderr, "Unable to load the data from %s\n", data_path);
+            exit(EXIT_FAILURE);
+        }
+
+        universe = kmeans_create(k, dimensions, data, data_length);
+    }
+
+    return universe;
+}
+
+int main(int argc, char *argv[]) {
     srand(time(NULL));
-    char* path = "./source_data.txt";
-    char* output = "./output_data.txt";
 
-    kmeans *universe = kmeans_create_empty();
-    read_data_source(universe, path);
+    // Initialize the universe
+    kmeans *universe = init_frm_cmd_arguments(argc, argv);
+
+    // Start the clustering
     start_clustering(universe);
-    write_data_output(universe, output);
 
+    // Output the result to a file
+    write_data_output(universe, FILE_OUTPUT);
+
+    // Free the universe and exit the program
     destroy_universe(universe);
     return EXIT_SUCCESS;
 }
diff --git a/source_data.csv b/source_data.csv
new file mode 100644
index 0000000000000000000000000000000000000000..122689e3ad7839c06094bba93c44ec0edb585cc0
--- /dev/null
+++ b/source_data.csv
@@ -0,0 +1,16 @@
+0,0
+1,1
+2,2
+3,3
+4,4
+5,5
+-1,-5
+-2,-4
+-3,-3
+-4,-2
+-5,-1
+-2.25,4.75
+3.1,-4.9
+2.2,4.4
+-1.75,-2.25
+4,2