From 23a7d5fb79c0c7636f47a1ab0758123fed03f856 Mon Sep 17 00:00:00 2001 From: Boris Stefanovic <owldev@bluewin.ch> Date: Tue, 7 Jun 2022 02:23:02 +0200 Subject: [PATCH] ADD: main function structure for int complete --- doc/kmeans.md | 28 +++++++----- src/common.c | 8 +++- src/common.h | 6 +-- src/distance.c | 53 +++++++++++----------- src/distance.h | 22 ++++----- src/io.c | 98 ++++++++++++++++++++++++++++++++++++++++ src/io.h | 30 +++++++++++++ src/kmeans.c | 22 ++++----- src/kmeans.h | 12 ++--- src/linkedlist.c | 114 +++++++++++++++++++++++++++++++++++------------ src/linkedlist.h | 58 ++++++++++++++++++------ src/main.c | 79 ++++++++++++-------------------- src/point.c | 6 +-- src/point.h | 6 --- src/vector.c | 42 +++++++++-------- src/vector.h | 33 +++++++------- 16 files changed, 407 insertions(+), 210 deletions(-) create mode 100644 src/io.c create mode 100644 src/io.h diff --git a/doc/kmeans.md b/doc/kmeans.md index 6ee769f..125ba33 100644 --- a/doc/kmeans.md +++ b/doc/kmeans.md @@ -16,12 +16,16 @@ header-includes: # Structures de Données -## Point +## Vecteur - chaque point est un vecteur - types entiers et virgule flottante séparés - "common.h" contient les définitions de `int_t` et `fpt_t` +We can justify the implementation of vectors for multiple types to ease +application to several scenarios without the need for casting, +e.g. scientific measurements (floating point) and image data (integer). + ```c typedef struct vector_int { size_t dim; @@ -56,11 +60,15 @@ typedef vector_int_t* cluster_int_t; ```c typedef struct point_int { - const vector_int_t* vector; - cluster_int_t* cluster; + vector_int_t* vector; + vector_int_t* cluster; } point_int_t; ``` +On justifie la présence d'une adresse de centre dans la struct par +le fait qu'il y ait une relation qui à chaque centre associe probablement +plusieurs points. + ## Ensemble de Points - parcours répétés de l'ensemble de tous les points @@ -68,16 +76,16 @@ typedef struct point_int { - une liste simplement chaînée fera l'affaire ```c -typedef struct ll_point_int_node { +typedef struct list_points_node_int { const point_int_t* point; - struct ll_point_int_node* next; -} ll_point_int_node_t; + struct list_points_node_int* next; +} list_points_node_int_t; -typedef struct ll_point_int { - ll_point_int_node_t* head; - ll_point_int_node_t* tail; +typedef struct list_points_int { + list_points_node_int_t* head; + list_points_node_int_t* tail; size_t size; -} ll_point_int_t; +} list_points_int_t; ``` # Algorithmique diff --git a/src/common.c b/src/common.c index 2579288..d095459 100644 --- a/src/common.c +++ b/src/common.c @@ -3,18 +3,22 @@ #include <stdlib.h> #include <time.h> + bool randinit = false; + inline void init_rand() { srand(time(NULL)); randinit = true; } -int rand_int(const int max) { + +int rand_int(int max) { if (!randinit) init_rand(); return rand() % max; } + int rand_int_range(int min, int max) { if (min > max) { int swap = min; @@ -24,11 +28,13 @@ int rand_int_range(int min, int max) { return min + rand_int(max - min); } + double rand_double_range_one() { if (!randinit) init_rand(); return ((double) rand()) / ((double) RAND_MAX); } + double rand_double_range(double min, double max) { if (min > max) { double swap = min; diff --git a/src/common.h b/src/common.h index 15787e9..45cd812 100644 --- a/src/common.h +++ b/src/common.h @@ -1,7 +1,3 @@ -// -// by Boris Stefanovic on 24/05/22 -// - #ifndef PROG_KMEANS_COMMON_H #define PROG_KMEANS_COMMON_H @@ -12,7 +8,7 @@ typedef int64_t int_t; typedef double fpt_t; -int rand_int(const int max); +int rand_int(int max); int rand_int_range(int min, int max); diff --git a/src/distance.c b/src/distance.c index 6459803..1655e2f 100644 --- a/src/distance.c +++ b/src/distance.c @@ -19,8 +19,9 @@ fpt_t abs_diff_fpt(const fpt_t a1, const fpt_t a2) { return diff >= 0.0 ? diff : -diff; } + fpt_t distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) { - if (p1->dim != p2->dim)return ERROR; + if (p1->dim != p2->dim) return ERROR; int_t acc = 0; for (size_t i = 0; i < p1->dim; ++i) { int_t diff = p2->data[i] - p1->data[i]; @@ -30,41 +31,31 @@ fpt_t distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) { return sqrt((fpt_t) acc); } -fpt_t distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) { - if (p1->dim != p2->dim)return ERROR; - int_t acc = 0; +fpt_t distance_euclid_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2) { + if (p1->dim != p2->dim) return ERROR; + fpt_t acc = 0; for (size_t i = 0; i < p1->dim; ++i) { - int_t diff = p2->data[i] - p1->data[i]; - int_t item = diff >= 0 ? diff : -diff; + fpt_t diff = p2->data[i] - p1->data[i]; + fpt_t item = diff * diff; acc += item; } - return (fpt_t) acc; + return sqrt((fpt_t) acc); } -fpt_t distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) { - if (p1->dim != p2->dim)return ERROR; - int_t max = ERROR; - int_t item; - for (size_t i = 0; i < p1->dim; ++i) { - item = abs_diff_int(p1->data[i], p2->data[i]); - if (item > max) max = item; - } - return (fpt_t) max; -} -fpt_t distance_euclid_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2) { - if (p1->dim != p2->dim)return ERROR; - fpt_t acc = 0; +fpt_t distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) { + if (p1->dim != p2->dim) return ERROR; + int_t acc = 0; for (size_t i = 0; i < p1->dim; ++i) { - fpt_t diff = p2->data[i] - p1->data[i]; - fpt_t item = diff * diff; + int_t diff = p2->data[i] - p1->data[i]; + int_t item = diff >= 0 ? diff : -diff; acc += item; } - return sqrt((fpt_t) acc); + return (fpt_t) acc; } fpt_t distance_manhattan_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2) { - if (p1->dim != p2->dim)return ERROR; + if (p1->dim != p2->dim) return ERROR; fpt_t acc = 0; for (size_t i = 0; i < p1->dim; ++i) { fpt_t diff = p2->data[i] - p1->data[i]; @@ -74,8 +65,20 @@ fpt_t distance_manhattan_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2) { return (fpt_t) acc; } + +fpt_t distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) { + if (p1->dim != p2->dim) return ERROR; + int_t max = ERROR; + int_t item; + for (size_t i = 0; i < p1->dim; ++i) { + item = abs_diff_int(p1->data[i], p2->data[i]); + if (item > max) max = item; + } + return (fpt_t) max; +} + fpt_t distance_chebyshev_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2) { - if (p1->dim != p2->dim)return ERROR; + if (p1->dim != p2->dim) return ERROR; fpt_t max = ERROR; fpt_t item; for (size_t i = 0; i < p1->dim; ++i) { diff --git a/src/distance.h b/src/distance.h index a7a2f0b..166cd54 100644 --- a/src/distance.h +++ b/src/distance.h @@ -7,23 +7,25 @@ #include "vector.h" -/* - * Although many distance return types may be left as integers, - * when passing function pointers to the k-means routine, - * a fixed signature will be expected. - * Therefore, we convert all distance return types to double. - */ - int_t abs_diff_int(int_t a1, int_t a2); fpt_t abs_diff_fpt(fpt_t a1, fpt_t a2); -double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2); -double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2); +fpt_t distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2); + +fpt_t distance_euclid_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2); + + +fpt_t distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2); + +fpt_t distance_manhattan_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2); + + +fpt_t distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2); -double distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2); +fpt_t distance_chebyshev_fpt(const vector_fpt_t* p1, const vector_fpt_t* p2); #endif //PROG_KMEANS_DISTANCE_H diff --git a/src/io.c b/src/io.c new file mode 100644 index 0000000..3bd31f2 --- /dev/null +++ b/src/io.c @@ -0,0 +1,98 @@ +#include "io.h" +#include <stdio.h> +#include <string.h> +#include "linkedlist.h" +#include "vector.h" + + +int_t read_int(FILE* file) { + char* line; + size_t len; + getline(&line, &len, file); + return strtol(line, NULL, 10); +} + +fpt_t read_fpt(FILE* file) { + char* line; + size_t len; + getline(&line, &len, file); + return strtod(line, NULL); +} + + +vector_int_t* line_to_vector_int(char* line, const size_t dim) { + vector_int_t* vector = vector_int_create(dim); + char* tgt = line; + char* token = NULL; + for (size_t i = 0; i < vector->dim; ++i, tgt = NULL) { + token = strtok(tgt, ","); + // strtol returns 0 if number not read, which is the desired behaviour: + vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0; + } + return vector; +} + +vector_fpt_t* line_to_vector_fpt(char* line, const size_t dim) { + vector_fpt_t* vector = vector_fpt_create(dim); + char* tgt = line; + char* token = NULL; + for (size_t i = 0; i < vector->dim; ++i, tgt = NULL) { + token = strtok(tgt, ","); + // strtol returns 0 if number not read, which is the desired behaviour: + vector->data[i] = token != NULL ? strtod(token, NULL) : 0; + } + return vector; +} + + +list_points_int_t* get_vector_list_int(FILE* ifile, const size_t dim) { + list_points_int_t* list = list_points_create_int(); + char* line = NULL; + size_t len = 0; + while (getline(&line, &len, ifile) != -1) { + if (len != 0) { + vector_int_t* vector = line_to_vector_int(line, dim); + list_points_append_int(list, vector); + free(line); + } + } + return list; +} + +list_points_fpt_t* get_vector_list_fpt(FILE* ifile, const size_t dim) { + list_points_fpt_t* list = list_points_create_fpt(); + char* line = NULL; + size_t len = 0; + while (getline(&line, &len, ifile) != -1) { + if (len != 0) { + vector_fpt_t* vector = line_to_vector_fpt(line, dim); + list_points_append_fpt(list, vector); + free(line); + } + } + return list; +} + + +static int _point_compare_clusters_int_(const void* p1, const void* p2) { + const point_int_t* point1 = (const point_int_t*) p1; + const point_int_t* point2 = (const point_int_t*) p2; + return point1->cluster < point2->cluster ? -1 : point1->cluster > point2->cluster ? 1 : 0; +} + + +void io_write_clusters_to_file_int(FILE* file, point_int_t** points, const size_t point_count) { + qsort(points, point_count, sizeof(point_int_t*), _point_compare_clusters_int_); // group points by cluster + vector_int_t* current_cluster = NULL; + point_int_t* current_point = NULL; + for (size_t i = 0; i < point_count; ++i) { + current_point = points[i]; + if (current_point->cluster != current_cluster) { + current_cluster = current_point->cluster; + fprintf(file, "\n*\n"); + } + vector_print_to_file_int(file, current_point->vector); + } +} + +void io_write_clusters_to_file_fpt(FILE* file, point_fpt_t** points, const size_t point_count) {} diff --git a/src/io.h b/src/io.h new file mode 100644 index 0000000..2487aab --- /dev/null +++ b/src/io.h @@ -0,0 +1,30 @@ +#ifndef PROG_KMEANS_IO_H +#define PROG_KMEANS_IO_H + +#include <stdio.h> +#include "common.h" +#include "linkedlist.h" +#include "vector.h" + + +int_t read_int(FILE* file); + +fpt_t read_fpt(FILE* file); + + +vector_int_t* line_to_vector_int(char* line, const size_t dim); + +vector_fpt_t* line_to_vector_fpt(char* line, const size_t dim); + + +list_points_int_t* get_vector_list_int(FILE* ifile, const size_t dim); + +list_points_fpt_t* get_vector_list_fpt(FILE* ifile, const size_t dim); + + +void io_write_clusters_to_file_int(FILE* file, point_int_t** points, const size_t point_count); + +void io_write_clusters_to_file_fpt(FILE* file, point_fpt_t** points, const size_t point_count); + + +#endif //PROG_KMEANS_IO_H diff --git a/src/kmeans.c b/src/kmeans.c index 26f7178..55ddccd 100644 --- a/src/kmeans.c +++ b/src/kmeans.c @@ -1,19 +1,15 @@ -// -// by Boris Stefanovic on 01/06/22 -// - #include "kmeans.h" -#include "cluster.h" +#include "point.h" -cluster_int_t* kmeans_init_clusters_int(const cluster_point_int_t** points, const size_t point_count, const size_t nclusters) { +vector_int_t** kmeans_init_clusters_int(const point_int_t** points, const size_t point_count, const size_t nclusters) { if (nclusters < 2) return NULL; if (NULL == points) return NULL; - cluster_int_t* clusters = calloc(nclusters, sizeof(cluster_int_t)); + vector_int_t** clusters = calloc(nclusters, sizeof(vector_int_t*)); if (NULL == clusters) return NULL; // determine range in which we are working - vector_int_t* min = vector_int_copy(points[0]->vector); - vector_int_t* max = vector_int_copy(points[0]->vector); + vector_int_t* min = vector_copy_int(points[0]->vector); + vector_int_t* max = vector_copy_int(points[0]->vector); for (size_t i = 0; i < point_count; ++i) { for (size_t p = 0; p < max->dim; ++p) { const int_t value = points[i]->vector->data[p]; @@ -23,7 +19,7 @@ cluster_int_t* kmeans_init_clusters_int(const cluster_point_int_t** points, cons } // until we have enough centers for (size_t i = 0; i < nclusters; ++i) { - cluster_int_t center = vector_int_create(max->dim); + vector_int_t* center = vector_create_int(max->dim); for (size_t p = 0; p < center->dim; ++p) { center->data[p] = rand_int_range(min->data[p], max->data[p]); } @@ -35,8 +31,8 @@ cluster_int_t* kmeans_init_clusters_int(const cluster_point_int_t** points, cons void kmeans_int( - cluster_point_int_t** points, const size_t point_count, - cluster_int_t* clusters, const size_t nb_clusters, - fpt_t (* distance_function)(const vector_fpt_t*, const vector_fpt_t*)) { + point_int_t** points, const size_t point_count, + vector_int_t** clusters, const size_t nb_clusters, + fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*)) { //TODO } diff --git a/src/kmeans.h b/src/kmeans.h index 377fc54..c1c5ff7 100644 --- a/src/kmeans.h +++ b/src/kmeans.h @@ -1,20 +1,16 @@ -// -// by Boris Stefanovic on 01/06/22 -// - #ifndef PROG_KMEANS_KMEANS_H #define PROG_KMEANS_KMEANS_H -#include "cluster.h" +#include "point.h" #include "linkedlist.h" -cluster_int_t* kmeans_init_clusters_int(const point_int_t** points, const size_t point_count, const size_t nclusters); +vector_int_t** kmeans_init_clusters_int(const point_int_t** points, const size_t point_count, const size_t nclusters); void kmeans_int( point_int_t** points, const size_t point_count, - cluster_int_t* clusters, const size_t nb_clusters, - fpt_t (* distance_function)(const vector_fpt_t*, const vector_fpt_t*)); + vector_int_t** clusters, const size_t nb_clusters, + fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*)); #endif //PROG_KMEANS_KMEANS_H diff --git a/src/linkedlist.c b/src/linkedlist.c index 1cfd9b1..b9c8f83 100644 --- a/src/linkedlist.c +++ b/src/linkedlist.c @@ -1,17 +1,23 @@ -// -// by Boris Stefanovic on 31/05/22 -// - #include "linkedlist.h" #include <assert.h> #include <stdbool.h> -#include "cluster.h" +#include "point.h" -ll_point_int_node_t* ll_point_int_create_node(vector_int_t* vec) { - ll_point_int_node_t* node = malloc(sizeof(ll_point_int_node_t)); +list_points_node_int_t* list_points_create_node_int(vector_int_t* vec) { + list_points_node_int_t* node = malloc(sizeof(list_points_node_int_t)); if (NULL == node) return NULL; - cluster_point_int_t* point = cluster_point_int_create(vec); + point_int_t* point = point_int_create(vec); + if (NULL == point) return NULL; + node->point = point; + node->next = NULL; + return node; +} + +list_points_node_fpt_t* list_points_create_node_fpt(vector_fpt_t* vec) { + list_points_node_fpt_t* node = malloc(sizeof(list_points_node_fpt_t)); + if (NULL == node) return NULL; + point_fpt_t* point = point_fpt_create(vec); if (NULL == point) return NULL; node->point = point; node->next = NULL; @@ -19,38 +25,64 @@ ll_point_int_node_t* ll_point_int_create_node(vector_int_t* vec) { } -void ll_point_int_destroy_node(ll_point_int_node_t* node, const bool full) { +void list_points_destroy_node_int(list_points_node_int_t* node, const bool full) { if (NULL == node) return; - if (full) cluster_point_int_destroy(node->point); + if (full) point_int_destroy(node->point); free(node); } +void list_points_destroy_node_fpt(list_points_node_fpt_t* node, const bool full) { + if (NULL == node) return; + if (full) point_fpt_destroy(node->point); + free(node); +} -ll_point_int_t* ll_point_int_create() { - ll_point_int_t* ll = NULL; - ll = malloc(sizeof(ll_point_int_t)); - if (NULL == ll) return NULL; - ll->head = NULL; - ll->tail = NULL; - ll->size = 0; - return ll; + +list_points_int_t* list_points_create_int() { + list_points_int_t* list = NULL; + list = malloc(sizeof(list_points_int_t)); + if (NULL == list) return NULL; + list->head = NULL; + list->tail = NULL; + list->size = 0; + return list; +} + +list_points_fpt_t* list_points_create_fpt() { + list_points_fpt_t* list = NULL; + list = malloc(sizeof(list_points_fpt_t)); + if (NULL == list) return NULL; + list->head = NULL; + list->tail = NULL; + list->size = 0; + return list; } -void ll_point_int_destroy(ll_point_int_t* list, const bool full) { +void list_points_destroy_int(list_points_int_t* list, const bool full) { + if (NULL == list) return; + list_points_node_int_t* node; + while ((node = list->head) != NULL) { + list->head = node->next; + list_points_destroy_node_int(node, full); + } + free(list); +} + +void list_points_destroy_fpt(list_points_fpt_t* list, const bool full) { if (NULL == list) return; - ll_point_int_node_t* node; + list_points_node_fpt_t* node; while ((node = list->head) != NULL) { list->head = node->next; - ll_point_int_destroy_node(node, full); + list_points_destroy_node_fpt(node, full); } free(list); } -void ll_point_int_append(ll_point_int_t* list, vector_int_t* vector) { +void list_points_append_int(list_points_int_t* list, vector_int_t* vector) { if (NULL == vector) return; - ll_point_int_node_t* node = ll_point_int_create_node(vector); + list_points_node_int_t* node = list_points_create_node_int(vector); if (NULL == list->head) { // if list is empty list->head = node; list->tail = list->head; @@ -58,14 +90,41 @@ void ll_point_int_append(ll_point_int_t* list, vector_int_t* vector) { list->tail->next = node; list->tail = node; } - list->size++; + ++list->size; } +void list_points_append_fpt(list_points_fpt_t* list, vector_fpt_t* vector) { + if (NULL == vector) return; + list_points_node_fpt_t* node = list_points_create_node_fpt(vector); + if (NULL == list->head) { // if list is empty + list->head = node; + list->tail = list->head; + } else { + list->tail->next = node; + list->tail = node; + } + ++list->size; +} + + +point_int_t** list_points_to_array_int(const list_points_int_t* list) { + point_int_t** a = calloc(list->size, sizeof(point_int_t*)); + if (NULL == a) return NULL; + list_points_node_int_t* cur = list->head; + size_t idx = 0; + while (cur != NULL) { + a[idx] = cur->point; + cur = cur->next; + ++idx; + } + assert(idx == list->size); + return a; +} -cluster_point_int_t** ll_point_int_to_array(const ll_point_int_t* list, size_t* size_ptr) { - cluster_point_int_t** a = calloc(list->size, sizeof(cluster_point_int_t*)); +point_fpt_t** list_points_to_array_fpt(const list_points_fpt_t* list) { + point_fpt_t** a = calloc(list->size, sizeof(point_fpt_t*)); if (NULL == a) return NULL; - ll_point_int_node_t* cur = list->head; + list_points_node_fpt_t* cur = list->head; size_t idx = 0; while (cur != NULL) { a[idx] = cur->point; @@ -73,6 +132,5 @@ cluster_point_int_t** ll_point_int_to_array(const ll_point_int_t* list, size_t* ++idx; } assert(idx == list->size); - if (size_ptr != NULL) *size_ptr = list->size; return a; } diff --git a/src/linkedlist.h b/src/linkedlist.h index fbb3ded..a269942 100644 --- a/src/linkedlist.h +++ b/src/linkedlist.h @@ -6,32 +6,62 @@ #define PROG_KMEANS_LINKEDLIST_H #include <stdbool.h> -#include "cluster.h" +#include "point.h" #include "vector.h" -typedef struct ll_point_int_node { +typedef struct list_points_node_int { point_int_t* point; - struct ll_point_int_node* next; -} ll_point_int_node_t; + struct list_points_node_int* next; +} list_points_node_int_t; -typedef struct ll_point_int { - ll_point_int_node_t* head; - ll_point_int_node_t* tail; +typedef struct list_points_int { + list_points_node_int_t* head; + list_points_node_int_t* tail; size_t size; -} ll_point_int_t; +} list_points_int_t; -ll_point_int_node_t* ll_point_int_create_node(vector_int_t* vec); -void ll_point_int_destroy_node(ll_point_int_node_t* node, const bool full); +typedef struct list_points_node_fpt { + point_fpt_t* point; + struct list_points_node_fpt* next; +} list_points_node_fpt_t; -ll_point_int_t* ll_point_int_create(); +typedef struct list_points_fpt { + list_points_node_fpt_t* head; + list_points_node_fpt_t* tail; + size_t size; +} list_points_fpt_t; + + +list_points_node_int_t* list_points_create_node_int(vector_int_t* vec); + +list_points_node_fpt_t* list_points_create_node_fpt(vector_fpt_t* vec); + + +void list_points_destroy_node_int(list_points_node_int_t* node, const bool full); + +void list_points_destroy_node_fpt(list_points_node_fpt_t* node, const bool full); + + +list_points_int_t* list_points_create_int(); + +list_points_fpt_t* list_points_create_fpt(); + + +void list_points_destroy_int(list_points_int_t* list, const bool full); + +void list_points_destroy_fpt(list_points_fpt_t* list, const bool full); + + +void list_points_append_int(list_points_int_t* list, vector_int_t* vector); + +void list_points_append_fpt(list_points_fpt_t* list, vector_fpt_t* vector); -void ll_point_int_destroy(ll_point_int_t* list, const bool full); -void ll_point_int_append(ll_point_int_t* list, vector_int_t* vector); +point_int_t** list_points_to_array_int(const list_points_int_t* list); -point_int_t** ll_point_int_to_array(const ll_point_int_t* list, size_t* size_ptr); +point_fpt_t** list_points_to_array_fpt(const list_points_fpt_t* list); #endif //PROG_KMEANS_LINKEDLIST_H diff --git a/src/main.c b/src/main.c index b5bef70..b24a099 100644 --- a/src/main.c +++ b/src/main.c @@ -1,10 +1,12 @@ #define _GNU_SOURCE +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#include "common.h" +#include "distance.h" +#include "io.h" #include "kmeans.h" #include "linkedlist.h" #include "vector.h" @@ -15,77 +17,54 @@ void help(const char* callname) { } -int_t read_int(FILE* file) { - char* line; - size_t len; - getline(&line, &len, file); - return strtol(line, NULL, 10); -} - - -vector_int_t* line_to_vector_int(char* line, const size_t dim) { - vector_int_t* vector = vector_int_create(dim); - char* tgt = line; - char* token = NULL; - for (size_t i = 0; i < vector->dim; ++i, tgt = NULL) { - token = strtok(tgt, ","); - // strtol returns 0 if number not read, which is the desired behaviour: - vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0; +bool init(int argc, char** argv, char** ipath, char** opath) { + if (argc <= 1) { + help(argv[0]); + return false; } - return vector; -} - - -ll_point_int_t* get_vector_list_int(FILE* ifile, const size_t dim) { - ll_point_int_t* list = ll_point_int_create(); - char* line = NULL; - size_t len = 0; - while (getline(&line, &len, ifile) != -1) { - if (len != 0) { - vector_int_t* vector = line_to_vector_int(line, dim); - ll_point_int_append(list, vector); - free(line); + if (argc > 1) { + *ipath = argv[1]; + if (access(*ipath, F_OK) == -1) { + fprintf(stderr, "IFILE: [ %s ] file does not exist !", *ipath); + return false; } } - return list; + if (argc > 2) *opath = argv[2]; } int main(int argc, char** argv) { - if (argc <= 1) help(argv[0]); + // INIT char* ipath = NULL; char* opath = NULL; - if (argc > 1) { - ipath = argv[1]; - if (access(ipath, F_OK) == -1) { - fprintf(stderr, "IFILE: [ %s ] file does not exist !", ipath); - return EXIT_FAILURE; - } - } - if (argc > 2) opath = argv[2]; + if (!init(argc, argv, &ipath, &opath)) + return EXIT_FAILURE; // READ FILE* ifile = ipath != NULL ? fopen(ipath, "r") : stdin; const size_t dim = read_int(ifile); - const size_t cluster_count = read_int(ifile); // k + const size_t nb_clusters = read_int(ifile); if (0 <= dim) { printf("DIMENSION MUST BE STRICTLY POSITIVE !\n"); return EXIT_FAILURE; } - if (0 <= cluster_count) { + if (0 <= nb_clusters) { printf("NUMBER OF CLUSTERS MUST BE STRICTLY POSITIVE !\n"); return EXIT_FAILURE; } - ll_point_int_t* list = get_vector_list_int(ifile, dim); - size_t count; - const point_int_t** points = ll_point_int_to_array(list, &count); - ll_point_int_destroy(list, false); + list_points_int_t* list = get_vector_list_int(ifile, dim); + fclose(ifile); + ifile = NULL; + const size_t point_count = list->size; + point_int_t** points = list_points_to_array_int(list); + list_points_destroy_int(list, false); list = NULL; // ALGORITHM - // TODO - // init clusters - cluster_int_t* clusters = kmeans_init_clusters_int(points, count, cluster_count); + vector_int_t** clusters = kmeans_init_clusters_int((const point_int_t**) points, point_count, nb_clusters); + kmeans_int(points, point_count, clusters, nb_clusters, distance_euclid_int); //TODO: choose dist func with command line // WRITE FILE* ofile = opath != NULL ? fopen(opath, "w") : stdout; - // TODO + fprintf(ofile, "%lud\n%lud\n", dim, nb_clusters); + io_write_clusters_to_file_int(ofile, points, point_count); + fclose(ofile); return EXIT_SUCCESS; } diff --git a/src/point.c b/src/point.c index 9898f8b..cb2e30f 100644 --- a/src/point.c +++ b/src/point.c @@ -1,8 +1,4 @@ -// -// by Boris Stefanovic on 01/06/22 -// - -#include "cluster.h" +#include "point.h" #include <stdlib.h> #include "vector.h" diff --git a/src/point.h b/src/point.h index f329132..58694e5 100644 --- a/src/point.h +++ b/src/point.h @@ -1,9 +1,3 @@ -// -// by Boris Stefanovic on 01/06/22 -// - -// cluster id inside point struct is justified by "many-to-one" relationship and several passes over all points - #ifndef PROG_KMEANS_CLUSTER_H #define PROG_KMEANS_CLUSTER_H diff --git a/src/vector.c b/src/vector.c index 085a10a..f72d466 100644 --- a/src/vector.c +++ b/src/vector.c @@ -1,13 +1,10 @@ -// -// Created by Boris Stefanovic on 24/05/22. -// - #include "vector.h" #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> -vector_int_t* vector_int_create(const size_t dim) { +vector_int_t* vector_create_int(const size_t dim) { vector_int_t* v; if ((v = malloc(sizeof(vector_int_t))) == NULL) return NULL; v->dim = dim; @@ -15,8 +12,7 @@ vector_int_t* vector_int_create(const size_t dim) { return v; } - -vector_fpt_t* vector_fpt_create(const size_t dim) { +vector_fpt_t* vector_create_fpt(const size_t dim) { vector_fpt_t* v; if ((v = malloc(sizeof(vector_fpt_t))) == NULL) return NULL; v->dim = dim; @@ -25,23 +21,22 @@ vector_fpt_t* vector_fpt_create(const size_t dim) { } -void vector_int_destroy(vector_int_t* vp) { +void vector_destroy_int(vector_int_t* vp) { if (NULL == vp) return; free(vp->data); free(vp); } - -void vector_fpt_destroy(vector_fpt_t* vp) { +void vector_destroy_fpt(vector_fpt_t* vp) { if (NULL == vp) return; free(vp->data); free(vp); } -vector_int_t* vector_int_copy(const vector_int_t* v) { +vector_int_t* vector_copy_int(const vector_int_t* v) { if (NULL == v) return NULL; - vector_int_t* c = vector_int_create(v->dim); + vector_int_t* c = vector_create_int(v->dim); if (NULL == c) return NULL; for (size_t i = 0; i < v->dim; ++i) { c->data[i] = v->data[i]; @@ -49,10 +44,9 @@ vector_int_t* vector_int_copy(const vector_int_t* v) { return c; } - -vector_fpt_t* vector_fpt_copy(const vector_fpt_t* v) { +vector_fpt_t* vector_copy_fpt(const vector_fpt_t* v) { if (NULL == v) return NULL; - vector_fpt_t* c = vector_fpt_create(v->dim); + vector_fpt_t* c = vector_create_fpt(v->dim); if (NULL == c) return NULL; for (size_t i = 0; i < v->dim; ++i) { c->data[i] = v->data[i]; @@ -61,7 +55,7 @@ vector_fpt_t* vector_fpt_copy(const vector_fpt_t* v) { } -bool vector_int_equals(const vector_int_t* v1, const vector_int_t* v2) { +bool vector_equals_int(const vector_int_t* v1, const vector_int_t* v2) { if (v1->dim != v2->dim) return false; for (size_t i = 0; i < v1->dim; ++i) { if (v1->data[i] != v2->data[i]) { @@ -71,8 +65,7 @@ bool vector_int_equals(const vector_int_t* v1, const vector_int_t* v2) { return true; } - -bool vector_fpt_equals(const vector_fpt_t* v1, const vector_fpt_t* v2) { +bool vector_equals_fpt(const vector_fpt_t* v1, const vector_fpt_t* v2) { if (v1->dim != v2->dim) return false; for (size_t i = 0; i < v1->dim; ++i) { if (v1->data[i] != v2->data[i]) { @@ -81,3 +74,16 @@ bool vector_fpt_equals(const vector_fpt_t* v1, const vector_fpt_t* v2) { } return true; } + + +void vector_print_to_file_int(FILE* file, const vector_int_t* v) { + fprintf(file, "%lud", v->data[0]); + for (size_t i = 1; i < v->dim; ++i) fprintf(file, " , %lud", v->data[i]); + fprintf(file, "\n"); +} + +void vector_print_to_file_fpt(FILE* file, const vector_fpt_t* v) { + fprintf(file, "%lf", v->data[0]); + for (size_t i = 1; i < v->dim; ++i) fprintf(file, " , %lf", v->data[i]); + fprintf(file, "\n"); +} diff --git a/src/vector.h b/src/vector.h index 2d2db1a..8291ac9 100644 --- a/src/vector.h +++ b/src/vector.h @@ -1,16 +1,8 @@ -// -// Created by Boris Stefanovic on 24/05/22. -// #ifndef PROG_KMEANS_VECTOR_H #define PROG_KMEANS_VECTOR_H -/* - * We can justify the implementation of vectors for multiple types to ease - * application to several scenarios without the need for casting, - * e.g. scientific measurements (floating point) and image data (integer). - */ - #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include "common.h" @@ -27,22 +19,29 @@ typedef struct vector_fpt { } vector_fpt_t; -vector_int_t* vector_int_create(const size_t dim); +vector_int_t* vector_create_int(const size_t dim); + +vector_fpt_t* vector_create_fpt(const size_t dim); + + +void vector_destroy_int(vector_int_t* vp); + +void vector_destroy_fpt(vector_fpt_t* vp); + -void vector_int_destroy(vector_int_t* vp); +vector_int_t* vector_copy_int(const vector_int_t* v); -vector_int_t* vector_int_copy(const vector_int_t* v); +vector_fpt_t* vector_copy_fpt(const vector_fpt_t* v); -bool vector_int_equals(const vector_int_t* v1, const vector_int_t* v2); +bool vector_equals_int(const vector_int_t* v1, const vector_int_t* v2); -vector_fpt_t* vector_fpt_create(const size_t dim); +bool vector_equals_fpt(const vector_fpt_t* v1, const vector_fpt_t* v2); -void vector_fpt_destroy(vector_fpt_t* vp); -vector_fpt_t* vector_fpt_copy(const vector_fpt_t* v); +void vector_print_to_file_int(FILE* file, const vector_int_t* v); -bool vector_fpt_equals(const vector_fpt_t* v1, const vector_fpt_t* v2); +void vector_print_to_file_fpt(FILE* file, const vector_fpt_t* v); #endif //PROG_KMEANS_VECTOR_H -- GitLab