diff --git a/doc/Makefile b/doc/Makefile
index 05189ad57601a6ccd7781a3f0ccd282b1d6e7499..acba9dfc7b74658f961f868ffe1f1d4932b91db4 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -7,7 +7,7 @@ read: ${PDF}
 	firefox $^
 
 %.pdf: %.md Makefile
-	pandoc --pdf-engine=xelatex -o $@ $<
+	pandoc --pdf-engine=lualatex -t beamer -o $@ $<
 
 clean:
 	rm -rf ${PDF}
diff --git a/doc/kmeans.md b/doc/kmeans.md
index cdd2c5c593ea1ee974ce43d80f4d50fed66588c9..dd06909e5805cd64fcf021f41779b180b98b7edd 100644
--- a/doc/kmeans.md
+++ b/doc/kmeans.md
@@ -2,6 +2,7 @@
 title: K-Means - Une Implémentation
 author: Boris Stefanovic
 date: 2022-05-24
+theme: "Frankfurt"
 geometry: "margin=40mm"
 mainfont: DejaVu Sans
 header-includes:
diff --git a/src/common.h b/src/common.h
index e2fddef821d2ad35b1dcecfe986f04553fb47ec4..5d079229665fe858fc92ea0eebe8d2809a043f7e 100644
--- a/src/common.h
+++ b/src/common.h
@@ -9,7 +9,7 @@
 
 typedef int64_t int_t;
 
-typedef double fp_t;
+typedef double fpt_t;
 
 
 #endif //PROG_KMEANS_COMMON_H
diff --git a/src/distance.c b/src/distance.c
index 922e24035a305bc6bb7dcbb10aba233f85a16cc0..a854e6cb876ebd132d4302f36441f229b1707fb2 100644
--- a/src/distance.c
+++ b/src/distance.c
@@ -9,7 +9,17 @@
 
 #define ERROR -1.0
 
-double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) {
+int_t abs_diff_int(const int_t a1, const int_t a2) {
+	int_t diff = a2 - a1;
+	return diff >= 0 ? diff : -diff;
+}
+
+fpt_t abs_diff_fpt(const fpt_t a1, const fpt_t a2) {
+	fpt_t diff = a2 - a1;
+	return diff >= 0.0 ? diff : -diff;
+}
+
+fpt_t distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) {
 	if (p1->dim != p2->dim)return ERROR;
 	int_t acc = 0;
 	for (size_t i = 0; i < p1->dim; ++i) {
@@ -20,12 +30,40 @@ double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) {
 	return sqrt((double) acc);
 }
 
-int_t abs_diff(const int_t a1, const int_t a2) {
-	int_t diff = a2 - a1;
-	return diff >= 0 ? diff : -diff;
+fpt_t distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) {
+	if (p1->dim != p2->dim)return ERROR;
+	int_t acc = 0;
+	for (size_t i = 0; i < p1->dim; ++i) {
+		int_t diff = p2->data[i] - p1->data[i];
+		int_t item = diff >= 0 ? diff : -diff;
+		acc += item;
+	}
+	return (double) acc;
+}
+
+fpt_t distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) {
+	if (p1->dim != p2->dim)return ERROR;
+	int_t max = ERROR;
+	int_t item;
+	for (size_t i = 0; i < p1->dim; ++i) {
+		item = abs_diff_int(p1->data[i], p2->data[i]);
+		if (item > max) max = item;
+	}
+	return (double) max;
+}
+
+fpt_t distance_euclid_fpt(const vector_int_t* p1, const vector_int_t* p2) {
+	if (p1->dim != p2->dim)return ERROR;
+	int_t acc = 0;
+	for (size_t i = 0; i < p1->dim; ++i) {
+		int_t diff = p2->data[i] - p1->data[i];
+		int_t item = diff * diff;
+		acc += item;
+	}
+	return sqrt((double) acc);
 }
 
-double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) {
+fpt_t distance_manhattan_fpt(const vector_int_t* p1, const vector_int_t* p2) {
 	if (p1->dim != p2->dim)return ERROR;
 	int_t acc = 0;
 	for (size_t i = 0; i < p1->dim; ++i) {
@@ -36,12 +74,12 @@ double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) {
 	return (double) acc;
 }
 
-double distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) {
+fpt_t distance_chebyshev_fpt(const vector_int_t* p1, const vector_int_t* p2) {
 	if (p1->dim != p2->dim)return ERROR;
 	int_t max = ERROR;
 	int_t item;
 	for (size_t i = 0; i < p1->dim; ++i) {
-		item = abs_diff(p1->data[i], p2->data[i]);
+		item = abs_diff_int(p1->data[i], p2->data[i]);
 		if (item > max) max = item;
 	}
 	return (double) max;
diff --git a/src/distance.h b/src/distance.h
index 47f86eac375cc2d45a7a9b354a2a04b0953b6d3d..a7a2f0ba5aa678d79404ec99730b265d1a57ebfc 100644
--- a/src/distance.h
+++ b/src/distance.h
@@ -15,6 +15,10 @@
  */
 
 
+int_t abs_diff_int(int_t a1, int_t a2);
+
+fpt_t abs_diff_fpt(fpt_t a1, fpt_t a2);
+
 double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2);
 
 double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2);
diff --git a/src/main.c b/src/main.c
index d675d0b0fcedd2d78b43a1030ec15f7d2ce7fb40..b1cb8d16f4eb2e2b57b686380c788051271ffaa6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,13 +1,116 @@
+#define _GNU_SOURCE
+
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "common.h"
+#include "vector.h"
+
+// Print the command-line usage on stderr.
+void help(const char* callname) {
+	fprintf(stderr, "\nUSAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n", callname);
+}
+
+// Read one line from file and parse it as a base-10 integer.
+// Returns 0 when no line can be read or when the line does not start with a number.
+int_t read_int(FILE* file) {
+	char* line = NULL; // getline() only allocates the buffer itself when given NULL and 0
+	size_t len = 0;
+	int_t value = 0;
+	if (getline(&line, &len, file) != -1) value = strtol(line, NULL, 10);
+	free(line); // the buffer must be released even when getline() fails
+	return value;
+}
+
+// Fill vector with the comma-separated integers found on the next line of file.
+// Components that are missing or not numeric are set to 0.
+// Returns false when no line could be read, true otherwise.
+bool read_vector_int(FILE* file, vector_int_t* vector) {
+	// procure line
+	char* line = NULL;
+	size_t len = 0;
+	// len is the buffer capacity, not the line length: only the return value reports failure
+	if (getline(&line, &len, file) == -1) {
+		free(line);
+		return false;
+	}
+	// tokenise
+	char* toktgt = line;
+	char* token = NULL;
+	for (size_t i = 0; i < vector->dim; ++i, toktgt = NULL) {
+		token = strtok(toktgt, ",");
+		// strtol returns 0 if number not read; desired behaviour:
+		vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0;
+	}
+	free(line);
+	return true;
+}
+
+// Build a new vector of dim components from a comma-separated line.
+// line is modified by strtok(). Returns NULL when the vector cannot be allocated.
+vector_int_t* line_to_vector_int(char* line, const size_t dim) {
+	vector_int_t* vector = vector_int_create_zero(dim);
+	if (vector == NULL) return NULL;
+	char* tgt = line;
+	char* token = NULL;
+	for (size_t i = 0; i < vector->dim; ++i, tgt = NULL) {
+		token = strtok(tgt, ",");
+		// strtol returns 0 if number not read; desired behaviour:
+		vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0;
+	}
+	return vector;
+}
 
 
 int main(int argc, char** argv) {
-	fprintf(stderr, "USAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n", argv[0]);
-	char* ipath = "/dev/stdin";
-	char* opath = "/dev/stdout";
-	if (argc > 1) ipath = argv[1];
+	if (argc <= 1) help(argv[0]);
+	char* ipath = NULL;
+	char* opath = NULL;
+	if (argc > 1) {
+		ipath = argv[1];
+		if (access(ipath, F_OK) == -1) {
+			fprintf(stderr, "IFILE: [ %s ] file does not exist !", ipath);
+			return EXIT_FAILURE;
+		}
+	}
 	if (argc > 2) opath = argv[2];
+	// READ
+	FILE* ifile = ipath != NULL ? fopen(ipath, "r") : stdin;
+	if (ifile == NULL) {
+		perror(ipath);
+		return EXIT_FAILURE;
+	}
+	// read the dimension as a signed value: a size_t can never test negative
+	const int_t dim_read = read_int(ifile);
+	const int_t nclusters = read_int(ifile);
+	if (dim_read <= 0) {
+		printf("DIMENSION MUST BE STRICTLY POSITIVE !\n");
+		return EXIT_FAILURE;
+	}
+	if (nclusters <= 0) {
+		printf("NUMBER OF CLUSTERS MUST BE STRICTLY POSITIVE !\n");
+		return EXIT_FAILURE;
+	}
+	const size_t dim = (size_t) dim_read;
+	char* line = NULL;
+	size_t len = 0;
+	while (getline(&line, &len, ifile) != -1) {
+		vector_int_t* vector = line_to_vector_int(line, dim);
+		//TODO
+		free(vector); // released here until the TODO above stores it
+	}
+	// getline() reuses one buffer for every line: free it once, after the loop
+	free(line);
+	if (ifile != stdin) fclose(ifile);
+	// WRITE
+	FILE* ofile = opath != NULL ? fopen(opath, "w") : stdout;
+	if (ofile == NULL) {
+		perror(opath);
+		return EXIT_FAILURE;
+	}
 	// TODO
+	if (ofile != stdout) fclose(ofile);
 	return EXIT_SUCCESS;
 }
diff --git a/src/vector.c b/src/vector.c
index d30b3ef765c94d6df15aafe2101ce57d1eea55ac..494530f301baa6be8c0a23f642281f6d304c0ac7 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -8,8 +8,48 @@
 
+// Create a vector of dim components copied from data; returns NULL on allocation failure.
 vector_int_t* vector_int_create(const size_t dim, const int_t* data) {
 	vector_int_t* v;
-	if ((v = malloc(dim * sizeof(int))) == NULL) return NULL;
+	// header and elements share one allocation, so a single free(v) releases both
+	if (dim > ((size_t) -1 - sizeof *v) / sizeof *v->data) return NULL;
+	if ((v = calloc(1, sizeof *v + dim * sizeof *v->data)) == NULL) return NULL;
 	v->dim = dim;
+	v->data = (int_t*) (v + 1);
 	for (size_t i = 0; i < dim; ++i) v->data[i] = data[i];
 	return v;
 }
+
+// Create a vector of dim components copied from data; returns NULL on allocation failure.
+vector_fpt_t* vector_fpt_create(const size_t dim, const fpt_t* data) {
+	vector_fpt_t* v;
+	// header and elements share one allocation, so a single free(v) releases both
+	if (dim > ((size_t) -1 - sizeof *v) / sizeof *v->data) return NULL;
+	if ((v = calloc(1, sizeof *v + dim * sizeof *v->data)) == NULL) return NULL;
+	v->dim = dim;
+	v->data = (fpt_t*) (v + 1);
+	for (size_t i = 0; i < dim; ++i) v->data[i] = data[i];
+	return v;
+}
+
+// Create a vector of dim components all set to 0; returns NULL on allocation failure.
+vector_int_t* vector_int_create_zero(const size_t dim) {
+	vector_int_t* v;
+	// header and elements share one allocation, so a single free(v) releases both
+	if (dim > ((size_t) -1 - sizeof *v) / sizeof *v->data) return NULL;
+	if ((v = calloc(1, sizeof *v + dim * sizeof *v->data)) == NULL) return NULL;
+	v->dim = dim;
+	v->data = (int_t*) (v + 1);
+	for (size_t i = 0; i < dim; ++i) v->data[i] = 0;
+	return v;
+}
+
+// Create a vector of dim components all set to 0.0; returns NULL on allocation failure.
+vector_fpt_t* vector_fpt_create_zero(const size_t dim) {
+	vector_fpt_t* v;
+	// header and elements share one allocation, so a single free(v) releases both
+	if (dim > ((size_t) -1 - sizeof *v) / sizeof *v->data) return NULL;
+	if ((v = calloc(1, sizeof *v + dim * sizeof *v->data)) == NULL) return NULL;
+	v->dim = dim;
+	v->data = (fpt_t*) (v + 1);
+	for (size_t i = 0; i < dim; ++i) v->data[i] = 0.0;
+	return v;
+}
diff --git a/src/vector.h b/src/vector.h
index 2cdd4ad0dc2900b8039bceca7cdc6ed11e77b315..5ca1d546972900a088e3c651442f7b3cc4bf306d 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -19,7 +19,18 @@ typedef struct vector_int_t_ {
 	int_t* data;
 } vector_int_t;
 
-vector_int_t* vector_int_create(const size_t dim, const int_t* data);
+typedef struct vector_fpt_t_ {
+	size_t dim;
+	fpt_t* data;
+} vector_fpt_t;
+
+vector_int_t* vector_int_create(size_t dim, const int_t* data);
+
+vector_fpt_t* vector_fpt_create(size_t dim, const fpt_t* data);
+
+vector_int_t* vector_int_create_zero(size_t dim);
+
+vector_fpt_t* vector_fpt_create_zero(size_t dim);
 
 
 #endif //PROG_KMEANS_VECTOR_H