Skip to content
Snippets Groups Projects
Commit d5d3eccc authored by Boris Stefanovic's avatar Boris Stefanovic
Browse files

ADD: read vectors from file

parent 27987d1a
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ read: ${PDF} ...@@ -7,7 +7,7 @@ read: ${PDF}
firefox $^ firefox $^
%.pdf: %.md Makefile %.pdf: %.md Makefile
pandoc --pdf-engine=xelatex -o $@ $< pandoc --pdf-engine=lualatex -t beamer -o $@ $<
clean: clean:
rm -rf ${PDF} rm -rf ${PDF}
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
title: K-Means - Une Implémentation title: K-Means - Une Implémentation
author: Boris Stefanovic author: Boris Stefanovic
date: 2022-05-24 date: 2022-05-24
theme: "Frankfurt"
geometry: "margin=40mm" geometry: "margin=40mm"
mainfont: DejaVu Sans mainfont: DejaVu Sans
header-includes: header-includes:
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
typedef int64_t int_t; typedef int64_t int_t;
typedef double fp_t; typedef double fpt_t;
#endif //PROG_KMEANS_COMMON_H #endif //PROG_KMEANS_COMMON_H
...@@ -9,7 +9,17 @@ ...@@ -9,7 +9,17 @@
#define ERROR -1.0 #define ERROR -1.0
double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) { int_t abs_diff_int(const int_t a1, const int_t a2) {
int_t diff = a2 - a1;
return diff >= 0 ? diff : -diff;
}
/**
 * Absolute difference of two floating-point values.
 *
 * @param a1 first value
 * @param a2 second value
 * @return (a2 - a1) when that difference is >= 0.0, otherwise its negation
 */
fpt_t abs_diff_fpt(const fpt_t a1, const fpt_t a2) {
    const fpt_t delta = a2 - a1;
    if (delta >= 0.0) {
        return delta;
    }
    return -delta;
}
fpt_t distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) {
if (p1->dim != p2->dim)return ERROR; if (p1->dim != p2->dim)return ERROR;
int_t acc = 0; int_t acc = 0;
for (size_t i = 0; i < p1->dim; ++i) { for (size_t i = 0; i < p1->dim; ++i) {
...@@ -20,12 +30,40 @@ double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) { ...@@ -20,12 +30,40 @@ double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2) {
return sqrt((double) acc); return sqrt((double) acc);
} }
int_t abs_diff(const int_t a1, const int_t a2) { fpt_t distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) {
int_t diff = a2 - a1; if (p1->dim != p2->dim)return ERROR;
return diff >= 0 ? diff : -diff; int_t acc = 0;
for (size_t i = 0; i < p1->dim; ++i) {
int_t diff = p2->data[i] - p1->data[i];
int_t item = diff >= 0 ? diff : -diff;
acc += item;
}
return (double) acc;
}
fpt_t distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) {
if (p1->dim != p2->dim)return ERROR;
int_t max = ERROR;
int_t item;
for (size_t i = 0; i < p1->dim; ++i) {
item = abs_diff_int(p1->data[i], p2->data[i]);
if (item > max) max = item;
}
return (double) max;
}
fpt_t distance_euclid_fpt(const vector_int_t* p1, const vector_int_t* p2) {
if (p1->dim != p2->dim)return ERROR;
int_t acc = 0;
for (size_t i = 0; i < p1->dim; ++i) {
int_t diff = p2->data[i] - p1->data[i];
int_t item = diff * diff;
acc += item;
}
return sqrt((double) acc);
} }
double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) { fpt_t distance_manhattan_fpt(const vector_int_t* p1, const vector_int_t* p2) {
if (p1->dim != p2->dim)return ERROR; if (p1->dim != p2->dim)return ERROR;
int_t acc = 0; int_t acc = 0;
for (size_t i = 0; i < p1->dim; ++i) { for (size_t i = 0; i < p1->dim; ++i) {
...@@ -36,12 +74,12 @@ double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) { ...@@ -36,12 +74,12 @@ double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2) {
return (double) acc; return (double) acc;
} }
double distance_chebyshev_int(const vector_int_t* p1, const vector_int_t* p2) { fpt_t distance_chebyshev_fpt(const vector_int_t* p1, const vector_int_t* p2) {
if (p1->dim != p2->dim)return ERROR; if (p1->dim != p2->dim)return ERROR;
int_t max = ERROR; int_t max = ERROR;
int_t item; int_t item;
for (size_t i = 0; i < p1->dim; ++i) { for (size_t i = 0; i < p1->dim; ++i) {
item = abs_diff(p1->data[i], p2->data[i]); item = abs_diff_int(p1->data[i], p2->data[i]);
if (item > max) max = item; if (item > max) max = item;
} }
return (double) max; return (double) max;
......
...@@ -15,6 +15,10 @@ ...@@ -15,6 +15,10 @@
*/ */
int_t abs_diff_int(int_t a1, int_t a2);
fpt_t abs_diff_fpt(fpt_t a1, fpt_t a2);
double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2); double distance_euclid_int(const vector_int_t* p1, const vector_int_t* p2);
double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2); double distance_manhattan_int(const vector_int_t* p1, const vector_int_t* p2);
......
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "common.h"
#include "vector.h"
/**
 * Print the command-line usage message on stderr.
 *
 * @param callname name to show as the program name in the message
 */
void help(const char* callname) {
    fprintf(
        stderr,
        "\nUSAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n",
        callname
    );
}
/**
 * Read one line from `file` and parse the base-10 integer at its start.
 *
 * @param file stream to read from
 * @return the parsed value; 0 when no line could be read (end-of-file or
 *         read error) or when the line does not start with a number
 */
int_t read_int(FILE* file) {
    // getline requires *lineptr to be NULL (or a malloc'ed buffer) on entry;
    // passing uninitialised values is undefined behaviour.
    char* line = NULL;
    size_t cap = 0;
    int_t value = 0;
    if (getline(&line, &cap, file) != -1) {
        value = strtol(line, NULL, 10);
    }
    // The buffer belongs to us whether or not getline succeeded.
    free(line);
    return value;
}
/**
 * Read one line from `file` and parse it as comma-separated integers into
 * `vector`.
 *
 * Exactly vector->dim components are written. A component that is missing
 * from the line, or that does not parse as a number, is set to 0.
 *
 * @param file   stream to read the line from
 * @param vector destination; its dim and data must already be set up
 * @return true if a line was read, false on end-of-file or read error
 */
bool read_vector_int(FILE* file, vector_int_t* vector) {
    // procure line
    char* line = NULL;
    size_t cap = 0;
    // Failure is reported through getline's return value; `cap` is only the
    // buffer capacity and says nothing about whether a line was read.
    if (getline(&line, &cap, file) == -1) {
        free(line);  // getline may have allocated even though it failed
        return false;
    }
    // tokenise
    char* toktgt = line;
    for (size_t i = 0; i < vector->dim; ++i, toktgt = NULL) {
        const char* token = strtok(toktgt, ",");
        // strtol returns 0 if number not read; desired behaviour:
        vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0;
    }
    free(line);
    return true;
}
/**
 * Build a new integer vector of dimension `dim` from a comma-separated line.
 *
 * A component that is missing from the line, or that does not parse as a
 * number, is set to 0.
 *
 * @param line text to parse; it is modified in place by strtok
 * @param dim  number of components the resulting vector has
 * @return the newly created vector (owned by the caller), or NULL when the
 *         vector could not be allocated
 */
vector_int_t* line_to_vector_int(char* line, const size_t dim) {
    vector_int_t* vector = vector_int_create_zero(dim);
    // vector_int_create_zero reports allocation failure with NULL; pass that
    // on instead of dereferencing it below.
    if (vector == NULL) return NULL;
    char* tgt = line;
    for (size_t i = 0; i < vector->dim; ++i, tgt = NULL) {
        const char* token = strtok(tgt, ",");
        // strtol returns 0 if number not read; desired behaviour:
        vector->data[i] = token != NULL ? strtol(token, NULL, 10) : 0;
    }
    return vector;
}
/**
 * Entry point: read a vector data set and (eventually) write the result.
 *
 * Usage: <program> <INPUT_FILE> <OUTPUT_FILE>
 * With no input path the data is read from stdin; with no output path the
 * result goes to stdout.
 *
 * Input layout as consumed here: first line = dimension, second line =
 * number of clusters, every following line = one comma-separated vector.
 *
 * @return EXIT_SUCCESS, or EXIT_FAILURE when a file cannot be opened, the
 *         header values are not strictly positive, or a vector cannot be
 *         allocated
 */
int main(int argc, char** argv) {
    if (argc <= 1) help(argv[0]);
    char* ipath = NULL;
    char* opath = NULL;
    if (argc > 1) {
        ipath = argv[1];
        if (access(ipath, F_OK) == -1) {
            fprintf(stderr, "IFILE: [ %s ] file does not exist !\n", ipath);
            return EXIT_FAILURE;
        }
    }
    if (argc > 2) opath = argv[2];

    int status = EXIT_FAILURE;
    FILE* ofile = NULL;
    char* line = NULL;
    size_t len = 0;
    size_t dim = 0;

    // READ
    FILE* ifile = ipath != NULL ? fopen(ipath, "r") : stdin;
    if (ifile == NULL) {
        // The file can exist and still not be readable.
        fprintf(stderr, "IFILE: [ %s ] cannot be opened !\n", ipath);
        return EXIT_FAILURE;
    }
    // Keep the header values signed until validated: once stored in a size_t
    // a negative dimension can no longer be detected.
    const int_t dim_read = read_int(ifile);
    const int_t nclusters = read_int(ifile);
    if (dim_read <= 0) {
        printf("DIMENSION MUST BE STRICTLY POSITIVE !\n");
        goto cleanup;
    }
    if (nclusters <= 0) {
        printf("NUMBER OF CLUSTERS MUST BE STRICTLY POSITIVE !\n");
        goto cleanup;
    }
    dim = (size_t) dim_read;
    // getline reuses (and grows) the same buffer on every iteration, so it
    // must only be released once, after the loop.
    while (getline(&line, &len, ifile) != -1) {
        vector_int_t* vector = line_to_vector_int(line, dim);
        if (vector == NULL) {
            fprintf(stderr, "OUT OF MEMORY !\n");
            goto cleanup;
        }
        //TODO: store the vector; it is neither kept nor released yet
        (void) vector;
    }

    // WRITE
    ofile = opath != NULL ? fopen(opath, "w") : stdout;
    if (ofile == NULL) {
        fprintf(stderr, "OFILE: [ %s ] cannot be opened !\n", opath);
        goto cleanup;
    }
    // TODO
    status = EXIT_SUCCESS;

cleanup:
    free(line);
    // Only close streams this function opened itself.
    if (ofile != NULL && ofile != stdout && fclose(ofile) != 0) status = EXIT_FAILURE;
    if (ifile != stdin) fclose(ifile);
    return status;
}
...@@ -8,8 +8,32 @@ ...@@ -8,8 +8,32 @@
vector_int_t* vector_int_create(const size_t dim, const int_t* data) { vector_int_t* vector_int_create(const size_t dim, const int_t* data) {
vector_int_t* v; vector_int_t* v;
if ((v = malloc(dim * sizeof(int))) == NULL) return NULL; if ((v = calloc(dim, sizeof(int_t))) == NULL) return NULL;
v->dim = dim; v->dim = dim;
for (size_t i = 0; i < dim; ++i) v->data[i] = data[i]; for (size_t i = 0; i < dim; ++i) v->data[i] = data[i];
return v; return v;
} }
/**
 * Create a floating-point vector holding a copy of `data`.
 *
 * The vector header and its component array are two separate allocations;
 * release a vector with free(v->data) followed by free(v).
 *
 * @param dim  number of components
 * @param data array of at least `dim` values to copy; may be NULL only when
 *             dim is 0
 * @return the new vector, or NULL on allocation failure or when `data` is
 *         NULL while dim > 0
 */
vector_fpt_t* vector_fpt_create(const size_t dim, const fpt_t* data) {
    if (data == NULL && dim > 0) return NULL;
    // `data` is a pointer member, so the header must be sized as the struct
    // and the component storage allocated on its own.
    vector_fpt_t* v = malloc(sizeof *v);
    if (v == NULL) return NULL;
    // Ask for at least one element: calloc(0, ...) may legitimately return NULL.
    v->data = calloc(dim > 0 ? dim : 1, sizeof *v->data);
    if (v->data == NULL) {
        free(v);
        return NULL;
    }
    v->dim = dim;
    for (size_t i = 0; i < dim; ++i) v->data[i] = data[i];
    return v;
}
/**
 * Create an integer vector whose components are all 0.
 *
 * The vector header and its component array are two separate allocations;
 * release a vector with free(v->data) followed by free(v).
 *
 * @param dim number of components
 * @return the new vector, or NULL on allocation failure
 */
vector_int_t* vector_int_create_zero(const size_t dim) {
    // `data` is a pointer member, so the header must be sized as the struct
    // and the component storage allocated on its own.
    vector_int_t* v = malloc(sizeof *v);
    if (v == NULL) return NULL;
    // calloc already yields zeroed integers. Ask for at least one element:
    // calloc(0, ...) may legitimately return NULL.
    v->data = calloc(dim > 0 ? dim : 1, sizeof *v->data);
    if (v->data == NULL) {
        free(v);
        return NULL;
    }
    v->dim = dim;
    return v;
}
/**
 * Create a floating-point vector whose components are all 0.0.
 *
 * The vector header and its component array are two separate allocations;
 * release a vector with free(v->data) followed by free(v).
 *
 * @param dim number of components
 * @return the new vector, or NULL on allocation failure
 */
vector_fpt_t* vector_fpt_create_zero(const size_t dim) {
    // `data` is a pointer member, so the header must be sized as the struct
    // and the component storage allocated on its own.
    vector_fpt_t* v = malloc(sizeof *v);
    if (v == NULL) return NULL;
    // Ask for at least one element: calloc(0, ...) may legitimately return NULL.
    v->data = calloc(dim > 0 ? dim : 1, sizeof *v->data);
    if (v->data == NULL) {
        free(v);
        return NULL;
    }
    v->dim = dim;
    // Assign 0.0 explicitly: all-bits-zero is not guaranteed by the C
    // standard to represent floating-point zero.
    for (size_t i = 0; i < dim; ++i) v->data[i] = 0.0;
    return v;
}
...@@ -19,7 +19,18 @@ typedef struct vector_int_t_ { ...@@ -19,7 +19,18 @@ typedef struct vector_int_t_ {
int_t* data; int_t* data;
} vector_int_t; } vector_int_t;
vector_int_t* vector_int_create(const size_t dim, const int_t* data); typedef struct vector_fpt_t_ {
size_t dim;
fpt_t* data;
} vector_fpt_t;
vector_int_t* vector_int_create(size_t dim, const int_t* data);
vector_fpt_t* vector_fpt_create(size_t dim, const fpt_t* data);
vector_int_t* vector_int_create_zero(size_t dim);
vector_fpt_t* vector_fpt_create_zero(size_t dim);
#endif //PROG_KMEANS_VECTOR_H #endif //PROG_KMEANS_VECTOR_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment