Skip to content
Snippets Groups Projects
Commit c4ba6478 authored by dario.genga's avatar dario.genga
Browse files

Add import of data from file

The universe can now be created from data in a text file.

Added files_utils, which contains functions to open a file, count the
number of lines, and remove a trailing newline.

Added functions to create an empty kmeans universe and to initialize
the empty clusters array and points array.

Added a function that creates a point from a line in the text file.

Fixed a memory leak caused by a point value that was never freed.
parent 9f9276e5
No related branches found
No related tags found
No related merge requests found
// Project : K-means
// Author : Dario GENGA
#include "files_utils.h"
/// Open a file in read mode, terminating the program if it cannot be opened.
/// \param file_path The path to the file.
/// \return The opened stream; the caller is responsible for calling fclose() on it.
FILE* open_file(char *file_path) {
    // fopen() must not be given a NULL path, so reject it explicitly.
    if (file_path == NULL) {
        fprintf(stderr, "Error while opening the file: no path given\n");
        exit(EXIT_FAILURE);
    }

    FILE *fp = fopen(file_path, "r");
    if (fp == NULL) {
        // perror() appends ": <reason>\n" itself, so the prefix must not
        // end with its own newline.
        perror("Error while opening the file");
        exit(EXIT_FAILURE);
    }
    return fp;
}
/// Count the lines of a file, ignoring lines that consist of a single newline.
/// \param file_path The path to the file.
/// \return The number of non-empty lines found in the file.
int count_file_lines(char *file_path) {
    FILE *stream = open_file(file_path);
    char *buffer = NULL;
    size_t capacity = 0;
    int non_empty = 0;

    for (;;) {
        if (getline(&buffer, &capacity, stream) == -1) {
            break;
        }
        // A line holding only "\n" is considered empty and is not counted.
        non_empty += (strcmp(buffer, "\n") != 0);
    }

    // getline() owns the growth of the buffer; it is released once, here.
    free(buffer);
    fclose(stream);
    return non_empty;
}
/// Remove the newline character at the end of a string, if there is one.
/// The string is modified in place; NULL and empty strings are left untouched.
/// \param str The string to modify.
/// \see https://siongui.github.io/2013/01/09/c-remove-string-trailing-newline-carriage-return/
void remove_string_trailing_newline(char *str) {
    if (str == NULL) {
        return;
    }

    size_t length = strlen(str);
    // An empty string has no last character: indexing length - 1 would read
    // (and possibly write) one byte before the buffer.
    if (length == 0) {
        return;
    }

    if (str[length - 1] == '\n') {
        str[length - 1] = '\0';
    }
}
// Project : K-means
// Author : Dario GENGA
// Helpers to open text files, count their lines and clean up the lines read.
// The guard name avoids the leading underscore + uppercase form, which is
// reserved for the implementation.
#ifndef FILES_UTILS_H
#define FILES_UTILS_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/// Open a file in read mode. The program exits if the file cannot be opened.
/// \param file_path The path to the file.
/// \return The pointer of the FILE object.
FILE *open_file(char *file_path);
/// Count the number of non-empty lines in the file.
/// \param file_path The path to the file.
/// \return The number of lines in the file that are not just a newline.
int count_file_lines(char *file_path);
/// Remove the newline character at the end of the string, if present.
/// \param str The string to modify in place. May be NULL.
void remove_string_trailing_newline(char *str);
#endif
......@@ -3,16 +3,80 @@
#include "kmeans.h"
/// Create an empty kmeans universe: no points, no clusters and k set to 0.
/// The program exits if the universe cannot be allocated.
/// \return The newly allocated universe.
kmeans* kmeans_create_empty(void) {
    kmeans *universe = malloc(sizeof *universe);
    if (universe == NULL) {
        perror("Error while allocating the universe");
        exit(EXIT_FAILURE);
    }

    universe->points_array = NULL;
    universe->nb_points = 0;
    universe->k = 0;
    universe->clusters_array = NULL;
    return universe;
}
/// Create the empty clusters of the universe.
/// Allocates one slot per cluster (universe->k of them) and fills each slot
/// with a cluster created by cluster_create(NULL). When k is not positive the
/// array is left as NULL. The program exits if the allocation fails.
/// \param universe The universe that contains the clusters to create.
void initialize_clusters_array(kmeans* universe) {
    universe->clusters_array = NULL;
    // A negative k converted to size_t would request a gigantic allocation.
    if (universe->k <= 0) {
        return;
    }

    // Size the array from its own element type rather than from the struct:
    // each slot stores what cluster_create() returns.
    universe->clusters_array =
        malloc(sizeof *universe->clusters_array * (size_t)universe->k);
    if (universe->clusters_array == NULL) {
        perror("Error while allocating the clusters array");
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < universe->k; i++) {
        universe->clusters_array[i] = cluster_create(NULL);
    }
}
/// Initialize the array of points in the universe.
/// Allocates universe->nb_points slots and sets each of them to NULL. When
/// nb_points is not positive the array is left as NULL. The program exits if
/// the allocation fails.
/// \param universe The universe that contains the points.
void initialize_points_array(kmeans* universe) {
    universe->points_array = NULL;
    // A negative count converted to size_t would request a gigantic allocation.
    if (universe->nb_points <= 0) {
        return;
    }

    // Size the array from its own element type rather than from the struct:
    // each slot stores a pointer to a point.
    universe->points_array =
        malloc(sizeof *universe->points_array * (size_t)universe->nb_points);
    if (universe->points_array == NULL) {
        perror("Error while allocating the points array");
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < universe->nb_points; i++) {
        universe->points_array[i] = NULL;
    }
}
/// Create a point from the comma-separated values in a string.
/// The string is tokenized in place (strtok), so its content is modified.
/// The program exits if the number of values differs from \p dimensions.
/// \param line The string that contains the data of the point.
/// \param dimensions The number of dimensions in the point.
/// \return The point created from the string, or NULL when the line holds no data.
point* create_point_from_string(char *line, int dimensions) {
    // Skip the parsing if the line doesn't contain data. The line may reach
    // this function with its newline already stripped, hence the "" check.
    if (line == NULL || line[0] == '\0' || strcmp(line, "\n") == 0) {
        return NULL;
    }

    if (dimensions <= 0) {
        printf("Bad dimensions for the point.\n");
        exit(EXIT_FAILURE);
    }

    const char separator[] = ",";
    double *data = malloc(sizeof *data * (size_t)dimensions);
    if (data == NULL) {
        perror("Error while allocating the point data");
        exit(EXIT_FAILURE);
    }

    // Parse the line. Values beyond the expected dimensions are counted but
    // never stored, so a malformed line cannot write past the buffer.
    int count = 0;
    for (char *token = strtok(line, separator);
         token != NULL;
         token = strtok(NULL, separator)) {
        if (count < dimensions) {
            data[count] = strtod(token, NULL);
        }
        count++;
    }

    // Verify the dimensions of the point
    if (count != dimensions) {
        free(data);
        printf("Bad dimensions for the point.\n");
        exit(EXIT_FAILURE);
    }

    // NOTE(review): point_create() presumably stores this buffer in the point,
    // which is later released by destroy_point() -- confirm against point_create().
    return point_create(data);
}
/// Create the kmeans universe from an existing array of points.
/// The clusters array is created empty through initialize_clusters_array().
/// The program exits if the universe cannot be allocated.
/// \param k The number of clusters in the universe.
/// \param data The array of points; it is stored as-is, not copied.
/// \param nb_points The number of points in \p data.
/// \return The newly allocated universe.
kmeans* kmeans_create(int k, point** data, int nb_points) {
    kmeans *universe = malloc(sizeof *universe);
    if (universe == NULL) {
        perror("Error while allocating the universe");
        exit(EXIT_FAILURE);
    }

    universe->points_array = data;
    universe->nb_points = nb_points;
    universe->k = k;
    // The clusters are allocated exactly once, by the shared helper; a second
    // allocation here would be overwritten and leaked.
    universe->clusters_array = NULL;
    initialize_clusters_array(universe);
    return universe;
}
......@@ -35,8 +99,49 @@ void init_from_cmd_arguments(kmeans *universe) {
}
void read_data_source(kmeans *universe, char* source_file) {
/// Fill the universe with the data of a text file.
/// The line at index LINE_INDEX_DIMENSIONS holds the number of dimensions, the
/// line at index LINE_INDEX_CLUSTER holds the number of clusters, and every
/// line from LINE_INDEX_CONTENT onwards is parsed as one point.
/// \param universe The universe to fill; its points and clusters arrays are (re)created.
/// \param source_file The path to the file to read.
void read_data_source(kmeans* universe, char* source_file) {
    char *line = NULL;
    size_t len = 0;
    int current_line = 0;
    int dimensions = 0;
    int point_index = 0;

    // Count the number of points: the header lines are not data. A file with
    // fewer lines than the header must not yield a negative count.
    int nb_data = count_file_lines(source_file) - LINE_INDEX_CONTENT;
    if (nb_data < 0) {
        nb_data = 0;
    }
    universe->nb_points = nb_data;
    initialize_points_array(universe);

    // Open the file and read it, line by line
    FILE *fp = open_file(source_file);
    while (getline(&line, &len, fp) != -1) {
        // Remove newline char at end of line
        remove_string_trailing_newline(line);

        if (current_line == LINE_INDEX_DIMENSIONS) {
            // Get the number of dimensions for each data
            dimensions = (int)strtol(line, NULL, 10);
        } else if (current_line == LINE_INDEX_CLUSTER) {
            // Get the number of clusters
            universe->k = (int)strtol(line, NULL, 10);
            initialize_clusters_array(universe);
        } else if (line[0] != '\0' && point_index < universe->nb_points) {
            // Retrieve the data. Blank lines are skipped, and the index is
            // bounded by the allocated size so a mismatch between the line
            // count and the lines read cannot overflow the array.
            point* p = create_point_from_string(line, dimensions);
            if (p != NULL) {
                universe->points_array[point_index] = p;
                point_index++;
            }
        }
        current_line++;
    }
    fclose(fp);
    free(line);

    // Only the slots actually filled hold a point; the remaining ones are NULL.
    universe->nb_points = point_index;
}
void write_data_output(kmeans *universe, char* output_file) {
......@@ -68,6 +173,10 @@ void destroy_point(point* p) {
if (p->label != NULL)
free(p->label);
if (p->value != NULL) {
free(p->value);
}
free(p);
}
}
......@@ -81,11 +190,15 @@ void destroy_cluster(cluster* clstr) {
void destroy_universe(kmeans* kmeans) {
if (kmeans != NULL) {
for (int i = 0; i < kmeans->nb_points; i++) {
destroy_point(kmeans->points_array[i]);
if (kmeans->points_array != NULL) {
for (int i = 0; i < kmeans->nb_points; i++) {
destroy_point(kmeans->points_array[i]);
}
}
for (int i = 0; i < kmeans->k; i++) {
destroy_cluster(kmeans->clusters_array[i]);
if (kmeans->clusters_array != NULL) {
for (int i = 0; i < kmeans->k; i++) {
destroy_cluster(kmeans->clusters_array[i]);
}
}
free(kmeans->points_array);
......
......@@ -7,7 +7,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include "files_utils.h"
/// Zero-based index of the source file line that holds the number of dimensions.
#define LINE_INDEX_DIMENSIONS 0
/// Zero-based index of the source file line that holds the number of clusters.
#define LINE_INDEX_CLUSTER 1
/// Zero-based index of the first source file line parsed as point data.
#define LINE_INDEX_CONTENT 2
/// A group who contains points.
typedef struct _cluster {
......@@ -41,6 +47,10 @@ typedef struct _kmeans {
int nb_points;
} kmeans;
/// Create an empty kmeans universe (no points, no clusters, k set to 0).
/// \return The kmeans object of the universe.
kmeans* kmeans_create_empty(void);
/// Create the kmeans universe.
/// \param k The number of clusters in the universe.
/// \param data The array of points
......
......@@ -6,18 +6,10 @@
#include "kmeans.h"
/// Entry point: build a universe from the data file, then release it.
int main(void) {
    char* path = "./source_data.txt";

    // The universe starts empty and is filled from the content of the file.
    kmeans *universe = kmeans_create_empty();
    read_data_source(universe, path);

    destroy_universe(universe);
    return EXIT_SUCCESS;
}
LIB=-lm
CC=gcc -Wall -Wextra -g
main: kmeans.o main.o
main: files_utils.o kmeans.o main.o
$(CC) $^ -fsanitize=address -fsanitize=leak -o $@ $(LIB)
files_utils.o: files_utils.c files_utils.h
$(CC) -c $< $(LIB)
kmeans.o: kmeans.c kmeans.h
$(CC) -c $< $(LIB)
main.o: main.c
......
2
3
1,24
2.3,33.65
3,4
5,34
-1,5
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment