open-cas-linux/casadm/csvparse.c

/*
* Copyright(c) 2012-2021 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include "csvparse.h"
#include "cas_lib_utils.h"
#include "safeclib/safe_lib.h"
#include <cas_ioctl_codes.h>

#define SUCCESS 0
#define FAILURE 1

struct CSVFILE_t {
	FILE *f; /**< underlying byte stream*/
	int num_columns; /**< number of columns in recently read
			    line of CSV file */
	int alloc_column_ptrs; /**< number of pointers to columns
				  that can be fit in columns buffer */
	char **columns;	/**< buffer contains exactly one pointer to each
			   column of a csv file */
	char *buffer; /**< buffer to which recently read line of a csv file
			 is stored */
	int buffer_size; /**< size of a buffer */

	char csv_comment; /**< character markng whole line comment. if set to null,
			     comments in file are not respected */
	char csv_separator; /**< csv separator (by default coma, but in some csv formats
			       it is something different */
};

#define DEF_ALLOC_COL_PTRS 2
#define DEF_CSV_FILE_BUFFER_SIZE 20

/* return error when input dataset size exceeds some common sense limitations */
#define MAX_NUM_COLUMNS 100
#define MAX_LINE_LENGTH 8192

CSVFILE *csv_open(const char *path, const char *mode)
{
	CSVFILE *csv;

	if (!path || !mode) {
		return NULL;
	}

	/* open underlying file as a character stream */
	FILE *f = fopen(path, mode);
	if (!f) {
		return NULL;
	}

	csv = csv_fopen(f);
	if (NULL == csv) {
		fclose(f);
		return NULL;
	}

	return csv;
}

CSVFILE *csv_fopen(FILE *f)
{
	CSVFILE *cf = malloc(sizeof(*cf));
	if (!cf) {
		return NULL;
	}
	/* allocate storage for columns of CSV file */
	cf->num_columns = 0;
	cf->alloc_column_ptrs = DEF_ALLOC_COL_PTRS;

	cf->columns = malloc(cf->alloc_column_ptrs * sizeof(char *));
	if (!cf->columns) {
		free(cf);
		return NULL;
	}

	/* allocate storage for line of CSV file */
	cf->buffer_size = DEF_CSV_FILE_BUFFER_SIZE;
	cf->buffer = malloc(cf->buffer_size);
	if (!cf->buffer) {
		free(cf->columns);
		free(cf);
		return NULL;
	}

	/* assign underlying file as a character stream */
	cf->f = f;

	cf->csv_separator = ',';
	cf->csv_comment = 0;

	return cf;
}

void csv_close(CSVFILE *cf)
{
	fclose(cf->f);
	csv_close_nu(cf);
}

void csv_close_nu(CSVFILE *cf)
{
	free(cf->columns);
	free(cf->buffer);
	memset(cf, 0, sizeof(*cf));
	free(cf);
}

/**
 * internal helper function for the library.
 */
static int ensure_items_array(CSVFILE *cf)
{
	if (cf->num_columns > MAX_NUM_COLUMNS) {
		return FAILURE;
	} else if (cf->num_columns < cf->alloc_column_ptrs) {
		return SUCCESS;
	} else {
		char **tmp;
		cf->alloc_column_ptrs = cf->num_columns * 2;
		tmp =
		    realloc(cf->columns,
			    cf->alloc_column_ptrs * sizeof(char *));
		if (!tmp) {
			return FAILURE;
		} else {
			cf->columns = tmp;
			return SUCCESS;
		}
	}
}

/**
 * Function checks if CSV file is a valid one.
 */
bool csv_is_valid(CSVFILE *cf)
{
	if (!cf) {
		return false;
	} else if (!cf->f) {
		return false;
	} else if (!cf->columns) {
		return false;
	} else if (!cf->buffer) {
		return false;
	} else {
		return true;
	}
}

static int csv_read_line(CSVFILE *cf)
{
	char *line;
	char *c;
	int i, len;
	int already_read = 0;
	/* fgets reads at most buffer_size-1 characters and always places NULL
	 * at the end. */

	while (true) {
		line = fgets(cf->buffer + already_read,
			     cf->buffer_size - already_read, cf->f);
		if (!line) {
			return FAILURE;
		}
		line = cf->buffer;
		/* check that entire line was read; if failed, expand buffer and retry
		 * or (in case of eof) be happy with what we have */
		c = line;
		i = 0;

		while (*c && *c != '\n') {
			c++;
			i++;
		}
		len = i;
		if (len > MAX_LINE_LENGTH) {
			return FAILURE;
		}

		/* buffer ends with 0 while it is not an EOF - sign that we have NOT read entire line
		 * - try to expand buffer*/
		if (!*c && !feof(cf->f)) {
			already_read = cf->buffer_size - 1;
			cf->buffer_size *= 2;
			char *tmp = realloc(cf->buffer, cf->buffer_size);

			if (tmp) {
				cf->buffer = tmp;
				continue;
			} else {
				return FAILURE;
			}
		}

		if (cf->buffer[i] == '\n') {
			cf->buffer[i] = 0;
		}
		break;
	}
	return SUCCESS;
}

int csv_read(CSVFILE *cf)
{
	int i, j, spaces_at_end;
	bool parsing_token = false;	/* if false, "cursor" is over whitespace, otherwise
					 * it is over part of token */

	bool quotation = false;
	if (!csv_is_valid(cf)) {
		return FAILURE;
	}
	if (csv_read_line(cf)) {
		return FAILURE;
	}

	i = 0;
	cf->num_columns = 0;
	cf->columns[0] = 0;
	spaces_at_end = 0;

	while (cf->buffer[i]) {
		if (quotation) {	/* handling text within quotation marks -
					 * ignore commas in this kind of text and don't strip spaces */
			if (cf->buffer[i] == '"' && cf->buffer[i + 1] == '"') {
				/* double quotation mark is considered escaped quotation by
				 * Micros~1 Excel. We should do likewise */
				if (!parsing_token) {	/* start of an cf->buffer */
					cf->columns[cf->num_columns] =
					    &cf->buffer[i];
					parsing_token = true;
				}
				++i;
				memmove_s(cf->columns[cf->num_columns] + 1,
					cf->buffer_size - (cf->columns[cf->num_columns] - cf->buffer),
					cf->columns[cf->num_columns],
					&cf->buffer[i] - cf->columns[cf->num_columns]);
				cf->columns[cf->num_columns]++;
			} else if (cf->buffer[i] == '"') {
				quotation = false;
				parsing_token = false;
				cf->buffer[i] = 0;
			} else if (!parsing_token) {	/* start of an cf->buffer */
				cf->columns[cf->num_columns] = &cf->buffer[i];
				parsing_token = true;
			}
		} else {	/* handling text outside quotation mark */
			if (cf->buffer[i] == cf->csv_separator) {
				(cf->num_columns)++;
				if (ensure_items_array(cf)) {
					return FAILURE;
				}
				cf->columns[cf->num_columns] = 0;
				parsing_token = false;
				cf->buffer[i] = 0;
				for (j = i - spaces_at_end; j != i; ++j) {
					cf->buffer[j] = 0;
				}

			} else if (cf->buffer[i] == '"') {
				quotation = true;
				spaces_at_end = 0;
			} else if (cf->csv_comment
				   && cf->buffer[i] == cf->csv_comment) {
				cf->buffer[i] = 0;
				break;
			} else if (!isspace(cf->buffer[i])) {
				if (!parsing_token) {	/* start of an cf->buffer */
					if (!cf->columns[cf->num_columns]) {
						cf->columns[cf->num_columns] =
						    &cf->buffer[i];
					}
					parsing_token = true;
				}
				spaces_at_end = 0;
			} else {	/* no token.; clear spaces, possibly */
				parsing_token = false;
				spaces_at_end++;
			}
		}
		++i;
	}

	for (j = i - spaces_at_end; j != i; ++j) {
		cf->buffer[j] = 0;
	}

	/*always consider empty line to have exactly one empty column */
	cf->num_columns++;

	for (j = 0; j != cf->num_columns; ++j) {
		/* if no columns were detected during parse, make sure that columns[x]
		 * points to an empty string and not into (NULL) */
		if (!cf->columns[j]) {	/* so that empty columns will return empty string and
					   not a null-pointer */
			cf->columns[j] = &cf->buffer[i];
		}
	}

	return SUCCESS;
}

unsigned int csv_count_cols(CSVFILE *line)
{
	return line->num_columns;
}

int csv_empty_line(CSVFILE *cf)
{
	if (!csv_is_valid(cf)) {
		return FAILURE;
	}
	if (0 == csv_count_cols(cf)) {
		return 1;
	} else if (1 == csv_count_cols(cf)) {
		const char *value = csv_get_col(cf, 0);
		if (strempty(value)) {
			return 1;
		}
	}

	return 0;
}

char *csv_get_col(CSVFILE *cf, int coln)
{
	if (!csv_is_valid(cf)) {
		return NULL;
	}
	return cf->columns[coln];
}

char **csv_get_col_ptr(CSVFILE *cf)
{
	return cf->columns;
}

void csv_seek_beg(CSVFILE *cf)
{
	fseek(cf->f, 0, SEEK_SET);
}

int csv_feof(CSVFILE *cf)
{
	return feof(cf->f);
}

int csv_print(const char *path)
{
	int i, j, k; /* column, line, row, within column */
	int num_col_lengths = DEF_ALLOC_COL_PTRS;
	static const int def_col_len = 5;
	int actual_num_cols = 1;

	CSVFILE *cf = csv_open(path, "r");
	if (!cf) {
		return FAILURE;
	}

	int *col_lengths = malloc(num_col_lengths * sizeof(int));
	if (!col_lengths) {
		csv_close(cf);
		return FAILURE;
	}

	for (i = 0; i != num_col_lengths; ++i) {
		col_lengths[i] = def_col_len;
	}

	/*calculate length of each column */
	i = j = 0;
	while (!csv_read(cf)) {
		int num_cols = csv_count_cols(cf);
		if (num_cols > actual_num_cols) {
			actual_num_cols = num_cols;
		}

		if (num_cols > num_col_lengths) {
			/* CSV file happens to have more columns, than we have allocated
			 * memory for */
			int *tmp =
			    realloc(col_lengths, num_cols * 2 * sizeof(int));
			if (!tmp) {
				free(col_lengths);
				csv_close(cf);
				return FAILURE;
			}
			/* reallocation successful */
			col_lengths = tmp;
			for (i = num_col_lengths; i != num_cols * 2; ++i) {
				col_lengths[i] = def_col_len;
			}
			num_col_lengths = num_cols * 2;
		}

		for (i = 0; i != csv_count_cols(cf); ++i) {
			int len = strnlen(csv_get_col(cf, i), MAX_STR_LEN);
			if (col_lengths[i] < len) {
				col_lengths[i] = len;
			}
		}
		++j;
	}

	/*actually format pretty table */
	csv_seek_beg(cf);
	printf("     | ");

	for (i = 0; i != actual_num_cols; ++i) {
		int before = col_lengths[i] / 2;

		for (k = 0; k != before; ++k) {
			putchar(' ');
		}
		putchar(i + 'A');
		for (k = 0; k != col_lengths[i] - before - 1; ++k) {
			putchar(' ');
		}
		printf(" | ");
	}
	printf("\n-----|-");

	for (i = 0; i != actual_num_cols; ++i) {
		for (k = 0; k != col_lengths[i]; ++k) {
			putchar('-');
		}
		printf("-|-");
	}
	printf("\n");

	j = 1;
	while (!csv_read(cf)) {
		printf("%4d | ", j);
		int num_cols = csv_count_cols(cf);
		for (i = 0; i != actual_num_cols; ++i) {
			if (i < num_cols) {
				char *c = csv_get_col(cf, i);
				for (k = 0; c[k]; k++) {
					putchar(c[k]);
				}
			} else {
				k = 0;
			}
			for (; k != col_lengths[i]; ++k) {
				putchar(' ');
			}
			printf(" | ");
		}
		++j;
		putchar('\n');
	}

	free(col_lengths);
	csv_close(cf);
	return SUCCESS;
}

#ifdef __CSV_SAMPLE__
/**
 * usage example for csvparse library
 * gcc -ggdb csvparse.c -I../common -D__CSV_SAMPLE__ -ocsvsample
 */
int main()
{
	puts("Validated configurations to run Intel CAS");
	csv_print("../../tools/build_installer/utils/validated_configurations.csv");
	putchar('\n');

	puts("IO Classes for Intel CAS");
	csv_print("../../tools/build_installer/utils/default_ioclasses.csv");
	putchar('\n');

}
#endif