484 lines
10 KiB
C
484 lines
10 KiB
C
/*
|
|
* Copyright(c) 2012-2021 Intel Corporation
|
|
* SPDX-License-Identifier: BSD-3-Clause-Clear
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdint.h>
|
|
#include "csvparse.h"
|
|
#include "cas_lib_utils.h"
|
|
#include "safeclib/safe_lib.h"
|
|
#include <cas_ioctl_codes.h>
|
|
|
|
/* Status codes used by the functions in this file that report success or
 * failure: zero means success. */
#define SUCCESS 0
|
|
/* Non-zero status returned on any failure. */
#define FAILURE 1
|
|
|
|
/**
 * State of one CSV file being read: the underlying stream together with the
 * buffers holding the most recently read line and the pointers to its columns.
 */
struct CSVFILE_t {
	/** underlying byte stream */
	FILE *f;

	/** number of columns in the most recently read line of the CSV file */
	int num_columns;

	/** number of column pointers that fit in the columns array */
	int alloc_column_ptrs;

	/** array containing exactly one pointer to each column of the line */
	char **columns;

	/** storage for the most recently read line of the CSV file */
	char *buffer;

	/** size of buffer */
	int buffer_size;

	/** character marking a comment line; when set to zero, comments in
	 *  the file are not recognized */
	char csv_comment;

	/** column separator (a comma by default, but some CSV formats use
	 *  something different) */
	char csv_separator;
};
|
|
|
|
/* Initial capacities used by csv_fopen(): the number of column pointers and
 * the size in bytes of the line buffer. Both are enlarged on demand. */
#define DEF_ALLOC_COL_PTRS 2
|
|
#define DEF_CSV_FILE_BUFFER_SIZE 20
|
|
|
|
/* Sanity limits: reading a line fails when it has more columns or more
 * characters than these bounds allow. */
|
|
#define MAX_NUM_COLUMNS 100
|
|
#define MAX_LINE_LENGTH 8192
|
|
|
|
/**
 * Open the file at @path using fopen() mode @mode and wrap it in a CSV handle.
 *
 * @param path path of the file to open
 * @param mode mode string handed to fopen()
 * @return new CSV handle, or NULL when an argument is NULL, the file cannot
 *         be opened, or the handle cannot be created
 */
CSVFILE *csv_open(const char *path, const char *mode)
{
	FILE *stream;
	CSVFILE *handle;

	if (path == NULL || mode == NULL) {
		return NULL;
	}

	/* open underlying file as a character stream */
	stream = fopen(path, mode);
	if (stream == NULL) {
		return NULL;
	}

	handle = csv_fopen(stream);
	if (handle == NULL) {
		/* no handle took ownership of the stream, so close it here */
		fclose(stream);
	}

	return handle;
}
|
|
|
|
CSVFILE *csv_fopen(FILE *f)
|
|
{
|
|
CSVFILE *cf = malloc(sizeof(*cf));
|
|
if (!cf) {
|
|
return NULL;
|
|
}
|
|
/* allocate storage for columns of CSV file */
|
|
cf->num_columns = 0;
|
|
cf->alloc_column_ptrs = DEF_ALLOC_COL_PTRS;
|
|
|
|
cf->columns = malloc(cf->alloc_column_ptrs * sizeof(char *));
|
|
if (!cf->columns) {
|
|
free(cf);
|
|
return NULL;
|
|
}
|
|
|
|
/* allocate storage for line of CSV file */
|
|
cf->buffer_size = DEF_CSV_FILE_BUFFER_SIZE;
|
|
cf->buffer = malloc(cf->buffer_size);
|
|
if (!cf->buffer) {
|
|
free(cf->columns);
|
|
free(cf);
|
|
return NULL;
|
|
}
|
|
|
|
/* assign underlying file as a character stream */
|
|
cf->f = f;
|
|
|
|
cf->csv_separator = ',';
|
|
cf->csv_comment = 0;
|
|
|
|
return cf;
|
|
}
|
|
|
|
/**
 * Close the underlying stream and release the CSV handle.
 *
 * A NULL handle is ignored. A handle without a stream is released without
 * calling fclose().
 *
 * @param cf handle to close; must not be used afterwards
 */
void csv_close(CSVFILE *cf)
{
	if (!cf) {
		return;
	}

	/* fclose(NULL) is undefined, so only close a stream that exists */
	if (cf->f) {
		fclose(cf->f);
	}
	csv_close_nu(cf);
}
|
|
|
|
/**
 * Release the CSV handle and its buffers without closing the underlying
 * stream.
 *
 * A NULL handle is ignored.
 *
 * @param cf handle to release; must not be used afterwards
 */
void csv_close_nu(CSVFILE *cf)
{
	if (!cf) {
		return;
	}

	free(cf->columns);
	free(cf->buffer);
	/* wipe the structure before handing it back to the allocator */
	memset(cf, 0, sizeof(*cf));
	free(cf);
}
|
|
|
|
/**
|
|
* internal helper function for the library.
|
|
*/
|
|
static int ensure_items_array(CSVFILE *cf)
|
|
{
|
|
if (cf->num_columns > MAX_NUM_COLUMNS) {
|
|
return FAILURE;
|
|
} else if (cf->num_columns < cf->alloc_column_ptrs) {
|
|
return SUCCESS;
|
|
} else {
|
|
char **tmp;
|
|
cf->alloc_column_ptrs = cf->num_columns * 2;
|
|
tmp =
|
|
realloc(cf->columns,
|
|
cf->alloc_column_ptrs * sizeof(char *));
|
|
if (!tmp) {
|
|
return FAILURE;
|
|
} else {
|
|
cf->columns = tmp;
|
|
return SUCCESS;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function checks if CSV file is a valid one.
|
|
*/
|
|
bool csv_is_valid(CSVFILE *cf)
|
|
{
|
|
if (!cf) {
|
|
return false;
|
|
} else if (!cf->f) {
|
|
return false;
|
|
} else if (!cf->columns) {
|
|
return false;
|
|
} else if (!cf->buffer) {
|
|
return false;
|
|
} else {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/**
 * Read the next line of the underlying stream into cf->buffer, enlarging the
 * buffer as needed. A terminating newline, if present, is replaced with NUL.
 *
 * @param cf CSV handle to read from
 * @return SUCCESS when a line is stored in cf->buffer (including a final
 *         line that has no trailing newline); FAILURE when nothing could be
 *         read, when the line is longer than MAX_LINE_LENGTH, or when the
 *         buffer cannot be enlarged
 */
static int csv_read_line(CSVFILE *cf)
{
	int already_read = 0;

	while (true) {
		char *c;
		int len;

		/* fgets reads at most size-1 characters and always places NUL
		 * at the end. */
		if (!fgets(cf->buffer + already_read,
				cf->buffer_size - already_read, cf->f)) {
			if (already_read > 0 && feof(cf->f)) {
				/* An earlier pass filled the buffer exactly and the
				 * file ended right there: what we hold is the
				 * complete last line, so keep it instead of
				 * reporting a failure. */
				cf->buffer[already_read] = 0;
				break;
			}
			return FAILURE;
		}

		/* check that entire line was read; if failed, expand buffer and
		 * retry or (in case of eof) be happy with what we have */
		c = cf->buffer;
		len = 0;
		while (*c && *c != '\n') {
			c++;
			len++;
		}
		if (len > MAX_LINE_LENGTH) {
			return FAILURE;
		}

		/* buffer ends with 0 while it is not an EOF - sign that we have
		 * NOT read entire line - try to expand buffer */
		if (!*c && !feof(cf->f)) {
			int new_size = cf->buffer_size * 2;
			char *tmp = realloc(cf->buffer, new_size);

			if (!tmp) {
				/* buffer and buffer_size still describe the old,
				 * valid allocation */
				return FAILURE;
			}
			/* continue reading over the NUL that ends the old data */
			already_read = cf->buffer_size - 1;
			cf->buffer = tmp;
			cf->buffer_size = new_size;
			continue;
		}

		if (*c == '\n') {
			*c = 0;
		}
		break;
	}

	return SUCCESS;
}
|
|
|
|
/**
 * Read the next line of the CSV file and split it, in place, into columns.
 *
 * After a successful call cf->num_columns holds the number of columns and
 * cf->columns[0 .. num_columns-1] point at NUL-terminated strings inside
 * cf->buffer. Outside quotation marks the separator ends a column, white
 * space in front of and behind a value is dropped, and the comment character
 * (when set) ends the line. Inside quotation marks separators and spaces are
 * kept and a doubled quotation mark stands for a single one. An empty line
 * yields exactly one empty column.
 *
 * @param cf CSV handle to read from
 * @return SUCCESS when a line was read and split; FAILURE when the handle is
 *         not valid, no line could be read, or the column-pointer array
 *         could not hold all columns
 */
int csv_read(CSVFILE *cf)
{
	int i, j, spaces_at_end;
	bool parsing_token = false;	/* if false, "cursor" is over whitespace,
					 * otherwise it is over part of token */
	bool quotation = false;

	if (!csv_is_valid(cf)) {
		return FAILURE;
	}
	if (csv_read_line(cf)) {
		return FAILURE;
	}

	i = 0;
	cf->num_columns = 0;
	cf->columns[0] = 0;
	spaces_at_end = 0;

	while (cf->buffer[i]) {
		if (quotation) {	/* handling text within quotation marks -
					 * ignore commas in this kind of text and
					 * don't strip spaces */
			if (cf->buffer[i] == '"' && cf->buffer[i + 1] == '"') {
				/* double quotation mark is considered escaped
				 * quotation by Micros~1 Excel. We should do
				 * likewise */
				if (!parsing_token) {	/* start of a token */
					cf->columns[cf->num_columns] =
						&cf->buffer[i];
					parsing_token = true;
				}
				++i;
				/* shift the token one byte to the right so that
				 * only one of the two quotation marks remains */
				memmove_s(cf->columns[cf->num_columns] + 1,
					  cf->buffer_size - (cf->columns[cf->num_columns] - cf->buffer),
					  cf->columns[cf->num_columns],
					  &cf->buffer[i] - cf->columns[cf->num_columns]);
				cf->columns[cf->num_columns]++;
			} else if (cf->buffer[i] == '"') {
				/* closing quotation mark ends the token */
				quotation = false;
				parsing_token = false;
				cf->buffer[i] = 0;
			} else if (!parsing_token) {	/* start of a token */
				cf->columns[cf->num_columns] = &cf->buffer[i];
				parsing_token = true;
			}
		} else {	/* handling text outside quotation mark */
			if (cf->buffer[i] == cf->csv_separator) {
				(cf->num_columns)++;
				if (ensure_items_array(cf)) {
					return FAILURE;
				}
				cf->columns[cf->num_columns] = 0;
				parsing_token = false;
				cf->buffer[i] = 0;
				/* cut off spaces trailing the finished column */
				for (j = i - spaces_at_end; j != i; ++j) {
					cf->buffer[j] = 0;
				}
			} else if (cf->buffer[i] == '"') {
				quotation = true;
				spaces_at_end = 0;
			} else if (cf->csv_comment
				   && cf->buffer[i] == cf->csv_comment) {
				cf->buffer[i] = 0;
				break;
			} else if (!isspace((unsigned char)cf->buffer[i])) {
				/* the cast keeps isspace() defined for bytes
				 * that are negative as plain char */
				if (!parsing_token) {	/* start of a token */
					if (!cf->columns[cf->num_columns]) {
						cf->columns[cf->num_columns] =
							&cf->buffer[i];
					}
					parsing_token = true;
				}
				spaces_at_end = 0;
			} else {	/* no token; clear spaces, possibly */
				parsing_token = false;
				spaces_at_end++;
			}
		}
		++i;
	}

	/* cut off spaces trailing the last column */
	for (j = i - spaces_at_end; j != i; ++j) {
		cf->buffer[j] = 0;
	}

	/* always consider empty line to have exactly one empty column */
	cf->num_columns++;

	for (j = 0; j != cf->num_columns; ++j) {
		/* make sure that empty columns return an empty string and not
		 * a null-pointer; buffer[i] is the NUL that ends the line */
		if (!cf->columns[j]) {
			cf->columns[j] = &cf->buffer[i];
		}
	}

	return SUCCESS;
}
|
|
|
|
/**
 * Report the number of columns stored in the handle's num_columns field.
 *
 * @param line CSV handle; must not be NULL
 * @return the column count converted to unsigned int
 */
unsigned int csv_count_cols(CSVFILE *line)
{
	const int count = line->num_columns;

	return (unsigned int)count;
}
|
|
|
|
/**
 * Tell whether the current line is empty, i.e. has no columns or a single
 * column for which strempty() is true.
 *
 * @param cf CSV handle
 * @return 1 when the line is empty, 0 when it is not; FAILURE when the
 *         handle is not valid
 */
int csv_empty_line(CSVFILE *cf)
{
	const char *value;

	if (!csv_is_valid(cf)) {
		return FAILURE;
	}

	switch (csv_count_cols(cf)) {
	case 0:
		return 1;
	case 1:
		value = csv_get_col(cf, 0);
		return strempty(value) ? 1 : 0;
	default:
		return 0;
	}
}
|
|
|
|
/**
 * Get the text of one column of the current line.
 *
 * @param cf   CSV handle
 * @param coln zero-based column index
 * @return pointer to the column's NUL-terminated text, or NULL when the
 *         handle is not valid or @coln is outside 0 .. num_columns-1
 */
char *csv_get_col(CSVFILE *cf, int coln)
{
	if (!csv_is_valid(cf)) {
		return NULL;
	}

	/* slots at or beyond num_columns hold stale or uninitialised
	 * pointers, and a negative index is outside the array altogether */
	if (coln < 0 || coln >= cf->num_columns) {
		return NULL;
	}

	return cf->columns[coln];
}
|
|
|
|
/**
 * Get the handle's array of column pointers.
 *
 * @param cf CSV handle; must not be NULL
 * @return the columns array stored in the handle
 */
char **csv_get_col_ptr(CSVFILE *cf)
{
	char **column_ptrs = cf->columns;

	return column_ptrs;
}
|
|
|
|
/**
 * Move the read position of the underlying stream back to the beginning of
 * the file. The result of fseek() is not checked.
 *
 * @param cf CSV handle; must not be NULL
 */
void csv_seek_beg(CSVFILE *cf)
{
	FILE *stream = cf->f;

	fseek(stream, 0L, SEEK_SET);
}
|
|
|
|
/**
 * Report the end-of-file state of the underlying stream.
 *
 * @param cf CSV handle; must not be NULL
 * @return the value of feof() for the handle's stream
 */
int csv_feof(CSVFILE *cf)
{
	FILE *stream = cf->f;

	return feof(stream);
}
|
|
|
|
int csv_print(const char *path)
|
|
{
|
|
int i, j, k; /* column, line, row, within column */
|
|
int num_col_lengths = DEF_ALLOC_COL_PTRS;
|
|
static const int def_col_len = 5;
|
|
int actual_num_cols = 1;
|
|
|
|
CSVFILE *cf = csv_open(path, "r");
|
|
if (!cf) {
|
|
return FAILURE;
|
|
}
|
|
|
|
int *col_lengths = malloc(num_col_lengths * sizeof(int));
|
|
if (!col_lengths) {
|
|
csv_close(cf);
|
|
return FAILURE;
|
|
}
|
|
|
|
for (i = 0; i != num_col_lengths; ++i) {
|
|
col_lengths[i] = def_col_len;
|
|
}
|
|
|
|
/*calculate length of each column */
|
|
i = j = 0;
|
|
while (!csv_read(cf)) {
|
|
int num_cols = csv_count_cols(cf);
|
|
if (num_cols > actual_num_cols) {
|
|
actual_num_cols = num_cols;
|
|
}
|
|
|
|
if (num_cols > num_col_lengths) {
|
|
/* CSV file happens to have more columns, than we have allocated
|
|
* memory for */
|
|
int *tmp =
|
|
realloc(col_lengths, num_cols * 2 * sizeof(int));
|
|
if (!tmp) {
|
|
free(col_lengths);
|
|
csv_close(cf);
|
|
return FAILURE;
|
|
}
|
|
/* reallocation successful */
|
|
col_lengths = tmp;
|
|
for (i = num_col_lengths; i != num_cols * 2; ++i) {
|
|
col_lengths[i] = def_col_len;
|
|
}
|
|
num_col_lengths = num_cols * 2;
|
|
}
|
|
|
|
for (i = 0; i != csv_count_cols(cf); ++i) {
|
|
int len = strnlen(csv_get_col(cf, i), MAX_STR_LEN);
|
|
if (col_lengths[i] < len) {
|
|
col_lengths[i] = len;
|
|
}
|
|
}
|
|
++j;
|
|
}
|
|
|
|
/*actually format pretty table */
|
|
csv_seek_beg(cf);
|
|
printf(" | ");
|
|
|
|
for (i = 0; i != actual_num_cols; ++i) {
|
|
int before = col_lengths[i] / 2;
|
|
|
|
for (k = 0; k != before; ++k) {
|
|
putchar(' ');
|
|
}
|
|
putchar(i + 'A');
|
|
for (k = 0; k != col_lengths[i] - before - 1; ++k) {
|
|
putchar(' ');
|
|
}
|
|
printf(" | ");
|
|
}
|
|
printf("\n-----|-");
|
|
|
|
for (i = 0; i != actual_num_cols; ++i) {
|
|
for (k = 0; k != col_lengths[i]; ++k) {
|
|
putchar('-');
|
|
}
|
|
printf("-|-");
|
|
}
|
|
printf("\n");
|
|
|
|
j = 1;
|
|
while (!csv_read(cf)) {
|
|
printf("%4d | ", j);
|
|
int num_cols = csv_count_cols(cf);
|
|
for (i = 0; i != actual_num_cols; ++i) {
|
|
if (i < num_cols) {
|
|
char *c = csv_get_col(cf, i);
|
|
for (k = 0; c[k]; k++) {
|
|
putchar(c[k]);
|
|
}
|
|
} else {
|
|
k = 0;
|
|
}
|
|
for (; k != col_lengths[i]; ++k) {
|
|
putchar(' ');
|
|
}
|
|
printf(" | ");
|
|
}
|
|
++j;
|
|
putchar('\n');
|
|
}
|
|
|
|
free(col_lengths);
|
|
csv_close(cf);
|
|
return SUCCESS;
|
|
}
|
|
|
|
#ifdef __CSV_SAMPLE__
/**
 * usage example for csvparse library
 * gcc -ggdb csvparse.c -I../common -D__CSV_SAMPLE__ -ocsvsample
 *
 * Prints each sample CSV file as a table, preceded by its title and
 * followed by an empty line.
 */
int main()
{
	static const struct {
		const char *title;
		const char *path;
	} samples[] = {
		{ "Validated configurations to run Intel CAS",
		  "../../tools/build_installer/utils/validated_configurations.csv" },
		{ "IO Classes for Intel CAS",
		  "../../tools/build_installer/utils/default_ioclasses.csv" },
	};
	size_t n;

	for (n = 0; n < sizeof(samples) / sizeof(samples[0]); ++n) {
		puts(samples[n].title);
		csv_print(samples[n].path);
		putchar('\n');
	}
}
#endif
|