使用带动态分配的C从CSV读取和保存值

时间:2017-08-30 04:50:00

标签: c arrays csv

关于底部的编辑(我把它写成编辑而不是它自己的问题,因为它是如此相关):

我正在编写一个CSV阅读器,它应该将所有值保存为大型多维数组中的字符。在以前的帖子中,我被警告过要么对我的缩减代码过于含糊,所以我要发布更多内容。我想为它的长度道歉,因为我仍在试图估计这个网站上的长度是多少。

最终,该程序将成为我创建的用于执行数据分析的头文件的头条新闻。我用于此程序的头文件是:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

我遇到的问题是读者似乎在名为read_csv()的函数中工作。我知道这是因为放在该函数中的printf()语句。当我在main()中printf()多维字符数组'buffer'时,数据无法正确显示。

在main()中,第一列将正确打印,但接下来的三列不会。此外,我的目标是能够读取任何带有最多20个字符的单元格的MxN CSV文件。使用malloc()创建通用代码是此任务的下一个目标。

我的main()结构如下:

int main(){

  FILE *f;
  char fname[20];
  int i, j;

  printf("enter name of csv file : ") ;
  scanf("%s",fname) ;       

  f = fopen(fname, "r");

  //find row/col
  int find_c_r[2];
  int * pfrc = &find_c_r[0];
  pfrc = find_col_row(f);
  printf("Find_c_r[0] = %d \t Find_c_r[1] = %d\n", *pfrc, *(pfrc+1));

  int numCol = *pfrc;
  int numRow = *(pfrc+1);

  char buffer[50][50][20];// ideally size buffer[numCol][numRow][20]

  //sets all values to NULL
  for(j = 0 ; j < 50 ; j++){
    for(i = 0 ; i < 10 ; i++){   
      memset(buffer[i][j],'\0', 20);
    }
  }

  read_csv(f, numCol, numRow, buffer); 
  /////
  printf("\n\n");

 for(j = 0 ; j < numRow ; j++){
    for(i = 0 ; i < numCol; i++){      
      printf("[%d][%d]",i, j);
      printf("_%s_  ",buffer[i][j]);
    } 
    printf("\n");
  }

  printf("END OF PROGRAM.\n");
 }

其中一部分是我的数组'缓冲区'的动态分配。我不太确定如何以这种格式使用malloc()。

第一个函数main()调用是find_col_row(FILE * f)。它没有问题,但人们在问题中要求更多我的代码。它返回一个指向int数组的指针,该数组保存正在读取的CSV文件中的Columns和Rows数:

int * find_col_row(FILE *f){
 //Find numCol and numRow
  int numCol, numRow;
  char c;
  int new_line= 0;
  int comma = 0;
  int z = 0;
  numCol = 0;
  numRow = 0;
  while (c != EOF) {
    c = fgetc(f) ; 
    if(c == ','){ //WORDS MUST BE SEPARATED BY COMMAS
      comma++;
    }
    if(c == ';'){ //LINES MUST BE SEPARATED BY SEMI-COLONS
      new_line++;
      if(numCol == 0){
         numCol = comma + 1;
      }
    } 
  }


  numRow = new_line - 1;

  int a[2] = {numCol, numRow};
  int * pa = &a[0];

  return pa;
}

被调用的第二个函数是read_csv(...)。此函数的目标是“读取”CSV文件并“保存”多维,字符数组'缓冲区'中每个单元格的值:

void read_csv(FILE *f, int numCol, int numRow, char buffer[numCol][numRow][20])  //cells split by ',', row split by ';'
{
  char fname[100];
  int i = 0, j = 0;
  int c = 0,n = 0, z = 0;

  if (f == NULL) {
    printf("can't open file, %s\n", fname) ;
    exit(1) ;
  }

  n = 0 ;

  fseek(f, 0, SEEK_SET); //starts reading the file from the start
  c = fgetc(f) ;

  i = 0;
  j = 0;


  char temp[20];
  memset(temp, '\0', 20);
  int tc = 0; //temp counter
  int mv_temp = 0; //this aids in removing the first character if == ' '
  temp[tc] = c;
  while (c != EOF) {

    if(c == ','){
      if(temp[0] == ' '){
         for(mv_temp = 0 ; mv_temp < tc ; mv_temp++){
           temp[mv_temp] = temp[mv_temp + 1];
         }
      }
      strncpy(buffer[i][j], temp, 20);
      i++; 
      tc = 0;
      memset(temp, '\0', 20);
    }else if(c == ';'){
      if(temp[0] == ' '){
         for(mv_temp = 0 ; mv_temp < tc ; mv_temp++){
           temp[mv_temp] = temp[mv_temp + 1];
         }
      }
      strncpy(buffer[i][j], temp, 20);
      j++;
      i = 0;
      tc = 0;
      memset(temp, '\0', 20);
       c = fgetc(f);

    }else{
      temp[tc] = c;
      tc++;
    }
    c = fgetc(f);    
  }  /////while loop over


  for(j = 0 ; j < numRow ; j++){
    for(i = 0 ; i < numCol; i++){      
      printf("[%d][%d]",i, j);
      printf("_%s_  ",buffer[i][j]);
    } 
    printf("\n");
  }

}

未尝试使用任何其他CSV文件运行此程序,这是我使用的CSV。运行程序时,第一步是scanf()文件的名称。我称之为

simp.csv

作为参考,这个数据是指基本的美式足球数据:形成,阵型变化,向下,距离。该文件看起来像:

OFF_FORM,FORM_VAR, DN, DIST;
DEUCE,RIGHT, 1, 10;
DEUCE,LEFT, 2, 7;
TRIO,RIGHT, 3, 3;
TREY,LEFT, 1, 10;
TRIO,RODDY, 1, 10;
TREY,LION, 2, 3;
DEEP,LEFT, 1, 10;
DEUCE,LION, 2, 15;
DEUCE,RIGHT, 3, 4;
DEEP,RODDY, 1, 10;
TREY,RIGHT, 1, 10;
TRIO,RAM, 2, 8;
TRIO,RAM, 3, 8;
DEEP,ROCK, 1, 10;
DEUCE,LION, 1, 10;
TRIO,LOUIE, 1, 10;
TRIO,RIGHT, 2,4;
DEUCE,RIGHT, 3, 6;
DEUCE,LION, 4, 2;
TREY,LION,1,10;

再次,我为问题的长度道歉。我希望我有足够的信息来帮助我们。作为一名年轻/新手程序员,我愿意接受任何反馈。如果您可以回答我的问题并指出优化我的代码以更有效地工作的方法,我将非常感谢您的反馈。

//////////////////////////////// ////////////////////////////////

编辑:

@ BLUEPIXY在他们的回答中分享的代码完美无缺。现在,我只是想把它变成一个基本的头文件,我不知道如何修改我看到的一些问题。我对代码所做的只是更改函数的名称并将它们转换为头文件。

#ifndef bp_csv_reader
#define bp_csv_reader    

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>

//https://tools.ietf.org/html/rfc4180
char *csv_get_field(FILE *fp, char separator, int *state)

char ***csv_read(const char *filename, size_t *rows, size_t *cols)

char *csv_trim(char *s)

#endif

csv.c看起来像:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "csv.h"

//https://tools.ietf.org/html/rfc4180
char *csv_get_field(FILE *fp, char separator, int *state){
    int ch = fgetc(fp);

    if(ch == EOF)
        return NULL;

    size_t size = 1, index = 0;
    char *field = malloc(size);
    bool quoted_in = false;

    for(;ch != EOF; ch = fgetc(fp)){
        if(ch == '"'){
            if(quoted_in){
                int prefetch = fgetc(fp);
                if(prefetch == '"'){
                    ch = prefetch;
                } else {
                    quoted_in = false;
                    ungetc(prefetch, fp);
                    continue;
                }
            } else {
                quoted_in = true;
                continue;
            }
        } else if(!quoted_in && (ch == separator || ch == '\n')){
            break;
        }
        field[index++] = ch;
        char *temp = realloc(field, ++size);
        if(!temp){
            perror("realloc:");
            free(field);
            exit(EXIT_FAILURE);
        }
        field = temp;
    }
    field[index] = 0;
    *state = ch;
    if(quoted_in){
        fprintf(stderr, "The quotes is not closed.\n");
        free(field);
        return NULL;
    }
    return field;
}

char ***csv_read(const char *filename, size_t *rows, size_t *cols){
    *rows = *cols = 0;

    FILE *fp = fopen(filename, "r");
    if(!fp){
        fprintf(stderr, "%s can't open in %s\n", filename, __func__);
        perror("fopen");
        return NULL;
    }


    char *field;
    int state;
    size_t r = 0, c = 0;
    char ***mat = NULL;
    void *temp;

    while(field = csv_get_field(fp, ',', &state)){
        if(c == 0){
            mat = realloc(mat, (r + 1)*sizeof(*mat));
            if(!mat){
                fprintf(stderr, "realloc failed in %s\n", __func__);
                exit(EXIT_FAILURE);
            }
            mat[r] = NULL;
        }
        mat[r] = realloc(mat[r], (c + 1)*sizeof(**mat));
        if(!mat[r]){
            fprintf(stderr, "realloc failed in %s\n", __func__);
            exit(EXIT_FAILURE);
        }
        mat[r][c++] = field;
        if(state == '\n' || state == EOF){
            if(*cols == 0){
                *cols = c;
            } else if(c != *cols){
                fprintf(stderr, "line %zu doesn't match number of columns in %s\n", r, filename);
                exit(EXIT_FAILURE);
            }
            c  = 0;
            *rows = ++r;
        }
    }
    fclose(fp);

    return mat;
}

#include <ctype.h>

char *csv_trim(char *s){
    if(!s || !*s)
        return s;

    char *from, *to;

    for(from = s; *from && isspace((unsigned char)*from); ++from);
    for(to = s; *from;){
        *to++ = *from++;
    }
    *to = 0;
    while(s != to && isspace((unsigned char)to[-1])){
        *--to = 0;
    }
    return s;
}

调用它的代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <csv.h>


int main(void){
    size_t rows, cols;


    char ***mat = csv_read("simp.csv", &rows, &cols);

    size_t r, c;

    for(r = 0; r < rows; ++r){
        for(c = 0; c < cols; ++c){
            if(c)
                putchar(',');
            printf("%s", csv_trim(mat[r][c]));
            free(mat[r][c]);
        }
        puts("");
        free(mat[r]);
    }
    free(mat);
    return 0;
}

我不知道为什么我遇到了我遇到的错误。我得到的代码完全在自己的文件中运行。直到我将它们放入头文件中才出现问题。这是我编译int终端的方式:

 acom test_csv.c csv.c -I. csv.h

这些是我看到的错误。

In file included from test_cesv.c:5:0:
./csv.h: In function ‘csv_get_field’:
./csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^
test_cesv.c:142:1: error: expected ‘{’ at end of input
 }
 ^
In file included from csv.c:5:0:
csv.h: In function ‘csv_get_field’:
csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^
csv.c:55:67: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
 char ***csv_read(const char *filename, size_t *rows, size_t *cols){
                                                                   ^
csv.c:105:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
 char *csv_trim(char *s){
                        ^
csv.c:120:1: error: expected ‘{’ at end of input
 }
 ^
csv.h: In function ‘csv_get_field’:
csv.h:15:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘char’
 char *csv_trim(char *s)
 ^

1 个答案:

答案 0 :(得分:0)

试试这个

data<- read.table(text = "
A B C D
99 101 99 50
90 110 110 151", header = TRUE)

for(cl in 1:ncol(data)){
  data[,cl] <- ifelse(data[,cl]>=100, 1, 0)
}
data