如何从混乱的CSV中提取数据?

时间:2019-02-17 12:50:52

标签: python python-3.x pandas data-analysis

如何提取数据

#include <stdio.h>
#include <windows.h>

/*
 * EnumWindows callback: prints the handle of the window being enumerated
 * together with the ID of the process that created it.
 *
 * str - handle of the current top-level window.
 * p   - application-defined value forwarded by EnumWindows (unused here).
 *
 * Returns TRUE so that enumeration continues with the next window.
 */
BOOL CALLBACK DisplayData(HWND str, LPARAM p) {
    /* Real storage for the PID; the API writes through the pointer it is
       given, so an uninitialized LPDWORD must not be passed. */
    DWORD process_id = 0;

    (void)p; /* silence the unused-parameter warning */

    /* The return value is the creating thread's ID, which is not needed. */
    GetWindowThreadProcessId(str, &process_id);

    printf("PID :: %lx\n", (unsigned long)process_id);
    printf("Found: %p, P_ID: %lx\n", (void *)str, (unsigned long)process_id);
    return TRUE;
}

/*
 * Enumerates all top-level windows, invoking DisplayData for each one.
 *
 * Returns 0 on success and 1 if EnumWindows reports a failure.
 */
int main(void) {
    /* EnumWindows returns zero on failure; surface that instead of
       silently exiting with a success code. */
    if (!EnumWindows((WNDENUMPROC) DisplayData, 1)) {
        fprintf(stderr, "EnumWindows failed: %lu\n",
                (unsigned long)GetLastError());
        return 1;
    }
    return 0;
}

我用过

  

# Load the CSV with pandas' default settings (UTF-8 decoding).
Aps_data = pd.read_csv("test.csv")

得到错误:UnicodeDecodeError: 'utf-8' 编解码器无法解码位置 7 的字节 0xb5:无效的起始字节(invalid start byte)

2 个答案:

答案 0 :(得分:0)

只需使用csv模块

import csv

# newline='' leaves line-ending handling to the csv module, as its
# documentation recommends for files passed to csv.reader.
# latin-1 maps every possible byte to a character, so bytes such as 0xb5 do
# not raise UnicodeDecodeError.
# NOTE(review): replace with the file's real encoding if it is known.
with open('mycsv.csv', 'r', newline='', encoding='latin-1') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)  # print whole row
        if row:  # blank lines are returned as empty lists
            print(row[0])  # print first column

Edit1:打开文件时将rb替换为r,以便它在Python2和Python3中均可使用。

答案 1 :(得分:0)

直接读取(复制并粘贴的)数据没有问题。请注意,使用 pandas.read_csv 读取实际文件时,应通过 encoding 参数指定文件的编码;可参考相关回答并查阅 pandas 文档。

将数据读入数据框:

import pandas as pd
from io import StringIO

# Raw string literal: the sample contains the Windows path C:\app\ok, and in
# a normal string literal "\a" would be converted into the BEL control
# character while "\o" is an invalid escape sequence.
s = r"""
Sample_File,C:\app\ok,,,,,,,,,,,,,,,,
Sample Time,20,,,,,,,,,,,,,,,,
Density,1,,,,,,,,,,,,,,,,
Stokes,off,,,,,,,,,,,,,,,,
Lower,0.486,,,,,,,,,,,,,,,,
Upper ,20.53,,,,,,,,,,,,,,,,
Sample #,75,,,,,,,,,,,,,,,,
Date,1/30/2019,,,,,,,,,,,,,,,,
Start Time,8:59:44,,,,,,,,,,,,,,,,
Correlate ,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,conts total
<0.523,0,3,1,0,0,4,9,2,0,0,0,0,0,0,0,0,19
0.542,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3
1.037,0,0,4,8,2,1,1,2,0,0,0,0,0,0,0,0,18
1.114,0,1,5,16,3,0,0,0,0,0,0,0,0,0,0,0,25
1.197,0,0,2,11,7,2,1,2,0,0,0,0,0,0,0,0,25
2.129,0,15,49,21,150,401,4,13,8,0,0,0,0,0,0,0,661
2.288,0,15,68,53,201,795,18,13,3,0,0,0,0,0,0,0,1166
2.458,0,9,72,99,238,1533,15,32,6,0,0,0,0,0,0,0,2004
3.786,0,0,0,0,85,10054,1303,333,41,0,0,0,0,0,0,0,11816
4.068,0,0,0,1,33,8310,1504,422,38,0,1,0,0,0,0,0,10309
Diameter,Raw Counts,,,,,,,,,,,,,,,,
<0.523,19,,,,,,,,,,,,,,,,
0.542,3,,,,,,,,,,,,,,,,
0.583,4,,,,,,,,,,,,,,,,
0.626,4,,,,,,,,,,,,,,,,
0.673,9,,,,,,,,,,,,,,,,
Side,Raw Counts,,,,,,,,,,,,,,,,
1,0,,,,,,,,,,,,,,,,
2,129,,,,,,,,,,,,,,,,
3,361,,,,,,,,,,,,,,,,
Event 1,971,,,,,,,,,,,,,,,,
Event 3,7091,,,,,,,,,,,,,,,,
Event 4,1,,,,,,,,,,,,,,,,
Dead Time,448,,,,,,,,,,,,,,,,
pressure,1006,,,,,,,,,,,,,,,,
"""

# Parse the embedded text as CSV; with pandas' defaults the leading blank line
# is skipped and the first data line supplies the column names.
df = pd.read_csv(StringIO(s))