kf = 10  # number of chunks to read
sets = 90  # data rows per chunk
for i in range(kf):
    # Skip only the data lines that belong to earlier chunks (file lines
    # 1 .. i*sets) and keep line 0, the header. Passing the plain integer
    # i*sets would also skip the header, so for i > 0 a data row would be
    # consumed as the column names.
    # NOTE(review): `chunk` is not defined in this snippet -- it must already
    # exist as an indexable container; confirm against the full script.
    chunk[i] = pd.read_csv(
        "Dataset.csv",
        skiprows=range(1, i * sets + 1),
        nrows=sets,
    )
打印结果显示,每个分块得到的始终是文件最前面的 90 行,而不是第 0–89 行、第 90–179 行,依此类推。我该如何修正这段代码,使其先跳过前面的行,再从相应位置开始读取文件?
以下是 kf = 100、sets = 9 时的输出:
X1 X2 X3 ... X29 X30 Target
0 -2.335543 -2.325887 -2.367347 ... 2.001746 3.102024 1
1 -0.132771 0.463992 -0.282286 ... 3.003794 2.473191 1
2 -1.000121 -1.512276 -3.326958 ... 0.155254 5.855211 1
3 -1.170981 -3.493062 -2.241450 ... 3.228326 3.301115 1
4 -1.449553 -1.428624 -1.401973 ... 1.547833 2.008935 1
5 -1.657024 -1.567071 -1.784387 ... 0.606907 -2.135309 1
6 -0.323730 -1.237250 -2.679961 ... -1.365039 3.101155 1
7 -1.011255 -0.706056 -1.583983 ... -0.678562 -1.950106 1
8 0.388855 0.359412 0.037113 ... -3.413041 -4.051897 1
[9 rows x 31 columns]
X1 X2 X3 ... X29 X30 Target
0 -2.335543 -2.325887 -2.367347 ... 2.001746 3.102024 1
1 -0.132771 0.463992 -0.282286 ... 3.003794 2.473191 1
2 -1.000121 -1.512276 -3.326958 ... 0.155254 5.855211 1
3 -1.170981 -3.493062 -2.241450 ... 3.228326 3.301115 1
4 -1.449553 -1.428624 -1.401973 ... 1.547833 2.008935 1
5 -1.657024 -1.567071 -1.784387 ... 0.606907 -2.135309 1
6 -0.323730 -1.237250 -2.679961 ... -1.365039 3.101155 1
7 -1.011255 -0.706056 -1.583983 ... -0.678562 -1.950106 1
8 0.388855 0.359412 0.037113 ... -3.413041 -4.051897 1
[9 rows x 31 columns]
答案 0 :(得分:3)
# With `chunksize` set, read_csv returns an iterator instead of one DataFrame;
# each item it yields is a DataFrame holding the next `sets` rows (the last
# one may be shorter), all sharing the header parsed from the file.
reader = pd.read_csv("Dataset.csv", chunksize=sets)
for df in reader:
    print(df)
编辑:
我用您的代码构造了示例数据,发现问题在于:从第二个分块开始,跳过若干行之后的第一行数据会被错误地解析为列名。因此需要使用参数 `names`,并通过 if-else 区分两种情况:第一个分块传入 `None`(列名由文件的表头行提供),其余分块传入事先读取好的列名。
import io

import pandas as pd
# Original data: an in-memory CSV with one header line and 16 data rows.
temp = """colA,colB
A,1
B,2
A,3
C,4
B,5
A,6
C,7
B,8
A,9
C,10
B,11
A,12
C,13
D,14
B,15
C,16"""
kf = 3  # number of chunks to read
sets = 6  # data rows per chunk
# After testing, replace 'io.StringIO(temp)' with 'Dataset.csv'.
# io.StringIO is used because pd.compat.StringIO no longer exists in
# current pandas releases.
# nrows=0 parses only the header line, so `cols` holds just the column labels.
cols = pd.read_csv(io.StringIO(temp), nrows=0).columns
print(cols)
Index(['colA', 'colB'], dtype='object')
for i in range(kf):
    if i == 0:
        # First chunk: nothing extra to skip; names=None lets pandas take
        # the column labels from the file's own header line.
        val = 0
        names = None
    else:
        # Later chunks: skip the header line as well (+1) and supply the
        # labels explicitly, so the first remaining line is read as data
        # instead of being consumed as a header.
        val = 1
        names = cols
    # io.StringIO replaces pd.compat.StringIO, which no longer exists in
    # current pandas releases.
    df = pd.read_csv(
        io.StringIO(temp),
        skiprows=i * sets + val,
        nrows=sets,
        names=names,
    )
    print(df)
colA colB
0 A 1
1 B 2
2 A 3
3 C 4
4 B 5
5 A 6
colA colB
0 C 7
1 B 8
2 A 9
3 C 10
4 B 11
5 A 12
colA colB
0 C 13
1 D 14
2 B 15
3 C 16