我有以下CSV数据
var
i: Int32;
stream: TFileStream;
Path1: string;
WordApp: TWordApplication;
begin
Path1 := IncludeTrailingPathDelimiter(GetEnvironmentVariable('TEMP')) +
FormatDateTime('yyyymmddhhnnssz', Now) + '.doc';
WordApp := TWordApplication.Create(nil);
stream := TFileStream.Create(Path1, fmCreate);
try
try
with dm do
begin
DataSet.First;
CopyBlobFieldToStream(dm.DataSet,
dm.DataSetBlobField, stream);
ShowMessage(IntToStr(stream.Size));
DataSet.Next;
CopyBlobFieldToStream(dm.DataSet,
dm.DataSetBlobField, stream);
ShowMessage(IntToStr(stream.Size));
CopyBlobFieldToStream(dm.DataSet2,
dm.DataSet2BlobField, stream);
ShowMessage(IntToStr(stream.Size));
end;
finally
stream.Free;
end;
WordApp.Documents.Open(Path1, EmptyParam, False, EmptyParam, EmptyParam,
EmptyParam, EmptyParam, EmptyParam, EmptyParam, EmptyParam, EmptyParam,
EmptyParam, EmptyParam, EmptyParam, EmptyParam, EmptyParam);
WordApp.Visible := True;
finally
WordApp.Free;
end;
前三行是标题。我想要做的是选择第1行和第3行 并将其转换为如下所示的数据框:
id,gene,celltype,stem,stem,stem,bcell,bcell,tcell
id,gene,organs,bm,bm,fl,pt,pt,bm
id,gene,organs,stem1,stem2,stem3,b1,b2,t1
134,foo,about_foo,20,10,11,23,22,79
222,bar,about_bar,17,13,55,12,13,88
我坚持以下内容:
Coln1 Coln2
stem stem1
stem stem2
stem stem3
bcell b1
bcell b2
tcell t1
答案 0 :(得分:1)
您可以在read_csv
中使用参数nrows
和skiprows
:
import pandas as pd
import io
temp=u"""id,gene,celltype,stem,stem,stem,bcell,bcell,tcell
id,gene,organs,bm,bm,fl,pt,pt,bm
id,gene,organs,stem1,stem2,stem3,b1,b2,t1
134,foo,about_foo,20,10,11,23,22,79
222,bar,about_bar,17,13,55,12,13,88"""
#after testing replace io.StringIO(temp) to filename
df = pd.read_csv(io.StringIO(temp),header=None,index_col=[1,2], nrows=2, skiprows=[1])
df = df.ix[:, 1:].reset_index(drop=True).T
df.columns = ['Coln1', 'Coln2']
print df.reset_index(drop=True)
Coln1 Coln2
0 stem stem1
1 stem stem2
2 stem stem3
3 bcell b1
4 bcell b2
5 tell t1
要将前3个标题选择为列,请执行以下操作:
df = pd.read_csv(io.StringIO(temp),header=None,index_col=[1,2], nrows=3, skiprows=[4])
df = df.ix[:, 1:].reset_index(drop=True).T
df.columns = ['Coln1', 'Coln2','Coln3']
print df.reset_index(drop=True)