我在使用 data.table::fread 时遇到了问题。请参阅下面第一个有问题的文件及其用 fread 读取的结果,以及第二个能够被正确读取的文件。
# Test data.table::fread on the following data, which has two newlines (i.e.
# one empty line) after "Trust" and before "CEF", splitting the first record
# across lines.
# NOTE: the empty line inside the string literal is intentional and is the
# point of this reproducer; do not remove it when reformatting.
textProblem <- "Ticker;FullName;NorgateID;IsIndex;MarketID;Market;GroupID;Group;IndustryID;Industry;GicsID;GicsCategoryName;IcbID;IcbCategoryName;trbccode;trbcname;gicscode;gicsname;indexsymbol1;indexsymbol2;indexsymbol3;indexsymbol4;DelistingDate;PointValue;MarginDeposit;RoundLotSize;TickSize
TFA-200101;Morgan Stanley Dean Witter Municipal Income Trust

CEF;265990;0;14;US Delisted Stocks;2;Investment Company;0;Unassigned or n/a;;;;;n/a;n/a;n/a;n/a;;;;;2001-01-23;1;0;0;0"
# Parse the problematic text with ";" as the field separator, treating the
# listed placeholder strings as NA, and convert the result to a plain
# data.frame. The value is not assigned, so it is printed when run at the
# console.
as.data.frame(
  data.table::fread(
    input = textProblem,
    sep = ";",
    na.strings = c("Unassigned or n/a", "n/a", "{EMPTY}")
  )
)
# Control case: the same header line followed by one record kept on a single
# line. The two lines are joined with a newline character.
textWorking <- paste(
  "Ticker;FullName;NorgateID;IsIndex;MarketID;Market;GroupID;Group;IndustryID;Industry;GicsID;GicsCategoryName;IcbID;IcbCategoryName;trbccode;trbcname;gicscode;gicsname;indexsymbol1;indexsymbol2;indexsymbol3;indexsymbol4;DelistingDate;PointValue;MarginDeposit;RoundLotSize;TickSize",
  "&6J_CCB;Japanese Yen Continuous Futures Backadjusted;687021;0;77;CME Group;32;Currency Futures;0;Unassigned or n/a;;;;;n/a;n/a;n/a;n/a;;;;;{EMPTY};125000;2200;1;5e-005",
  sep = "\n"
)
# Parse the control text with the same separator and NA placeholders as the
# problematic case, and convert the result to a plain data.frame. The value
# is not assigned, so it is printed when run at the console.
as.data.frame(
  data.table::fread(
    input = textWorking,
    sep = ";",
    na.strings = c("Unassigned or n/a", "n/a", "{EMPTY}")
  )
)
我系统上的输出:
> # Test data.table::fread on following data with two new lines after Trust and before CEF
> textProblem <- "Ticker;FullName;NorgateID;IsIndex;MarketID;Market;GroupID;Group;IndustryID;Industry;GicsID;GicsCategoryName;IcbID;IcbCategoryName;trbccode;trbcname;gicscode;gicsname;indexsymbol1;indexsymbol2;indexsymbol3;indexsymbol4;DelistingDate;PointValue;MarginDeposit;RoundLotSize;TickSize
+ TFA-200101;Morgan Stanley Dean Witter Municipal Income Trust
+
+ CEF;265990;0;14;US Delisted Stocks;2;Investment Company;0;Unassigned or n/a;;;;;n/a;n/a;n/a;n/a;;;;;2001-01-23;1;0;0;0"
>
> as.data.frame(data.table::fread(input=textProblem, sep=";", na.strings=c("Unassigned or n/a", "n/a", "{EMPTY}")))
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26
1 CEF 265990 0 14 US Delisted Stocks 2 Investment Company 0 NA NA NA NA NA NA NA NA NA NA NA NA NA 2001-01-23 1 0 0 0
>
> textWorking <- "Ticker;FullName;NorgateID;IsIndex;MarketID;Market;GroupID;Group;IndustryID;Industry;GicsID;GicsCategoryName;IcbID;IcbCategoryName;trbccode;trbcname;gicscode;gicsname;indexsymbol1;indexsymbol2;indexsymbol3;indexsymbol4;DelistingDate;PointValue;MarginDeposit;RoundLotSize;TickSize
+ &6J_CCB;Japanese Yen Continuous Futures Backadjusted;687021;0;77;CME Group;32;Currency Futures;0;Unassigned or n/a;;;;;n/a;n/a;n/a;n/a;;;;;{EMPTY};125000;2200;1;5e-005"
>
> as.data.frame(data.table::fread(input=textWorking, sep=";", na.strings=c("Unassigned or n/a", "n/a", "{EMPTY}")))
Ticker FullName NorgateID IsIndex MarketID Market GroupID Group IndustryID Industry GicsID GicsCategoryName IcbID IcbCategoryName trbccode trbcname gicscode gicsname indexsymbol1 indexsymbol2
1 &6J_CCB Japanese Yen Continuous Futures Backadjusted 687021 0 77 CME Group 32 Currency Futures 0 NA NA NA NA NA NA NA NA NA NA NA
indexsymbol3 indexsymbol4 DelistingDate PointValue MarginDeposit RoundLotSize TickSize
1 NA NA NA 125000 2200 1 5e-05
> sessionInfo()
R version 3.4.4 (2018-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 16.04.4 LTS
Matrix products: default
BLAS: /usr/lib/openblas-base/libblas.so.3
LAPACK: /usr/lib/libopenblasp-r0.2.18.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] compiler_3.4.4 tools_3.4.4 yaml_2.1.19 data.table_1.11.2