我想在R中加载csv-file但是,有些值会分配给错误的列。我正在阅读文件:
bm <- read.csv("bm.csv", quote = "", fill=TRUE)
colnames(bm) <- c("id","user","url","title","description","favorite","private","date","removed","classifications")
导致以下数据帧:
structure(list(id = structure(c(3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 2L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L), .Label = c("\"\"voeding;\"\"\"", "\"\"ziekte;\"\"\"", "\"33",
"\"34", "\"35", "\"36", "\"37", "\"38", "\"39", "\"40", "\"41",
"\"42", "\"43", "\"44", "\"45", "\"46", "\"47", "\"48", "\"49",
"\"50", "\"51", "\"52", "\"53", "\"54", "\"55", "\"56", "\"57",
"\"58", "\"59", "\"60", "\"61", "\"62", "\"63", "\"64", "\"65",
"\"66", "\"67", "\"68", "\"69", "\"70", "\"71", "\"72", "\"73",
"\"74", "\"75", "\"76", "\"77", "\"78", "\"79", "\"80", "\"81",
"\"82", "\"83", "\"84", "\"85", "\"86", "\"87", "\"88", "\"89",
"\"90", "\"91"), class = "factor"), user = structure(c(16L, 16L,
17L, 17L, 17L, 17L, 19L, 1L, 15L, 16L, 7L, 16L, 10L, 9L, 14L,
19L, 21L, 9L, 16L, 14L, 19L, 12L, 10L, 7L, 3L, 9L, 5L, 2L, 7L,
3L, 10L, 14L, 13L, 16L, 17L, 15L, 8L, 18L, 21L, 10L, 4L, 11L,
17L, 6L, 12L, 7L, 9L, 4L, 16L, 5L, 1L, 2L, 12L, 22L, 20L, 14L,
24L, 4L, 10L, 23L, 12L), .Label = c("", "\"\"fxf01\"\"", "\"\"fxf02\"\"",
"\"\"fxf03\"\"", "\"\"fxf04\"\"", "\"\"fxf05\"\"", "\"\"fxf06\"\"",
"\"\"fxf08\"\"", "\"\"fxf09\"\"", "\"\"fxf10\"\"", "\"\"fxf11\"\"",
"\"\"fxf15\"\"", "\"\"fxf16\"\"", "\"\"fxf17\"\"", "\"\"fxf18\"\"",
"\"\"fxf19\"\"", "\"\"fxf20\"\"", "\"\"fxf21\"\"", "\"\"fxf22\"\"",
"\"\"fxf23\"\"", "\"\"fxf24\"\"", "\"\"fxf25\"\"", "\"\"fxf26\"\"",
"\"\"fxf27\"\""), class = "factor"), url = structure(c(14L, 14L,
9L, 5L, 2L, 8L, 12L, 1L, 21L, 8L, 6L, 7L, 7L, 14L, 16L, 15L,
11L, 21L, 6L, 6L, 11L, 6L, 8L, 17L, 20L, 19L, 11L, 7L, 3L, 18L,
11L, 8L, 6L, 17L, 17L, 21L, 13L, 7L, 21L, 5L, 13L, 6L, 4L, 9L,
11L, 16L, 5L, 15L, 16L, 10L, 1L, 6L, 15L, 5L, 21L, 19L, 18L,
11L, 6L, 6L, 13L), .Label = c("", "\"\"http://gezondheid.blog.nl/overgewicht/2008/06/07/dik-zijn-heeft-veel-nadelen\"\"",
"\"\"http://home.deds.nl/~obesitasinfo.nl/\"\"", "\"\"http://home.deds.nl/~obesitasinfo.nl/Behandeling.htm\"\"",
"\"\"http://mens-en-gezondheid.infonu.nl/ziekten/18079-risicos-van-overgewicht-en-de-gevolgen-van-obesitas.html\"\"",
"\"\"http://nl.wikipedia.org/wiki/Obesitas\"\"", "\"\"http://overgewicht.pilliewillie.nl/obesitas/behandeling.overgewicht.3.php\"\"",
"\"\"http://www.erfelijkheid.nl/node/325\"\"", "\"\"http://www.gezonderafvallen.nl/page/938/overgewicht-als-gevolg-van-de-evolutie.html\"\"",
"\"\"http://www.hely.net/gevolgen.html\"\"", "\"\"http://www.hely.net/oorzaken.html\"\"",
"\"\"http://www.kiloafvallen.nl/\"\"", "\"\"http://www.nisb.nl/kennisplein-sport-bewegen/dossiers/bewegen-en-overgewicht/oorzaken-obesitas.html\"\"",
"\"\"http://www.novarum.nl/eetproblemen/obesitas/signalen-en-gevolgen\"\"",
"\"\"http://www.obesitas.azdamiaan.be/nl/index.aspx?n=280\"\"",
"\"\"http://www.obesitaskliniek.nl/over-obesitas/\"\"", "\"\"http://www.obesitasvereniging.nl/\"\"",
"\"\"http://www.sagbmaagband.nl/minder-gewicht/morbideobesitas.html\"\"",
"\"\"http://www.tweestedenziekenhuis.nl/script/Template_SubsubMenu.asp?PageID=1144&SSMID=1247\"\"",
"\"\"http://www.volkskrant.nl/vk/nl/2672/Wetenschap-Gezondheid/article/detail/3143483/2012/01/30/Balanstop-in-Madurodam-mueslireep-tegen-obesitas.dhtml\"\"",
"\"\"http://www.zuivelengezondheid.nl/?pageID=332\"\""), class = "factor"),
title = structure(c(7L, 7L, 18L, 19L, 3L, 9L, 20L, 1L, 5L,
9L, 8L, 13L, 13L, 7L, 17L, 12L, 15L, 5L, 8L, 8L, 15L, 8L,
9L, 11L, 2L, 4L, 15L, 16L, 10L, 6L, 15L, 9L, 8L, 11L, 11L,
5L, 14L, 13L, 5L, 19L, 14L, 8L, 10L, 18L, 15L, 17L, 19L,
12L, 17L, 21L, 1L, 13L, 12L, 19L, 5L, 4L, 6L, 15L, 8L, 8L,
14L), .Label = c("", "\"\"Balanstop in Madurodam: mueslireep tegen obesitas - Wetenschap & Gezondheid - VK\"\"",
"\"\"Dik zijn heeft veel nadelen | Gezondheid\"\"", "\"\"Gevolgen obesitas - TweeSteden ziekenhuis\"\"",
"\"\"Gevolgen overgewicht | Zuivel en gezondheid\"\"", "\"\"Morbide obesitas / ernstig overgewicht - Website over de SAGB Maagband\"\"",
"\"\"Novarum - Wat zijn de signalen en gevolgen van obesitas?\"\"",
"\"\"Obesitas - Wikipedia\"\"", "\"\"Obesitas | erfelijkheid.nl\"\"",
"\"\"Obesitas info\"\"", "\"\"Obesitas Vereniging\"\"", "\"\"Obesitas- en overgewichtskliniek - Home - Algemene info - Oorzaken\"\"",
"\"\"OBESITAS\"\"", "\"\"Oorzaken obesitas\"\"", "\"\"oorzaken van obesitas\"\"",
"\"\"OU Bookmarktool\"\"", "\"\"Over obesitas | Nederlandse Obesitas Kliniek\"\"",
"\"\"Overgewicht als gevolg van de evolutie\"\"", "\"\"Risico\xe4\xf3\xbbs van overgewicht en de Gevolgen van Obesitas | Mens en Gezondheid: Ziekten\"\"",
"\"\"Snel Afvallen - Phase 2", "\"\"wat zijn de gevolgen van obesitas"
), class = "factor"), description = structure(c(4L, 4L, 4L,
4L, 4L, 4L, 3L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("",
" morbide obesitas en overgewicht\"\"", " Tips & Trucs en nog veel meer\"\"",
"\"\"\"\""), class = "factor"), favorite = structure(c(2L,
2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 2L
), .Label = c("", "\"\"\"\"", "\"\"1\"\""), class = "factor"),
private = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "\"\"\"\"", "\"\"1\"\""
), class = "factor"), date = structure(c(3L, 3L, 3L, 4L,
4L, 5L, 2L, 1L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
2L, 1L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("",
"\"\"\"\"", "\"\"16-04-2012 9:09:45\"\"", "\"\"16-04-2012 9:09:46\"\"",
"\"\"16-04-2012 9:09:49\"\"", "\"\"16-04-2012 9:09:50\"\"",
"\"\"16-04-2012 9:09:51\"\"", "\"\"16-04-2012 9:09:52\"\"",
"\"\"16-04-2012 9:09:53\"\""), class = "factor"), removed = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("", "\"\"\"\"", "\"\"16-04-2012 9:09:49\"\"",
"\"\"16-04-2012 9:09:53\"\""), class = "factor"), classifications = structure(c(8L,
8L, 7L, 8L, 6L, 5L, 3L, 1L, 2L, 5L, 8L, 7L, 7L, 8L, 8L, 5L,
8L, 6L, 6L, 8L, 4L, 8L, 5L, 8L, 8L, 8L, 8L, 2L, 8L, 8L, 7L,
5L, 2L, 5L, 8L, 8L, 4L, 7L, 8L, 8L, 8L, 8L, 4L, 2L, 7L, 8L,
8L, 7L, 8L, 3L, 1L, 2L, 7L, 6L, 6L, 6L, 8L, 7L, 6L, 7L, 7L
), .Label = c("", "\"\";\"\"\"", "\"\"\"\"", "\"\"beweging;\"\"\"",
"\"\"erfelijkheid;\"\"\"", "\"\"sociale aspecten;\"\"\"",
"\"\"voeding;\"\"\"", "\"\"ziekte;\"\"\""), class = "factor")), .Names = c("id",
"user", "url", "title", "description", "favorite", "private",
"date", "removed", "classifications"), class = "data.frame", row.names = c(NA,
-61L))
我可以看出它出错的原因:description
列大多是空的,但是如果没有,则每个值都放在下一列中,最后一列的值放在下一列的第一列中行。任何想法如何解决这个问题?
答案 0 :(得分:1)
文件格式错误,用引号分隔但引用,并且还有row.names。如果您创建此文件,则使用下一个命令再次打印它将更容易阅读。
write.csv("bm.csv", header=TRUE, row.names=FALSE)
然后阅读:
bm <- read.csv("bm.csv", sep = "\t", header=TRUE)
如果你没有创建文件,这个步骤可以解决你的问题(我认为):
bm <- read.csv("bm.csv", fill=TRUE, sep="\"")
如果你现在查看数据(head(bm)
),它就会出现空列。要只选择你想要的那个,你可以制作一个矢量:
bm <- bm[c("user","url","title","favorite", "date", "classifications")]
然后将名称更改为您想要的名称:
names(bm) <- c("user", "url", "title descriptions","favorite", "private date", "classifications")