Question

我使用R，v.3.3.2 ..我正在尝试解析一些HTML文档。我有以下功能：

relation <- function(html_files){
    zk_files <- htmlParse(html_files, encoding = "UTF-8")
    kopija <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[3]/td[3]/span", fun = xmlValue)
    zemlja <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[4]/td[2]/span", fun = xmlValue)
    sud <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[6]/td[2]/span", fun = xmlValue)
    odjel <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[7]/td[2]/span", fun = xmlValue)
    y <- as.data.frame(odjel); colnames(y) <- "odjel"
    odjel_2 <- merge(x = y, y = zk_odjeli[,c(2,3)], by.x = "odjel", by.y = "Zk_Odjel", all == FALSE)
    stanje <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[8]/td[2]/span", fun = xmlValue)
    opcina <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[10]/td[2]/span", fun = xmlValue)
    opcina <- as.numeric(gsub(pattern = "[^0-9]", replacement = "\\1", x = opcina))
    ulozak <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[10]/td[3]/span", fun = xmlValue)
    ulozak <- as.numeric(gsub(pattern = "[^0-9]", replacement = "\\1", x = ulozak))
    broj_dnevnika <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[12]/td[2]/span", fun = xmlValue)
    tip_vlasnistva <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[12]/td[3]/span", fun = xmlValue)
    tip_vlasnistva <- if (length(tip_vlasnistva)==0) {NA}else{returntip_vlasnistva}
    plombe <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[13]/td[2]/span", fun = xmlValue)
    tip_izvatka <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[14]/td[2]/span", fun = xmlValue)
    id <- paste0(odjel_2[,2], "-",opcina, "-",ulozak)
    final_list <- list(kopija, zemlja, odjel, sud, stanje, opcina, ulozak, broj_dnevnika, plombe, 
                       tip_vlasnistva, id)
}

然后我使用lapply函数在html文件上应用此函数：

lapply(files, relation)

我的输出如下：

structure(list(Html = list(c("NESLUŽBENA KOPIJA", "NESLUŽBENA KOPIJA", 
"NESLUŽBENA KOPIJA"), c("REPUBLIKA HRVATSKA", "REPUBLIKA HRVATSKA", 
"REPUBLIKA HRVATSKA"), c("POSEBNI ZEMLJIŠNOKNJIŽNI ODJEL U KLANJCU", 
"POSEBNI ZEMLJIŠNOKNJIŽNI ODJEL U KLANJCU", "POSEBNI ZEMLJIŠNOKNJIŽNI ODJEL U KLANJCU"
), c("Općinski sud u Zlataru", "Općinski sud u Zlataru", "Općinski sud u Zlataru"
), c("Stanje na dan: 09.05.2016. 23:24", "Stanje na dan: 09.05.2016. 23:24", 
"Stanje na dan: 09.05.2016. 23:24"), c(313637, 313637, 313637
), c(8, 16, 21), c("Broj zadnjeg dnevnika: Z-1001/2014", "Broj zadnjeg dnevnika: Z-633/2011", 
"Broj zadnjeg dnevnika: Z-274/2012"), c("Aktivne plombe: ", "Aktivne plombe: ", 
"Aktivne plombe: "), NA, c("112-313637-8", "112-313637-16", "112-313637-21"
))), .Names = "Html")

问题在于部分

tip_vlasnistva <- xpathSApply(doc = zk_files, path = "/html/body/table/tr[12]/td[3]/span", fun = xmlValue)
tip_vlasnistva <- if (length(tip_vlasnistva)==0) {NA}else{tip_vlasnistva}

我有一个空集（logical(0)），我将其转换为NA，但在输出中我没有得到NA'sn次，但只有一个你可以在输出中看到的时间。我如何处理空集（逻辑（0））以获得与其他变量相同的维度？

Answer 1

以某种方式找到答案：我刚才使用了paste0（）函数：

tip_vlasnistva <- paste0(xpathSApply(doc = zk_files, path = "/html/body/table/tr[12]/td[3]", fun = xmlValue))

它使它成为＆＃34;＆＃34;而不是逻辑零，这导致想要的输出。

里面的空集在拉普利

1 个答案: