readLines,解析XML流消息以读入R Studio中的数据框

时间:2016-07-31 14:40:47

标签: java r xml readlines

我需要一些帮助才能将XML格式的消息读入数据框。

我有一个通过 fifo 和 system2() 与 R 连接的 Java 应用程序。我在 R 中按如下方式运行该应用程序:

# Run the Java sample broker through Maven in a forked child process and
# exchange messages with it over two named pipes (fifos).
#
# Args:
#   input:  path of the fifo this R session reads from; it is handed to the
#           broker as its `outputFilename` property.
#   output: path of the fifo this R session writes to; it is handed to the
#           broker as its `inputFilename` property.
#
# Returns:
#   Invisibly, a character vector with every line read from `input`, in
#   arrival order. Assign the result (`msgs <- runBroker(inFifo, outFifo)`)
#   to keep the messages in the calling environment.
runBroker <- function(input, output) {
  # wd is where the fifos and the pom.xml sit
  # Kept so that callers relying on XML being attached afterwards still work.
  library(XML)
  pr <- parallel:::mcfork()
  if (inherits(pr, "masterProcess")) {
    # this is the child process
    childOut <- fifo(input, open = "w", blocking = TRUE)
    childIn <- fifo(output, open = "r", blocking = TRUE)
    args <- c("exec:exec", " -Dexec.args='", "--prop     samplebroker.r.charStreamAdapter.inputFilename:", output,  " --prop     samplebroker.r.charStreamAdapter.outputFilename:", input, "'")
    mvnArgs <- paste(args, collapse = "")
    writeLines(mvnArgs, childOut)
    system2("mvn", args = mvnArgs)
    close(childOut)
    close(childIn)
    # The first argument of mcexit() is the numeric exit code, not a message.
    parallel:::mcexit(0L)
  }

  # parent process
  print("start parent")
  infile <- fifo(input, open = "r", blocking = TRUE)
  on.exit(close(infile), add = TRUE)
  outfile <- fifo(output, open = "w", blocking = TRUE)
  on.exit(close(outfile), add = TRUE)

  received <- list()
  repeat {
    # Read exactly one line per iteration; a second read here would silently
    # swallow every other message.
    line <- readLines(infile, n = 1)
    if (length(line) == 0) {
      print("parent done")
      break
    }
    print(line)
    received[[length(received) + 1L]] <- line

    # The stream mixes XML with plain log lines, so lines are only collected
    # here and left for the caller to parse.
    if (startsWith(line, "<ts-done")) {
      # process input here
      writeLines("continue", outfile)
    }
  }

  invisible(as.character(unlist(received)))
}

当我运行此文件时,它会把消息输出到 R 控制台,这部分工作正常。但是,在我的环境中看不到 `line` 变量,唯一创建的对象是 runBroker 函数。以下是我在 R 控制台中收到的输出示例。我想把这些消息整理成几个数据框。

   <competition id="0" name="t2b" pomId="1.3.1-SNAPSHOT" timeslotLength="60" bootstrapTimeslotCount="336" bootstrapDiscardedTimeslots="24" timeslotsOpen="24" deactivateTimeslotsAhead="1" minimumOrderQuantity="0.01" timezoneOffset="-6" latitude="45" simulationRate="720" simulationModulo="3600000">
  <description></description>
  <simulationBaseTime>
    <iMillis>1255132800000</iMillis>
  </simulationBaseTime>
  <broker>default broker</broker>
  <broker>Sample</broker>
  <customer id="3840" name="b12" population="1" powerType="BATTERY_STORAGE" customerClass="SMALL" controllableKW="-40.0" upRegulationKW="-40.0" downRegulationKW="40.0" storageCapacity="90.0" multiContracting="false" canNegotiate="false"/>
  <customer id="4096" name="HighIncome-2_10" population="1" powerType="ELECTRIC_VEHICLE" customerClass="SMALL" controllableKW="-6.6" upRegulationKW="-6.6" downRegulationKW="6.6" storageCapacity="24.0" multiContracting="false" canNegotiate="false"/>
  <customer id="4099" name="HighIncome-2_11" population="1" powerType="ELECTRIC_VEHICLE" customerClass="SMALL" controllableKW="-6.6" upRegulationKW="-6.6" downRegulationKW="6.6" storageCapacity="24.0" multiContracting="false" canNegotiate="false"/>
  <customer id="3332" name="DowntownOffices" population="30" powerType="CONSUMPTION" customerClass="SMALL" controllableKW="0.0" upRegulationKW="0.0" downRegulationKW="0.0" storageCapacity="0.0" multiContracting="true" canNegotiate="false"/>
</competition>

<customer-bootstrap-data id="938315" customerName="CentervilleHomes" powerType="CONSUMPTION">
<netUsage>-12391.35,-10114.47,-10140.86,-11078.46,-11878.63,-14476.89,-13162.94,-14258.63,-14123.5,-14169.36,-13705.84,-14090.340000000002,...    </netUsage>
</customer-bootstrap-data>

<market-bootstrap-data id="938607">
  <mwh>40.95884800000005,40.328089861100096,43.10454465350664,51.85378530783136,54.49313841123366,60.92061090600654,44.605544455502276,59.36643951972485,...</mwh>
  <marketPrice>-124.12755392710862,-17.709759037038168,-34.78745750315098,-218.34365565481434,-81.44333206309733,-205.4500215178752,-20.06859471666362,-33.03741812546915</marketPrice>
</market-bootstrap-data>

<weather-report id="626" currentTimeslot="24" temperature="11.3"     windSpeed="3.0" windDirection="250.0" cloudCover="0.5"/>
2127 INFO  core.BrokerMessageReceiver: onMessage(String) - received message:
<weather-report id="627" currentTimeslot="25" temperature="11.7" windSpeed="3.0" windDirection="250.0" cloudCover="0.125"/>
2127 INFO  core.BrokerMessageReceiver: onMessage(String) - received message:
<weather-report id="628" currentTimeslot="26" temperature="10.8" windSpeed="2.0" windDirection="240.0" cloudCover="1.0"/>
2128 INFO  core.BrokerMessageReceiver: onMessage(String) - received message:
<weather-report id="629" currentTimeslot="27" temperature="11.5" windSpeed="2.0" windDirection="230.0" cloudCover="1.0"/>

我已有能够成功从 XML 文件中读取数据的代码,其中使用了 xpathSApply 函数;但当我把它用到上面的代码中时,却无法正常工作:

##------------Customers------------##
# Gather the attributes of every <customer> node and transpose the result
# so that each customer ends up as one row of the data frame.
customer <- data.frame(
  t(xpathSApply(doc = xml, path = "//customer", fun = xmlAttrs))
)

##------------net usage per customer------------##
# source: http://stackoverflow.com/questions/37565910/xpath-select-two-consecutive-elements-and-transform-them-into-one-data-frame/37566740#37566740

# Attributes of every <customer-bootstrap-data> element, one row per element.
customerBoot <- xpathSApply(doc = xml, path = "//customer-bootstrap-data", fun = xmlAttrs)
customerBoot <- data.frame(t(customerBoot), stringsAsFactors = FALSE)

# For each element, look up its <netUsage> child by id and append the
# comma-separated values as numeric columns X1, X2, ...
customerBoot <- data.table::rbindlist(
  lapply(seq_len(nrow(customerBoot)), function(i) {
    attrs <- as.list(customerBoot[i, , drop = FALSE])

    path <- sprintf("//customer-bootstrap-data[@id='%s']/netUsage", attrs[["id"]])
    usage <- xpathSApply(doc = xml, path = path, fun = xmlValue)

    # A missing or empty <netUsage> yields zero value columns instead of an
    # error; fill = TRUE pads the row with NA.
    vals <- if (length(usage) > 0) {
      strsplit(usage[[1]], ",")[[1]]
    } else {
      character(0)
    }
    vals <- as.numeric(vals)

    # seq_along() rather than 1:length(): 1:0 would produce two names for
    # zero values and make setNames() fail.
    c(attrs, as.list(setNames(vals, sprintf("X%d", seq_along(vals)))))
  }),
  fill = TRUE
)

# The second attribute column (customerName in the sample data) is renamed.
colnames(customerBoot)[2] <- "customer"


##------------market bootstrap data------------##------#-------
# Split the comma-separated <mwh> and <marketPrice> payloads of the first
# <market-bootstrap-data> element into one value per row.
marketBootmwh <- strsplit(
  xpathSApply(doc = xml, path = "//market-bootstrap-data/mwh", fun = xmlValue),
  ","
)[[1]]
marketBootmarketPrice <- strsplit(
  xpathSApply(doc = xml, path = "//market-bootstrap-data/marketPrice", fun = xmlValue),
  ","
)[[1]]

# Store the series as numeric columns; left as text they would be character
# (or factor under R < 4.0) and unusable for arithmetic.
marketBootmwh <- data.frame(marketBootmwh = as.numeric(marketBootmwh))
marketBootmarketPrice <- data.frame(
  marketBootmarketPrice = as.numeric(marketBootmarketPrice)
)

# Both series must line up row by row; cbind() could otherwise recycle the
# shorter one without warning.
if (nrow(marketBootmwh) != nrow(marketBootmarketPrice)) {
  stop(
    "mwh and marketPrice have different lengths: ",
    nrow(marketBootmwh), " vs ", nrow(marketBootmarketPrice),
    call. = FALSE
  )
}
marketBoot <- cbind(marketBootmwh, marketBootmarketPrice)

##------------weather-report------------##
# Gather the attributes of every <weather-report> node and transpose the
# result so that each report ends up as one row of the data frame.
weather <- data.frame(
  t(xpathSApply(doc = xml, path = "//weather-report", fun = xmlAttrs))
)

我只是不确定如何处理通过 readLines 函数读入的内容。我了解到需要先把 readLines 的输出解析成 DOM,然后才能转换为数据框,这样理解对吗?应该使用哪个 XML 函数?xmlParse 和 parse 似乎都不起作用,但也可能是我的用法有误。

0 个答案:

没有答案