我有一个带时间戳的数据帧(DataFrame,数据框架),其中每一秒都有一条记录。如果希望数据按每 60 秒(即 1 分钟)汇总一次,我该如何对这个数据帧进行聚合?
// Builds one <Content> element (UId, FileName, Image, plus a <FullPath> child for
// every examined path segment that is not yet in the file) and appends it to the
// root of the XML document stored at filePath. The document is then saved back.
string fullPath = "1$2$3";
List<string> nodesToBeAdded = fullPath.Split('$').ToList();

XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(filePath);

// Detached elements; they only become part of the document if nodeContent is
// appended to the root after the loop.
XmlNode nodeContent = xmlDocument.CreateNode(XmlNodeType.Element, "Content", null);

XmlNode nodeUID = xmlDocument.CreateNode(XmlNodeType.Element, "UId", null);
nodeUID.InnerText = value.UId;

XmlNode nodeFileName = xmlDocument.CreateNode(XmlNodeType.Element, "FileName", null);
nodeFileName.InnerText = value.FileName;

XmlNode nodeImage = xmlDocument.CreateNode(XmlNodeType.Element, "Image", null);
nodeImage.InnerText = value.Image;

bool addNodeContent = false;

// NOTE(review): the bound is Count - 1, so the last segment ("3" for "1$2$3") is
// never examined or added. Kept as in the original; confirm this is intentional.
for (int i = 0; i < nodesToBeAdded.Count - 1; i++)
{
    string segment = nodesToBeAdded[i];

    // XPath 1.0 string literals have no escape character. Choose a quoting form
    // that is valid for this segment so an apostrophe can neither break the
    // query nor change what the predicate matches.
    string segmentLiteral;
    if (!segment.Contains("'"))
    {
        segmentLiteral = "'" + segment + "'";
    }
    else if (!segment.Contains("\""))
    {
        segmentLiteral = "\"" + segment + "\"";
    }
    else
    {
        // Contains both quote kinds: split on apostrophes and glue the pieces
        // back together with concat('left', "'", 'right', ...).
        segmentLiteral = "concat('" + segment.Replace("'", "',\"'\",'") + "')";
    }

    string existingPathQuery = "//Content/FullPath[text()=" + segmentLiteral + "]";

    if (xmlDocument.SelectNodes(existingPathQuery).Count == 0)
    {
        XmlNode nodeFullPath = xmlDocument.CreateNode(XmlNodeType.Element, "FullPath", null);
        nodeFullPath.InnerText = segment;

        // AppendChild on a node that is already a child moves it to the end, so
        // on later iterations UId/FileName/Image are re-positioned after the
        // previously added FullPath elements. Order kept as in the original.
        nodeContent.AppendChild(nodeUID);
        nodeContent.AppendChild(nodeFileName);
        nodeContent.AppendChild(nodeImage);
        nodeContent.AppendChild(nodeFullPath);
        addNodeContent = true;
    }
}

if (addNodeContent)
{
    // Attach the assembled <Content> element to the document root.
    xmlDocument.DocumentElement.AppendChild(nodeContent);
}

// The file is written back even when nothing was added (same as the original).
xmlDocument.Save(filePath);
我想使用 Spark SQL 和 Scala 来实现,目的是减小数据集的大小。非常感谢!