我不知道我尝试执行的查询是否可行,但是如果你们中的LINQ to SQL / XML专家之一能弄清楚这一点,我将非常感谢并向LINQ神致敬。我的最终目标是识别所有重复的XML模型,并显示除一个以外的所有重复的CECID。所以可以说我有一个看起来像这样的Xdocument:
<ApplianceModels xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" ApplianceType="IceMakers">
<Model>
<ReferenceNumber>201877149</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>1234212</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>B</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>56</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>4.00</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor xsi:nil="true" />
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>d579ae7a-f3f7-4627-a3f1-f17b23aa28e3</CECID>
</Model>
<Model>
<ReferenceNumber>201877143</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>12342</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>B</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>56</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>4.00</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor xsi:nil="true" />
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>94c6d6e6-5b6a-4f45-a7ff-70a64e50e4e6</CECID>
</Model>
<Model>
<ReferenceNumber>201877152</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>1231114234</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>C</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>81</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>1.10</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor>4.45</IceHardnessAdjustmentFactor>
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>d97a603c-1836-43a3-b564-ab8d1bdec65f</CECID>
</Model>
</ApplianceModels>
然后在SQL Server中,有一个名为tApplianceTypeColumns的表,对于给定的设备类型,该表如下所示:
ApplianceTypeID ApplianceColumnUnique ApplianceColumnName
10 0 ReferenceNumber
10 1 Brand
10 1 ModelNumber
10 0 EquipmentType
10 0 CoolingType
10 0 IceType
10 0 IceMakerProcessType
10 0 HarvestRateLbs24Hr
10 0 EnergyCons_kWhPer100Lbs
10 0 WaterCons_galPer100lbs
10 1 RegulatoryStatus
所以这是我的开始,但距离还很遥远:
// First attempt from the question. It is unfinished: the query has no closing select/group clause.
// `m` ranges over the child elements of every Model, not over the Model elements themselves.
var DupeItems = from m in doc.Descendants("Model").Elements()
// NOTE(review): m.Name is an XName while ApplianceColumnName looks like a string column — confirm the join keys are comparable.
join at in entities.tApplianceTypeColumns on m.Name equals at.ApplianceColumnName
// Because m is already a child of Model, m.Element(...) searches one level below it.
group m by m.Element(at.ApplianceColumnName).Value into d
// After "into d" only d remains in scope, so `at` can no longer be referenced here.
where at.ApplianceTypeID == ApplianceTypeID
所以我真正希望的是能够按品牌、型号和监管状态分组,这些正是tApplianceTypeColumns表中ApplianceColumnUnique位列被设置为true的列。被设置为true的列的数量可能会有所不同,具体取决于我在该表中查找的ApplianceTypeID。
此外,我还需要在分组中包括两个永远不在tApplianceTypeColumns表中的元素,这些元素是Action然后是ManufacturerCode,然后是tApplianceTypeColumns中的所有其他唯一元素(没有特定顺序)。
ApplianceTypeID是一个已知参数,将传递给查询。对于任何一组重复项,我都需要显示第二个及后续重复项的CECID,以便我可以拿这些CECID到其他表中查找并更改其状态。但是第一步就很难。我不在乎每组中被保留(不显示)的是哪一个重复项,只需要显示除其中一个以外的所有其他重复项。希望我已经把问题解释清楚了。
答案 0 :(得分:1)
任务可以分为3个步骤:
找到要与之分组的唯一列:
所以我真正希望的是能够按品牌、型号和监管状态分组,这些正是tApplianceTypeColumns表中ApplianceColumnUnique位列被设置为true的列。被设置为true的列的数量可能会有所不同,具体取决于我在该表中查找的ApplianceTypeID。另外,我还需要在分组中包括两个永远不在tApplianceTypeColumns表中的元素,它们依次是 Action 和 ManufacturerCode,之后才是tApplianceTypeColumns中的所有其他唯一元素(没有特定顺序)。
// The two fixed key columns come first, followed by every column flagged as unique.
"Action,ManufacturerCode"
    .Split(',')
    .Concat(
        from column in applianceTypeColumns
        where column.ApplianceColumnUnique
        select column.ApplianceColumnName);
按上一步中的列将模型分组:
我们将列名投影到每个模型的列值中
applianceModels.GroupBy(
model => uniqueColumns.Select(columnName => model.Element(columnName)?.Value).ToArray()
但是,我们不能仅按字符串数组进行分组,因此我们需要提供一个自定义IEqualityComparer:
// "?? 0" is parenthesized so that it only substitutes the hash of a missing (null) value;
// without the parentheses it applies to the whole sum and a null value discards the running hash.
// unchecked lets the running hash wrap around instead of overflowing.
new LambdaComparer<string[]>((a, b) => a.SequenceEqual(b), x => x.Aggregate(13, (hash, y) => unchecked(hash * 7 + (y?.GetHashCode() ?? 0))))
汇总重复项:
.Select(g => new { g.Key, Duplicates = g.Select(x => x.Element("CECID")?.Value) })
一切融合在一起:
/// <summary>
/// LINQPad-style entry point: groups the sample models by their key columns
/// ("Action", "ManufacturerCode" and every column flagged unique for the appliance type)
/// and keeps only the groups that contain more than one model.
/// </summary>
void Main()
{
    const int ApplianceTypeID = 10;
    var applianceModels = GetApplianceModels().XPathSelectElements("Model"); //.Dump();
    var applianceTypeColumns = GetApplianceTypeColumns().Where(x => x.ApplianceTypeID == ApplianceTypeID); //.Dump();

    // Materialized once: the key selector below runs for every model, and a deferred
    // sequence would re-evaluate the column query on each of those runs.
    var uniqueColumns = Enumerable.Concat(
            "Action,ManufacturerCode".Split(','),
            applianceTypeColumns
                .Where(at => at.ApplianceColumnUnique)
                .Select(at => at.ApplianceColumnName))
        .ToArray();

    var query = applianceModels
        .GroupBy(
            // The key is the array of values of the key columns; a missing element yields null.
            model => uniqueColumns.Select(columnName => model.Element(columnName)?.Value).ToArray(),
            new LambdaComparer<string[]>(
                (a, b) => a.SequenceEqual(b),
                // "?? 0" is parenthesized so it only replaces the hash of a null value; otherwise it
                // would apply to the whole sum and a null value would discard the running hash.
                x => x.Aggregate(13, (hash, y) => unchecked(hash * 7 + (y?.GetHashCode() ?? 0)))))
        .Where(x => x.Count() > 1)
        .Select(g => new { g.Key, Duplicates = g.Select(x => x.Element("CECID")?.Value) });
    //.Dump();
}
// Define other methods and classes here
/// <summary>
/// Returns the sample ApplianceModels document (three ice-maker Model entries) parsed into an XElement.
/// </summary>
XElement GetApplianceModels()
{
    // The literal is kept flush-left because every character of a verbatim string is significant.
    const string sampleXml =
@"<ApplianceModels xmlns:xsi=""http://www.w3.org/2001/XMLSchema-instance"" xmlns:xsd=""http://www.w3.org/2001/XMLSchema"" ApplianceType=""IceMakers"">
<Model>
<ReferenceNumber>201877149</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>1234212</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>B</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>56</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>4.00</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor xsi:nil=""true"" />
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>d579ae7a-f3f7-4627-a3f1-f17b23aa28e3</CECID>
</Model>
<Model>
<ReferenceNumber>201877143</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>12342</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>B</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>56</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>4.00</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor xsi:nil=""true"" />
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>94c6d6e6-5b6a-4f45-a7ff-70a64e50e4e6</CECID>
</Model>
<Model>
<ReferenceNumber>201877152</ReferenceNumber>
<Action>C</Action>
<Brand>4564</Brand>
<ModelNumber>1231114234</ModelNumber>
<EquipmentType>A</EquipmentType>
<CoolingType>W</CoolingType>
<IceType>C</IceType>
<IceMakerProcessType>C</IceMakerProcessType>
<TestLabCode>ARN3190</TestLabCode>
<ManufacturerCode>ARN2396</ManufacturerCode>
<HarvestRateLbs24Hr>81</HarvestRateLbs24Hr>
<EnergyCons_kWhPer100Lbs>1.10</EnergyCons_kWhPer100Lbs>
<WaterCons_galPer100Lbs>12</WaterCons_galPer100Lbs>
<IceHardnessAdjustmentFactor>4.45</IceHardnessAdjustmentFactor>
<RegulatoryStatus>I</RegulatoryStatus>
<CECID>d97a603c-1836-43a3-b564-ab8d1bdec65f</CECID>
</Model>
</ApplianceModels>";

    return XElement.Parse(sampleXml);
}
/// <summary>
/// Parses the embedded text dump of tApplianceTypeColumns into one tuple per data row.
/// </summary>
/// <returns>
/// A lazily evaluated sequence of (ApplianceTypeID, ApplianceColumnUnique, ApplianceColumnName);
/// the flag is true when the second column of the row is non-zero.
/// </returns>
IEnumerable<(int ApplianceTypeID, bool ApplianceColumnUnique, string ApplianceColumnName)> GetApplianceTypeColumns()
{
    // The literal is kept flush-left because every character of a verbatim string is significant.
    const string table =
@"ApplianceTypeID ApplianceColumnUnique ApplianceColumnName
10 0 ReferenceNumber
10 1 Brand
10 1 ModelNumber
10 0 EquipmentType
10 0 CoolingType
10 0 IceType
10 0 IceMakerProcessType
10 0 HarvestRateLbs24Hr
10 0 EnergyCons_kWhPer100Lbs
10 0 WaterCons_galPer100lbs
10 1 RegulatoryStatus";

    // Only lines that start with two integers followed by a word match, so the header row is skipped.
    var rows = Regex.Matches(table, @"^(\d+)\s+(\d+)\s+(\w+)", RegexOptions.Multiline);

    return
        from Match row in rows
        let cells = row.Groups
        select (
            ApplianceTypeID: int.Parse(cells[1].Value),
            ApplianceColumnUnique: int.Parse(cells[2].Value) != 0,
            ApplianceColumnName: cells[3].Value);
}
/// <summary>
/// An <see cref="IEqualityComparer{T}"/> whose equality test and hash function are supplied as delegates.
/// </summary>
/// <typeparam name="T">Type of the values being compared.</typeparam>
class LambdaComparer<T> : IEqualityComparer<T>
{
    private readonly Func<T, T, bool> _areEqual;
    private readonly Func<T, int> _hashOf;

    /// <summary>Stores the two delegates; every comparer call is forwarded to them unchanged.</summary>
    /// <param name="equals">Delegate invoked by <see cref="Equals(T, T)"/>.</param>
    /// <param name="getHashCode">Delegate invoked by <see cref="GetHashCode(T)"/>.</param>
    public LambdaComparer(Func<T, T, bool> equals, Func<T, int> getHashCode)
    {
        _areEqual = equals;
        _hashOf = getHashCode;
    }

    /// <summary>Returns the result of the equality delegate for the two values.</summary>
    public bool Equals(T x, T y)
    {
        return _areEqual(x, y);
    }

    /// <summary>Returns the result of the hash delegate for the value.</summary>
    public int GetHashCode(T obj)
    {
        return _hashOf(obj);
    }
}
答案 1 :(得分:0)
这是我基于Xiaoy312解决方案的最终代码。再次感谢你。它运作良好。我向LINQ神致敬:
/// <summary>
/// Finds the Model elements of <paramref name="doc"/> that share the same values for the key columns
/// ("Action", "ManufacturerCode" and every column flagged unique for the appliance type) and
/// returns the CECIDs of the redundant ones.
/// </summary>
/// <param name="doc">Document whose <c>Model</c> descendants are examined.</param>
/// <param name="ApplianceTypeID">Appliance type used to select rows of <c>tApplianceTypeColumns</c>.</param>
/// <returns>
/// For each group of duplicates, every CECID that differs from the CECID of the group's first model.
/// </returns>
private List<string> XMLDuplicatesToEliminate(XDocument doc, Guid ApplianceTypeID)
{
    var entities = new DbContextFactory().MAEDBSEntities;

    // Only the column names are needed, so they are selected directly.
    var flaggedColumns =
        (
            from at in entities.tApplianceTypeColumns
            where
                at.ApplianceTypeID == ApplianceTypeID &&
                at.ApplianceColumnUnique == true
            select at.ApplianceColumnName
        ).ToList();

    // Materialized once because the key selector below runs for every model.
    var keyColumns = "Action,ManufacturerCode".Split(',')
        .Concat(flaggedColumns)
        .ToArray();

    var keyComparer = new LambdaComparer<string[]>(
        (a, b) => a.SequenceEqual(b),
        // "?? 0" is parenthesized so it only replaces the hash of a null value; otherwise it
        // would apply to the whole sum and a null value would discard the running hash.
        // unchecked lets the running hash wrap around instead of overflowing.
        x => x.Aggregate(13, (hash, y) => unchecked(hash * 7 + (y?.GetHashCode() ?? 0))));

    var duplicateGroups = doc.Descendants("Model")
        .GroupBy(
            // The key is the array of values of the key columns; a missing element yields null.
            model => keyColumns.Select(columnName => model.Element(columnName)?.Value).ToArray(),
            keyComparer)
        .Where(g => g.Count() > 1);

    var duplicatesToEliminate = new List<string>();
    foreach (var duplicateGroup in duplicateGroups)
    {
        var cecids = duplicateGroup.Select(model => model.Element("CECID")?.Value).ToList();

        // The first model of each group is the one that is kept. The filter is by value, so a later
        // model carrying the same CECID as the kept one is not reported either.
        string keptCecid = cecids[0];
        duplicatesToEliminate.AddRange(cecids.Where(cecid => cecid != keptCecid));
    }

    return duplicatesToEliminate;
}