从嵌套结构/列表中删除重复项,仅保留最优项

时间:2012-10-25 21:23:14

标签: c# .net linq duplicate-removal

我遇到了一个经典问题:从列表/嵌套列表中删除重复项。但由于必须遵循一些特定规则,解决方案并不那么直观。我编写了一个能按要求运行的示例应用程序,但代码看起来很笨重。我想找一种更优雅、最好也更高效的实现方式。也许 LINQ/扩展方法能帮上忙。有什么建议吗?


/// <summary>
/// Sample program: removes duplicate products (same Sku) across a list of sellers,
/// keeping only the best offer for each Sku.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the sample data and computes the de-duplicated seller list.
    /// </summary>
    static void Main(string[] args)
    {
        var sellers = new List<Seller>()
        {
            new Seller()
            {
                Id = 1,
                Products = new List<Product>()
                {
                    new Product() { Sku = "Alpha", Price = 5.0, Shipping = 2.0 },
                    new Product() { Sku = "Beta", Price = 5.0, Shipping = 2.0 }, // more expensive sku within same seller
                    new Product() { Sku = "Beta", Price = 4.0, Shipping = 2.0 },
                    new Product() { Sku = "Gamma", Price = 8.0, Shipping = 2.0 }
                }
            },
            new Seller()
            {
                Id = 2,
                Products = new List<Product>()
                {
                    new Product() { Sku = "Alpha", Price = 5.0, Shipping = 1.0 },
                    new Product() { Sku = "Beta", Price = 5.0, Shipping = 1.0 },
                    new Product() { Sku = "Gamma", Price = 8.0, Shipping = 2.0 }
                }
            }
        };

        // Eliminate duplicate Products amongst all sellers that have matching "Sku".
        // Rules:
        // Keep the Product with the lowest price.
        // If price is equal, keep the product with lower shipping.
        // If shipping is also equal, then keep the product with lowest seller Id.
        // If at the end of all comparisons, a seller ends up with no products, then remove that seller.

        // In this example, the expected result is:
        // 1.{Beta, 4.0, 2.0}  // seller 1's 4.0 Beta is cheaper than every other Beta offer
        // 1.{Gamma, 8.0, 2.0} // identical deal from both sellers; seller 1 has the lower Id
        // 2.{Alpha, 5.0, 1.0} // seller 2's Alpha has a lower shipping cost than seller 1's

        var newSellers = RemoveDuplicateProducts(sellers);
    }

    /// <summary>
    /// Returns a new seller list in which every Sku appears exactly once, attached to the
    /// seller that offers the best deal for it. Sellers left without products are omitted.
    /// The input sellers and their product lists are not modified.
    /// </summary>
    /// <param name="sellers">Sellers whose products should be de-duplicated.</param>
    /// <returns>New Seller objects sorted by Id, each holding only its winning products.</returns>
    private static List<Seller> RemoveDuplicateProducts(IEnumerable<Seller> sellers)
    {
        // One pass over all offers instead of rescanning every seller for every product.
        // GroupBy yields groups in first-seen key order and keeps source order inside each
        // group, so the order of products in the result is deterministic.
        return sellers
            .SelectMany(s => s.Products.Select(p => new Offer(s, p)))
            .GroupBy(offer => offer.Product.Sku)
            .Select(sameSku => PickBest(sameSku))
            .GroupBy(winner => winner.Seller.Id)
            .OrderBy(bySeller => bySeller.Key)
            .Select(bySeller => new Seller()
            {
                Id = bySeller.Key,
                Products = bySeller.Select(winner => winner.Product).ToList()
            })
            .ToList();
    }

    /// <summary>
    /// Returns the best offer of a non-empty sequence; on a complete tie the earliest one wins.
    /// </summary>
    private static Offer PickBest(IEnumerable<Offer> offers)
    {
        Offer best = null;
        foreach (var candidate in offers)
        {
            // Strict comparison: an equally good later offer never displaces an earlier one.
            if (best == null || IsBetter(candidate, best))
            {
                best = candidate;
            }
        }

        return best;
    }

    /// <summary>
    /// True when <paramref name="candidate"/> beats <paramref name="current"/>:
    /// lower price first, then lower shipping, then lower seller Id.
    /// </summary>
    private static bool IsBetter(Offer candidate, Offer current)
    {
        if (candidate.Product.Price != current.Product.Price)
        {
            return candidate.Product.Price < current.Product.Price;
        }

        if (candidate.Product.Shipping != current.Product.Shipping)
        {
            return candidate.Product.Shipping < current.Product.Shipping;
        }

        return candidate.Seller.Id < current.Seller.Id;
    }

    /// <summary>
    /// Pairs a product with the seller offering it, so the seller is still known
    /// after the nested lists have been flattened.
    /// </summary>
    private sealed class Offer
    {
        public readonly Seller Seller;
        public readonly Product Product;

        public Offer(Seller seller, Product product)
        {
            Seller = seller;
            Product = product;
        }
    }
}

// I cannot modify the below classes.
/// <summary>
/// A seller together with the list of products it offers.
/// </summary>
public sealed class Seller
{
    // Seller identifier; the de-duplication rules use the lowest Id as the final tie-breaker.
    public int Id;
    // Products offered by this seller; the sample data shows the same Sku can appear more than once.
    public List<Product> Products;
}

/// <summary>
/// A single product offer listed by a seller.
/// </summary>
public sealed class Product
{
    // Product identifier; offers with the same Sku are treated as duplicates of each other.
    public string Sku;
    // Item price; the lowest price wins among duplicates.
    public double Price;
    // Shipping cost; used to break ties when prices are equal (lower wins).
    public double Shipping;
}

1 个答案:

答案 0 :(得分:3)

此查询将完成工作

// Keeps, for every Sku, only the best offer across all sellers and rebuilds the
// seller list from the winners. The numbers mark the individual pipeline steps.
var query = sellers
    .SelectMany(seller => seller.Products,
                (seller, product) => new { SellerId = seller.Id, Product = product }) // 1: flatten
    .OrderBy(offer => offer.Product.Price) // 2: best offer first
    .ThenBy(offer => offer.Product.Shipping)
    .ThenBy(offer => offer.SellerId)
    .GroupBy(offer => offer.Product.Sku) // 3: one group per Sku
    .Select(sameSku => sameSku.First()) // 4: the first offer in a group is its best one
    .GroupBy(winner => winner.SellerId, winner => winner.Product) // 5: back to seller -> products
    .Select(bySeller => new Seller() { Id = bySeller.Key, Products = bySeller.ToList() })
    .ToList();

工作原理:

  1. 第一步:将序列展平为匿名类型 { SellerId, Product } 的列表。
  2. 按照您的规则对序列排序:依次按 Price、Shipping、SellerId。
  3. 按产品的 Sku 对已排序的序列分组。这会生成由 { SellerId, Product } 组成的若干组,每组中的产品具有相同的 Sku,但可能属于不同的卖家。
  4. 从每个组中选取第一项。由于各组内的元素都已按您的规则排序,第一项就是该 Sku 对应的最优产品。
  5. 现在需要重新构建层次结构:按 SellerId 分组,并为每组创建一个 Seller 对象,其中包含该卖家的所有最优产品。如果某个卖家没有任何最优产品,就不会产生对应的组,该卖家也就不会出现在结果中。