MongoDB聚合查询通过C#驱动程序缓慢

时间:2017-02-23 15:53:51

标签: c# mongodb performance aggregation-framework

我们有一个聚合查询,它通过shell超快速运行(0.005s返回7条记录),但从C#调用时速度要慢一些(约4s)。

这是一份示例文件 - 大约有。我当地收藏的1m文档:

{ 
    "_id" : "00000000_20160707", 
    "TimeStamp" : ISODate("2016-07-08T01:47:00.182+0000"), 
    "Valor" : "00000000", 
    "NominalInCertificateCurrency" : -1189575.0, 
    "NominalInLocalCurrency" : -1287239.0, 
    "Position" : -15300.0, 
    "Price" : 77.75, 
    "ProductName" : "TRACKER CERT. ABC 123", 
    "At" : ISODate("2016-07-07T00:00:00.000+0000"), 
    "CurrencyId" : "814"
}

这是索引:

{ 
    "At" : 1, 
    "Valor" : 1, 
    "IssueNostro" : 1
}

这是通过shell运行的查询:

db.productpositions.aggregate([
    { $match: { 
      $and: [ 
      {"At": {$gte: ISODate("2016-10-01T00:00:00Z")}}, 
      {"At": {$lte: ISODate("2016-12-31T00:00:00Z")} },
      {"Valor": { $in: [...]}}, // about a dozen valors
      {"IssueNostro": { $ne: null} }
      ]
    }},
    { $sort: { "At": 1 } },
    { $group: { 
      "_id": { "Valor": "$Valor", "IssueNostro": "$IssueNostro" },
       "CurrencyId": { $last: "$CurrencyId" },
       "DescriptionCombo": { $last: "$DescriptionCombo" },
       "IssueNostro": { $last: "$IssueNostro" },
       "NominalInCertificateCurrency": { $last: "$NominalInCertificateCurrency" },
       "NominalInLocalCurrency": { $last: "$NominalInLocalCurrency" },
       "PaymentDate": { $last: "$PaymentDate" },
       "Position": { $last: "$Position" },
       "PositionFirstDate": { $first: "$At" },
       "PositionLastDate": { $last: "$At" },
       "Price": { $last: "$Price" },
       "ProductName": { $last: "$ProductName" },
       "RedemptionDate": { $last: "$RedemptionDate" },
       "TimeStamp": { $last: "$TimeStamp" },
       "Valor": { $last: "$Valor" }
      } 
    },
    ])

这是C#:

public virtual async Task<IEnumerable<IProductPeriodFee>> GetProductPeriodPositionsAsync(IEnumerable<string> valors, DateTime fromDate, DateTime toDate)
{
    var valorArray = valors.ToArray();

    using (new SimpleTimer(this, "GetProductPeriodPositionsAsync", new {valorArray, fromDate, toDate}))
    {
        var @where = CreateWhereExpression(valorArray, fromDate, toDate);

        var raw = await GroupAsync(@where,
            x => x.At,
            x => new ProductPositionKey {Valor = x.Valor, IssueNostro = x.IssueNostro},
            g => new ProductPositionGroupItem
            {
                CurrencyId = g.Last().CurrencyId,
                DescriptionCombo = g.Last().DescriptionCombo,
                Id = g.Last().Id,
                IssueNostro = g.Last().IssueNostro,
                NominalInCertificateCurrency = g.Last().NominalInCertificateCurrency,
                NominalInLocalCurrency = g.Last().NominalInLocalCurrency,
                PaymentDate = g.Last().PaymentDate,
                Position = g.Last().Position,
                PositionFirstDate = g.First().At,
                PositionLastDate = g.Last().At,
                Price = g.Last().Price,
                ProductName = g.Last().ProductName,
                RedemptionDate = g.Last().RedemptionDate,
                TimeStamp = g.Last().TimeStamp,
                Valor = g.Last().Valor
            });

        return raw.Select(item => new ProductPeriodFee
        {
            CurrencyId = item.CurrencyId,
            DescriptionCombo = item.DescriptionCombo,
            Id = item.Id,
            IssueNostro = item.IssueNostro,
            NominalInCertCny = (decimal?) item.NominalInCertificateCurrency,
            NominalInLocalCurrency = (decimal?) item.NominalInLocalCurrency,
            PaymentDate = item.PaymentDate,
            PeriodEndDate = toDate,
            PeriodStartDate = fromDate,
            Position = (decimal?) item.Position,
            PositionFirstDate = item.PositionFirstDate,
            PositionLastDate = item.PositionLastDate,
            Price = (decimal?) item.Price,
            ProductName = item.ProductName,
            RedemptionDate = item.RedemptionDate,
            TimeStamp = item.TimeStamp,
            Valor = item.Valor
        });
    }
}

    private static Expression<Func<ProductPositionDataDb, bool>> CreateWhereExpression(IReadOnlyCollection<string> valors, DateTime? from, DateTime? to)
    {
        // Doing it this incredibly long way around, because the group/select does not work if you include "Enumerable<T>.Contains" in the where clause
        var argParam = Expression.Parameter(typeof(ProductPositionDataDb), "x");
        var atProp = Expression.Property(argParam, "At");
        var atGreaterThan = Expression.GreaterThanOrEqual(atProp, Expression.Constant(from));
        var atLessThan = Expression.LessThanOrEqual(atProp, Expression.Constant(to));
        var exp = Expression.And(atGreaterThan, atLessThan);
        if (valors.Any())
        {
            var valorProp = Expression.Property(argParam, "Valor");
            var valorExp = valors.Select(v => Expression.Equal(valorProp, Expression.Constant(v))).Aggregate(Expression.Or);
            exp = Expression.AndAlso(exp, valorExp);
        }

        var atPropNostro = Expression.Property(argParam, "IssueNostro");
        var nostroExp = Expression.NotEqual(atPropNostro, Expression.Constant(null, atPropNostro.Type));
        exp = Expression.AndAlso(exp, nostroExp);

        return Expression.Lambda<Func<ProductPositionDataDb, bool>>(exp, argParam);
    }

public async Task<IEnumerable<TResult>> GroupAsync<TKey, TResult>(Expression<Func<T, bool>> predicate, Expression<Func<T, object>> sort, Expression<Func<T, TKey>> groupBy, Expression<Func<IGrouping<TKey, T>, TResult>> project)
{
    return await _mongoCollection.Aggregate(new AggregateOptions {AllowDiskUse = true})
        .Match(predicate)
        .Sort(Builders<T>.Sort.Ascending(sort))
        .Group(groupBy, project)
        .ToListAsync();
}

根据个人资料分析师的说法,该应用程序三次点击此功能的时间占31.56%MongoDB.Driver.Core.Misc.StreamExtensionMethods.<ReadBytesAsync>d__3.MoveNext()

想法?

1 个答案:

答案 0 :(得分:0)

所以问题是我使用表达式手动构建的where子句的OR。 MongoDB驱动程序支持IEnumerable<T>.Contains(..),至少在传递Array值时。

以下代码运行速度很快:

                var raw = await Collection.GroupAsync(
                x => x.At >= fromDate && x.At <= toDate && valorArray.Contains(x.Valor),
                x => x.At,
                x => new ProductPositionKey {Valor = x.Valor, IssueNostro = x.IssueNostro},
                g => new ProductPositionGroupItem
                {
                    CurrencyId = g.Last().CurrencyId,
                    DescriptionCombo = g.Last().DescriptionCombo,
                    Id = g.Last().Id,
                    IssueNostro = g.Last().IssueNostro,
                    NominalInCertificateCurrency = g.Last().NominalInCertificateCurrency,
                    NominalInChf = g.Last().NominalInChf,
                    PaymentDate = g.Last().PaymentDate,
                    Position = g.Last().Position,
                    PositionFirstDate = g.First().At,
                    PositionLastDate = g.Last().At,
                    Price = g.Last().Price,
                    ProductName = g.Last().ProductName,
                    RedemptionDate = g.Last().RedemptionDate,
                    Source = g.Last().Source,
                    TimeStamp = g.Last().TimeStamp,
                    Valor = g.Last().Valor
                });