我有一个应用程序来跟踪网站的页面访问量。 这是我的模特:
public class VisitSession {
public string SessionId { get; set; }
public DateTime StartTime { get; set; }
public string UniqueVisitorId { get; set; }
public IList<PageVisit> PageVisits { get; set; }
}
当访问者访问该网站时,访问会话开始。一次访问会话有很多页面访问。当访问者第一次访问网站时,跟踪器将编写UniqueVisitorId(GUID)cookie。因此,我们可以知道访客是否回访了访客。
现在我想写一个视图,显示每天的TotalVisitSessions,TotalPageVisits,TotalUniqueVisitors。所以我写了这个多图缩减:
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
public VisitSummaryByDateIndex()
{
AddMap<VisitSession>(sessions => from s in sessions
select new VisitSummaryByDate
{
Date = s.StartTime.Date,
TotalVisitSessions = 1,
TotalPageVisits = 0,
TotalNewVisitors = s.IsNewVisit ? 1 : 0,
TotalUniqueVisitors = 0,
UniqueVisitorId = s.UniqueVisitorId
});
AddMap<PageVisit>(visits => from v in visits
select new VisitSummaryByDate
{
Date = v.VisitTime.Date,
TotalVisitSessions = 0,
TotalPageVisits = 1,
TotalNewVisitors = 0,
TotalUniqueVisitors = 0,
UniqueVisitorId = String.Empty
});
Reduce = results => from result in results
group result by result.Date into g
select new VisitSummaryByDate
{
Date = g.Key,
TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
TotalPageVisits = g.Sum(it => it.TotalPageVisits),
TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
TotalUniqueVisitors = g.Select(it => it.UniqueVisitorId).Where(it => it.Length > 0).Distinct().Count(),
UniqueVisitorId = String.Empty
};
}
}
问题在于“TotalUniqueVisitors”计算,有时索引结果的TotalUniqueVisitors是1,有时是2.但是我查了数据,它永远不会那么少。我的Map / Reduce语法有问题吗?
相关帖子: Raven DB: How to create "UniqueVisitorCount by date" index
可在此处找到包含示例数据的代码: https://gist.github.com/2702071
答案 0 :(得分:2)
正确的索引是:
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
public VisitSummaryByDateIndex()
{
AddMap<VisitSession>(sessions => from s in sessions
select new VisitSummaryByDate
{
Date = s.StartTime.Date,
TotalVisitSessions = 1,
TotalPageVisits = 0,
TotalNewVisitors = s.IsNewVisit ? 1 : 0,
TotalUniqueVisitors = 0,
UniqueVisitorId = s.UniqueVisitorId
});
AddMap<PageVisit>(visits => from v in visits
select new VisitSummaryByDate
{
Date = v.VisitTime.Date,
TotalVisitSessions = 0,
TotalPageVisits = 1,
TotalNewVisitors = 0,
TotalUniqueVisitors = 0,
UniqueVisitorId = string.Empty,
});
Reduce = results => from result in results
group result by result.Date into g
select new VisitSummaryByDate
{
Date = g.Key,
TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
TotalPageVisits = g.Sum(it => it.TotalPageVisits),
TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
TotalUniqueVisitors = g.Select(it => it.UniqueVisitorId).Where(x => x.Length > 0).Distinct().Count(),
UniqueVisitorId = g.FirstOrDefault().UniqueVisitorId,
};
}
}
不同之处在于在reduce中设置了UniqueVisitorId。我不能100%肯定为什么还需要这个,我必须承认。
答案 1 :(得分:2)
Reduce实际上是在结果上多次处理的。 您的索引假设这只发生一次,并且可以访问整个结果集。
您的索引需要如下所示:
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
public VisitSummaryByDateIndex()
{
AddMap<VisitSession>(sessions => from s in sessions
select new VisitSummaryByDate
{
Date = s.StartTime.Date,
TotalVisitSessions = 1,
TotalPageVisits = 0,
TotalNewVisitors = s.IsNewVisit ? 1 : 0,
TotalUniqueVisitors = 1,
UniqueVisitorId = new[]{s.UniqueVisitorId}
});
AddMap<PageVisit>(visits => from v in visits
select new VisitSummaryByDate
{
Date = v.VisitTime.Date,
TotalVisitSessions = 0,
TotalPageVisits = 1,
TotalNewVisitors = 0,
TotalUniqueVisitors = 0,
UniqueVisitorId = new string[0]
});
Reduce = results => from result in results
group result by result.Date into g
select new VisitSummaryByDate
{
Date = g.Key,
TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
TotalPageVisits = g.Sum(it => it.TotalPageVisits),
TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
TotalUniqueVisitors = g.Sum(it => it.TotalUniqueVisitors),,
UniqueVisitorId = g.Select(x=>x.UniqueVisitorId).Distinct()
};
}
}