我正在通过Javascript在cosmosdb上准备存储过程,但是它获取的文档数量少于集合中的实际文档数量。
sproc由C#调用,C#传递一个参数“transmitterMMSI”,它也是这个集合的分区键。
首先,在sproc中执行以下查询:
var query = 'SELECT COUNT(1) AS Num FROM AISData a WHERE a.TransmitterMMSI="' + transmitterMMSI + '"';
结果输出作为响应,值为5761,与集合中的实际文档数相同。
但是,当我将查询更改为以下内容时:
var query = 'SELECT * FROM AISData a WHERE a.TransmitterMMSI="' + transmitterMMSI + '"';
documents.length输出为5574,小于实数。
我已经更改了 pageSize:-1 ,这意味着无限制。
我用google和堆栈溢出进行了一些搜索,似乎延续可以提供帮助。但是,我尝试了一些例子,它们不起作用。
任何熟悉此事的人都可以提供帮助吗?
以下列出了脚本。
sproc js脚本在这里,它也是C#代码中使用的文件“DownSampling.js”:
function DownSampling(transmitterMMSI, interval) {
var context = getContext();
var collection = context.getCollection();
var response = context.getResponse();
var receiverTime;
var tempTime;
var groupKey;
var aggGroup = new Object();
var query = 'SELECT * FROM AISData a WHERE a.TransmitterMMSI="' + transmitterMMSI + '"';
var accept = collection.queryDocuments(collection.getSelfLink(), query, { pageSize: -1},
function (err, documents, responseOptions) {
if (err) throw new Error("Error" + err.message);
// Find the smallest deviation comparting to IntervalTime in each group
for (i = 0; i < documents.length; i++) {
receiverTime = Date.parse(documents[i].ReceiverTime);
tempTime = receiverTime / 1000 + interval / 2;
documents[i].IntervalTime = (tempTime - tempTime % interval) * 1000;
documents[i].Deviation = Math.abs(receiverTime - documents[i].IntervalTime);
// Generate a group key for each group, combinated of TransmitterMMSI and IntervalTime
groupKey = documents[i].IntervalTime.toString();
if (typeof aggGroup[groupKey] === 'undefined' || aggGroup[groupKey] > documents[i].Deviation) {
aggGroup[groupKey] = documents[i].Deviation;
}
}
// Tag the downsampling
for (i = 0; i < documents.length; i++) {
groupKey = documents[i].IntervalTime;
if (aggGroup[groupKey] == documents[i].Deviation) {
documents[i].DownSamplingTag = 1;
} else {
documents[i].DownSamplingTag = 0;
}
// Remove the items that are not used
delete documents[i].IntervalTime;
delete documents[i].Deviation;
// Replace the document
var acceptDoc = collection.replaceDocument(documents[i]._self, documents[i], {},
function (errDoc, docReplaced) {
if (errDoc) {
throw new Error("Update documents error:" + errDoc.message);
}
});
if (!acceptDoc) {
throw "Update documents not accepted, abort ";
}
}
response.setBody(documents.length);
});
if (!accept) {
throw new Error("The stored procedure timed out.");
}
}
C#代码在这里:
private async Task DownSampling()
{
Database database = this.client.CreateDatabaseQuery().Where(db => db.Id == DatabaseId).ToArray().FirstOrDefault();
DocumentCollection collection = this.client.CreateDocumentCollectionQuery(database.SelfLink).Where(c => c.Id == AISTestCollectionId).ToArray().FirstOrDefault();
string scriptFileName = @"..\..\StoredProcedures\DownSampling.js";
string scriptId = Path.GetFileNameWithoutExtension(scriptFileName);
var sproc = new StoredProcedure
{
Id = scriptId,
Body = File.ReadAllText(scriptFileName)
};
await TryDeleteStoredProcedure(collection.SelfLink, sproc.Id);
sproc = await this.client.CreateStoredProcedureAsync(collection.SelfLink, sproc);
IQueryable<dynamic> query = this.client.CreateDocumentQuery(
UriFactory.CreateDocumentCollectionUri(DatabaseId, AISTestCollectionId),
new SqlQuerySpec()
{
//QueryText = "SELECT a.TransmitterMMSI FROM " + AISTestCollectionId + " a",
QueryText = "SELECT a.TransmitterMMSI FROM " + AISTestCollectionId + " a WHERE a.TransmitterMMSI=\"219633000\"",
}, new FeedOptions { MaxItemCount = -1, EnableCrossPartitionQuery = true, MaxDegreeOfParallelism = -1, MaxBufferedItemCount = -1 });
List<dynamic> transmitterMMSIList = query.ToList(); //TODO: Remove duplicates
Console.WriteLine("TransmitterMMSI count: {0}", transmitterMMSIList.Count());
HashSet<string> exist = new HashSet<string>();
foreach (var item in transmitterMMSIList)
{
//int transmitterMMSI = Int32.Parse(item.TransmitterMMSI.ToString());
string transmitterMMSI = item.TransmitterMMSI.ToString();
if (exist.Contains(transmitterMMSI))
{
continue;
}
exist.Add(transmitterMMSI);
Console.WriteLine("TransmitterMMSI: {0} is being processed.", transmitterMMSI);
var response = await this.client.ExecuteStoredProcedureAsync<string>(sproc.SelfLink,
new RequestOptions { PartitionKey = new PartitionKey(transmitterMMSI) }, transmitterMMSI, 30);
string s = response.Response;
Console.WriteLine("TransmitterMMSI: {0} is processed completely.", transmitterMMSI);
}
}
private async Task TryDeleteStoredProcedure(string collectionSelfLink, string sprocId)
{
StoredProcedure sproc = this.client.CreateStoredProcedureQuery(collectionSelfLink).Where(s => s.Id == sprocId).AsEnumerable().FirstOrDefault();
if (sproc != null)
{
await client.DeleteStoredProcedureAsync(sproc.SelfLink);
}
}
我试图评论JS代码中的2个循环,只有 documents.length 输出,而响应数仍然较少。但是,我将查询更改为 SELECT a.id , documents.length 是正确的。看起来这是一个延续问题。
答案 0 :(得分:0)
sproc可能会超时。要在这些情况下使用延续令牌,您需要将其返回到C#调用代码,然后再次调用传递到令牌中的sproc。如果您向我们展示您的代码,我们可以提供更多帮助。
答案 1 :(得分:0)
您可以使用延续令牌从sproc中重复调用queryDocuments(),而无需向客户端进行额外的往返。请记住,如果你这么做很多次,你的sproc最终会超时。在你的情况下,听起来你已经非常接近获得你正在寻找的所有文件,所以也许你会好的。
以下是在sproc中使用延续令牌查询多页数据的示例:
function getManyThings() {
var collection = getContext().getCollection();
var query = {
query: 'SELECT r.id, r.FieldOne, r.FieldTwo FROM root r WHERE r.FieldThree="sought"'
};
var continuationToken;
getThings(continuationToken);
function getThings(continuationToken) {
var requestOptions = {
continuation: continuationToken,
pageSize: 1000 // Adjust this to suit your needs
};
var isAccepted = collection.queryDocuments(collection.getSelfLink(), query, requestOptions, function (err, feed, responseOptions) {
if (err) {
throw err;
}
for (var i = 0, len = feed.length; i < len; i++) {
var thing = feed[i];
// Do your logic on thing...
}
if (responseOptions.continuation) {
getThings(responseOptions.continuation);
}
else {
var response = getContext().getResponse();
response.setBody("RESULTS OF YOUR LOGIC");
}
});
if (!isAccepted) {
var response = getContext().getResponse();
response.setBody("Server rejected query - please narrow search criteria");
}
}
}