给定以下 TPL Dataflow 设置:
// Root of the tile tree that the pipeline walks.
var directory = new DirectoryInfo(@"C:\dev\kortforsyningen_dsm\tiles");

// Fans every directory it receives out to both linked finder blocks.
var dirBroadcast = new BroadcastBlock<DirectoryInfo>(dir => dir);

// Emits the immediate sub-directories of each incoming directory.
// The lambda must enumerate its own argument: enumerating the captured root
// instead would re-emit the root's children forever and never descend.
var dirfinder = new TransformManyBlock<DirectoryInfo, DirectoryInfo>(dir => dir.GetDirectories());

// Emits the files contained directly in each incoming directory
// (again using the lambda argument rather than the captured root).
var tileFilder = new TransformManyBlock<DirectoryInfo, FileInfo>(dir => dir.GetFiles());

dirBroadcast.LinkTo(dirfinder);
dirBroadcast.LinkTo(tileFilder);

// Closes the cycle: discovered sub-directories are fed back into the broadcaster.
dirfinder.LinkTo(dirBroadcast);
// Builds the tile-combiner block. XYZTileCombinerBlock is not defined in this snippet;
// the roles of the constructor arguments below are inferred from how they are used.
var block = new XYZTileCombinerBlock<FileInfo>(
    3,
    // Maps a tile file to a quad key using the last three segments of its path.
    tile =>
    {
        var lastThreeSegments = tile.FullName.Split('\\').Reverse().Take(3).Reverse();
        var numbers = lastThreeSegments
            .Select(segment => Path.GetFileNameWithoutExtension(segment))
            .Select(text => int.Parse(text))
            .ToArray();
        return XYZTileCombinerBlock<CloudBlockBlob>.TileXYToQuadKey(numbers[0], numbers[1], numbers[2]);
    },
    // Maps a quad key back to the tile file located under the root directory.
    quadKey => XYZTileCombinerBlock<FileInfo>.QuadKeyToTileXY(
        quadKey,
        (z, x, y) =>
        {
            var relativePath = string.Format("{0}/{1}/{2}.png", z, x, y);
            return new FileInfo(Path.Combine(directory.FullName, relativePath));
        }),
    // Factory for a pass-through block that only traces each string it sees.
    () => new TransformBlock<string, string>(item =>
    {
        Trace.TraceInformation("Combining {0}", item);
        return item;
    }));
// Every file emitted by tileFilder is forwarded to the combiner block.
tileFilder.LinkTo(block);
// TraceTimer is not defined in this snippet; presumably it times the enclosed scope -- TODO confirm.
using (new TraceTimer("Time"))
{
// Seed the pipeline with the root directory.
dirBroadcast.Post(directory);
// Terminal consumer: traces the name of each file that leaves the combiner block.
block.LinkTo(new ActionBlock<FileInfo>((s) =>
{
Trace.TraceInformation("Done combining : {0}", s.Name);
}));
// NOTE(review): Complete() is called right after the root directory is posted, without
// waiting for dirfinder/tileFilder to drain -- this is the completion problem the question asks about.
block.Complete();
// Blocks the calling thread until the combiner block has finished.
block.Completion.Wait();
}
由于存在这个循环,我想知道如何将其标记为完成。一个目录被发布到 dirBroadcast 广播块,该广播块再把它发布给 dirfinder,而 dirfinder 可能会把新的目录发回广播块,所以我不能简单地将其标记为完成,因为那样会阻止 dirfinder 再添加任何目录。我应该重新设计它以跟踪目录的数量,还是 TPL 中已有现成的机制可以处理这种情况?
答案 0 :(得分:4)
如果您的代码的目的是使用某种并行方式遍历目录结构,那么我建议不使用TPL Dataflow并使用Microsoft的Reactive Framework。我认为它变得更加简单。
下面是我的做法。
首先定义一个递归函数来构建目录列表:
// Flattens a directory tree into an observable sequence: the directory itself,
// followed by everything produced by recursing into each of its sub-directories.
// The delegate is declared first so that the lambda can refer to itself.
Func<DirectoryInfo, IObservable<DirectoryInfo>> recurse = null;
recurse = current =>
{
    IObservable<DirectoryInfo> self = Observable.Return(current);
    IObservable<DirectoryInfo> descendants = current
        .GetDirectories()
        .ToObservable()
        .SelectMany(child => recurse(child));

    // Notifications are delivered on the default Rx scheduler.
    return self.Concat(descendants).ObserveOn(Scheduler.Default);
};
执行目录的递归并使用默认的Rx调度程序,使得observable并行运行。
因此,用一个 DirectoryInfo 作为输入调用 recurse,我就能得到一个包含该输入目录及其所有后代目录的可观察序列。
现在我可以建立一个相当直接的查询来获得我想要的结果:
// Root of the tile tree; tile files are expected at <root>\{z}\{x}\{y}.png.
var root = new DirectoryInfo(@"C:\dev\kortforsyningen_dsm\tiles");

// For every file found under the root, parse the last three path segments as
// integers and rebuild the tile's FileInfo from them.
var query =
    from di in recurse(root)
    from fi in di.GetFiles().ToObservable()
    let zxy =
        fi
            .FullName
            .Split('\\')
            .Reverse()
            .Take(3)
            .Reverse()
            .Select(s => int.Parse(Path.GetFileNameWithoutExtension(s)))
            .ToArray()
    let suffix = String.Format("{0}/{1}/{2}.png", zxy[0], zxy[1], zxy[2])
    // Combine with the ROOT, not with the file's own directory: di is already
    // <root>\{z}\{x}, so appending the suffix to it would yield the non-existent
    // path <root>\{z}\{x}\{z}/{x}/{y}.png.
    select new FileInfo(Path.Combine(root.FullName, suffix));
现在我可以像这样操作查询:
// Subscribing starts the query; the name of every resulting file is traced.
query.Subscribe(tile => Trace.TraceInformation("Done combining : {0}", tile.Name));
现在我可能已经错过了一些自定义代码,但如果这是一种你想采取的方法,我相信你可以很容易地解决任何逻辑问题。
当代码用完子目录和文件时,此代码会自动处理完成。
要将Rx添加到项目中,请在NuGet中查找“Rx-Main”。
答案 1 :(得分:1)
我认为没有办法做到这一点,因为每个块(dirBroadcast
和tileFilder
)都依赖于另一个块而无法自行完成。
我建议您在没有TPL Dataflow的情况下重新设计目录遍历,这不适合这类问题。在我看来,更好的方法就是递归扫描目录并用文件流填充block
:
/// <summary>
/// Posts every file found under <paramref name="directoryInfo"/> to
/// <paramref name="block"/>: first the files of the directory itself, then,
/// recursively, the files of each of its sub-directories.
/// </summary>
/// <param name="directoryInfo">Directory whose tree is scanned.</param>
/// <param name="block">Block that receives each file via <c>Post</c>.</param>
private static void FillBlock(DirectoryInfo directoryInfo, XYZTileCombinerBlock<FileInfo> block)
{
    FileInfo[] files = directoryInfo.GetFiles();
    for (int i = 0; i < files.Length; i++)
    {
        block.Post(files[i]);
    }

    DirectoryInfo[] children = directoryInfo.GetDirectories();
    for (int i = 0; i < children.Length; i++)
    {
        FillBlock(children[i], block);
    }
}
// Walk the whole tree synchronously, posting every file to the block.
FillBlock(directory, block);
// FillBlock has returned, so nothing more will be posted: signal completion.
block.Complete();
// Asynchronously wait until the block has finished.
await block.Completion;
答案 2 :(得分:1)
我确信这并非总是可行,但在许多情况下(包括目录枚举),您可以使用一个运行计数器和 Interlocked 函数,让带有循环的一对多数据流也能正常完成:
/// <summary>
/// Creates a source block that emits the path of every directory found, at any depth,
/// below <paramref name="path"/>, and completes once the whole tree has been enumerated.
/// The root path itself is not emitted.
/// </summary>
/// <param name="path">Root directory whose descendants are enumerated.</param>
/// <param name="maxParallel">Maximum number of directories enumerated concurrently.</param>
/// <returns>A buffer block holding the discovered directory paths.</returns>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
public static ISourceBlock<string> GetDirectoryEnumeratorBlock(string path, int maxParallel = 5)
{
    if (path == null) throw new ArgumentNullException(nameof(path));

    var outputBuffer = new BufferBlock<string>();

    // Directories that have been posted but not yet enumerated; starts at 1 for the root.
    var pending = 1;

    var broadcastBlock = new BroadcastBlock<string>(s => s);
    var getDirectoriesBlock = new TransformManyBlock<string, string>(d =>
    {
        var subdirectories = Directory.EnumerateDirectories(d).ToList();

        // Add the sub-directory count, minus 1 for the directory just enumerated.
        // Decide on the value returned by Interlocked.Add rather than re-reading the
        // shared variable, so the check is part of the same atomic operation.
        if (Interlocked.Add(ref pending, subdirectories.Count - 1) == 0)
        {
            // Nothing is queued or in flight any more: the whole tree has been enumerated.
            broadcastBlock.Complete();
        }

        return subdirectories;
    }, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = maxParallel });

    broadcastBlock.LinkTo(outputBuffer, new DataflowLinkOptions { PropagateCompletion = true });
    broadcastBlock.LinkTo(getDirectoriesBlock, new DataflowLinkOptions { PropagateCompletion = true });

    // Feedback edge of the cycle. Completion is deliberately not propagated along it;
    // the pending counter above decides when the cycle is finished.
    getDirectoriesBlock.LinkTo(broadcastBlock);

    // If enumeration fails (for example access is denied), surface the error through the
    // broadcast block so that outputBuffer faults instead of leaving consumers waiting forever.
    getDirectoriesBlock.Completion.ContinueWith(t =>
    {
        if (t.IsFaulted)
        {
            ((IDataflowBlock)broadcastBlock).Fault(t.Exception.InnerException);
        }
    });

    getDirectoriesBlock.Post(path);
    return outputBuffer;
}
我用它的一个稍作修改的版本来枚举文件,效果很好。请注意最大并行度,它很快就会让网络文件系统不堪重负!
答案 3 :(得分:1)
这是对 Andrew Hanlon 的解决方案的一种泛化。它返回一个TransformBlock
,它支持递归地向自己发布消息,并在没有更多消息要处理时自动完成。
transform lambda 具有三个参数,而不是通常的一个参数。第一个参数是要处理的项目。第二个参数是已处理消息的“路径”,即包含其各级父消息的序列IEnumerable<TInput>
。第三个参数是Action<TInput>
,它将新消息作为当前消息的子级发布到该块。
/// <summary>Creates a dataflow block that supports posting messages to itself,
/// and knows when it has completed processing all messages.</summary>
/// <typeparam name="TInput">Type of the messages accepted from outside and posted recursively.</typeparam>
/// <typeparam name="TOutput">Type of the result produced for each processed message.</typeparam>
/// <param name="transform">Asynchronous delegate invoked for each message. It receives the
/// message, the sequence of its ancestor messages (empty for messages posted from outside),
/// and an action that posts a new message as a child of the current one.</param>
/// <param name="dataflowBlockOptions">Options applied to the processing block; when null, a
/// default instance is used. Only its cancellation token and bounded capacity are copied to
/// the external input block, and only its cancellation token to the internal input block.</param>
/// <returns>A propagator that wraps the external input block as its target side and the
/// processing block as its source side.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
CreateRecursiveTransformBlock<TInput, TOutput>(
Func<TInput, IEnumerable<TInput>, Action<TInput>, Task<TOutput>> transform,
ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
if (transform == null) throw new ArgumentNullException(nameof(transform));
dataflowBlockOptions = dataflowBlockOptions ?? new ExecutionDataflowBlockOptions();
// Outstanding-work counter: +1 for each message accepted by input1, +1 for each child
// successfully posted to input2, -1 after each transform invocation finishes, and -1 once
// when input1 completes. Whoever brings it to 0 completes input2.
int pendingCount = 1; // The initial 1 represents the completion of input1 block
// External entry point: wraps each incoming item with an empty ancestor path.
var input1 = new TransformBlock<TInput, (TInput, IEnumerable<TInput>)>(item =>
{
Interlocked.Increment(ref pendingCount);
return (item, Enumerable.Empty<TInput>());
}, new ExecutionDataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken,
BoundedCapacity = dataflowBlockOptions.BoundedCapacity
});
// Internal entry point: receives the child messages posted from inside the transform.
var input2 = new BufferBlock<(TInput, IEnumerable<TInput>)>(new DataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken
// Unbounded capacity
});
// Processing block: runs the user delegate with the caller-supplied options.
var output = new TransformBlock<(TInput, IEnumerable<TInput>), TOutput>(async entry =>
{
try
{
var (item, path) = entry;
var postChildAction = CreatePostAction(item, path);
return await transform(item, path, postChildAction).ConfigureAwait(false);
}
finally
{
// Runs whether the transform succeeded or threw: this message is no longer pending.
// If it was the last outstanding unit of work, no more children can arrive.
if (Interlocked.Decrement(ref pendingCount) == 0) input2.Complete();
}
}, dataflowBlockOptions);
// Builds the action handed to the transform for posting children of parentItem;
// each child's path is the parent's path with the parent appended.
Action<TInput> CreatePostAction(TInput parentItem, IEnumerable<TInput> parentPath)
{
return item =>
{
// The Post will be unsuccessful only in case of block failure
// or cancellation, so no specific action is needed here.
if (input2.Post((item, parentPath.Append(parentItem))))
{
Interlocked.Increment(ref pendingCount);
}
};
}
// Both input blocks feed the single processing block.
input1.LinkTo(output);
input2.LinkTo(output);
// When input1 completes, release the initial count; input2 is completed here only if
// that release brought the counter to 0 (otherwise the finally block above does it later).
PropagateCompletion(input1, input2,
condition: () => Interlocked.Decrement(ref pendingCount) == 0);
// Once input2 has completed, the processing block is completed as well.
PropagateCompletion(input2, output);
PropagateFailure(output, input1, input2); // Ensure that all blocks are faulted
return DataflowBlock.Encapsulate(input1, output);
// Waits for block1 to finish, then: faults block2 with block1's inner exception if block1
// failed; does nothing if block1 was canceled; otherwise completes block2 when no
// condition was given or the condition returns true. Started above without being awaited.
async void PropagateCompletion(IDataflowBlock block1, IDataflowBlock block2,
Func<bool> condition = null)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
// Deliberately ignored: the outcome is inspected through block1.Completion below.
catch { }
if (block1.Completion.Exception != null)
{
block2.Fault(block1.Completion.Exception.InnerException);
}
else
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
if (condition == null || condition()) block2.Complete();
}
}
// Waits for block1 to finish; if it failed (and was not canceled), faults block2 and
// block3 with the same exception. Started above without being awaited.
async void PropagateFailure(IDataflowBlock block1, IDataflowBlock block2,
IDataflowBlock block3)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
catch (Exception ex)
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
block2.Fault(ex); block3.Fault(ex);
}
}
}
/// <summary>Overload of <c>CreateRecursiveTransformBlock</c> that accepts a synchronous
/// delegate and forwards to the asynchronous overload.</summary>
/// <param name="transform">Synchronous delegate invoked for each message; it receives the
/// message, its path of parent messages, and an action that posts a child message.</param>
/// <param name="dataflowBlockOptions">Options forwarded unchanged to the asynchronous overload.</param>
/// <returns>The block created by the asynchronous overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
    CreateRecursiveTransformBlock<TInput, TOutput>(
    Func<TInput, IEnumerable<TInput>, Action<TInput>, TOutput> transform,
    ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
    // Fail fast, like the asynchronous overload does. Without this check the wrapping
    // lambda below is never null, so a null delegate would only surface later as a
    // NullReferenceException while the first message is being processed.
    if (transform == null) throw new ArgumentNullException(nameof(transform));

    return CreateRecursiveTransformBlock<TInput, TOutput>((item, path, postAction) =>
        Task.FromResult(transform(item, path, postAction)), dataflowBlockOptions);
}
结果块在内部由三个块组成:两个接收消息的输入块和一个处理消息的输出块。第一输入块从外部接收消息,第二输入块从内部接收消息。第二个输入块具有无穷大的容量,因此无限递归最终将导致OutOfMemoryException
。
用法示例:
// Recursively visits a folder tree: each folder posts its sub-folders back to the block,
// prints its own file count and depth, and returns the file count as the block's output.
var fileCounter = CreateRecursiveTransformBlock<string, int>(
    (folderPath, parentPaths, postChild) =>
    {
        foreach (var subfolder in Directory.EnumerateDirectories(folderPath))
        {
            postChild(subfolder);
        }

        // Count once: EnumerateFiles is lazy, so every Count() call would scan the folder
        // again, and the printed and returned values could differ.
        var fileCount = Directory.EnumerateFiles(folderPath).Count();
        Console.WriteLine($"{folderPath} has {fileCount} files"
            + $", and is {parentPaths.Count()} levels deep");
        return fileCount;
    });

// The per-folder counts are not needed here, so they are discarded.
fileCounter.LinkTo(DataflowBlock.NullTarget<int>());
fileCounter.Post(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments));
// No more external input; the block finishes once all recursively posted folders are processed.
fileCounter.Complete();
fileCounter.Completion.Wait();
上面的代码在控制台中打印“MyDocuments”文件夹及其所有子文件夹,以及每个文件夹的文件数和嵌套深度。
答案 4 :(得分:0)
只是为了展示我的真实答案,即TPL和Rx的组合。
// Flattens a directory tree into an observable sequence, descending only into
// sub-directories whose numeric name lies between tl_tile[0] and br_tile[0] (inclusive).
// The delegate is declared first so that the lambda can refer to itself.
Func<DirectoryInfo, IObservable<DirectoryInfo>> recurse = null;
recurse = current =>
{
    var childrenInRange = current.GetDirectories().Where(d =>
    {
        int index = int.Parse(d.Name);
        return index <= br_tile[0] && index >= tl_tile[0];
    });

    IObservable<DirectoryInfo> descendants = childrenInRange
        .ToObservable()
        .SelectMany(child => recurse(child));

    // Notifications are delivered on the default Rx scheduler.
    return Observable.Return(current).Concat(descendants).ObserveOn(Scheduler.Default);
};
// Keeps only files whose numeric name (without extension) lies between
// br_tile[1] and tl_tile[1] (inclusive).
Func<FileInfo, bool> isFileInRange = f =>
{
    int index = int.Parse(Path.GetFileNameWithoutExtension(f.Name));
    return index >= br_tile[1] && index <= tl_tile[1];
};

// Every in-range file of every directory reachable from the base-level directory.
var query =
    from di in recurse(new DirectoryInfo(Path.Combine(directory.FullName, baselvl.ToString())))
    from fi in di.GetFiles().Where(isFileInRange).ToObservable()
    select fi;
// AsObserver() forwards each file to the block and calls Complete() (or Fault()) on it
// when the query finishes (or fails). The block must therefore NOT be completed manually
// here: the query delivers its items on the Rx scheduler, so Subscribe may return before
// all files have been pushed, and an early Complete() would make the block reject them.
query.Subscribe(block.AsObserver());
Console.WriteLine("Done subscribing");
// Blocks until the observer has completed the block and the block has finished.
block.Completion.Wait();
Console.WriteLine("Done TPL Block");
其中block是我的var block = new XYZTileCombinerBlock<FileInfo>