如何从SFTP读取CSV文件并使用CSVHelper解析内容而不在本地保存CSV?
这是否可行,还是我们必须将其保存在本地,解析并删除文件?
我正在使用SSH.Net和CSVHelper。
答案 0 :(得分:1)
这是可行的:直接对远程文件的流进行处理即可,无需先把文件保存到本地:
/// <summary>
/// Starts one <c>ParseRemoteFileAsync</c> call per remote file path and awaits all of them.
/// </summary>
/// <returns>A task that completes once every started parse task has completed.</returns>
public async Task ProcessRemoteFilesAsync()
{
    var credentials = new Credentials("host", "username", "password");

    var filePaths = new List<string>();
    // populate filePaths here ..

    // Start every parse up front; nothing is awaited until all tasks have been created.
    var pendingParses = new List<Task<FileContent>>();
    foreach (var remotePath in filePaths)
    {
        pendingParses.Add(ParseRemoteFileAsync(credentials, remotePath));
    }

    var results = await Task.WhenAll(pendingParses.ToArray()).ConfigureAwait(false);
    // iterate over results here ..
}
/// <summary>
/// Opens <paramref name="filePath"/> over SFTP and hands the remote stream straight to a
/// <c>CsvReader</c>, so the CSV file is never written to local disk.
/// </summary>
/// <param name="credentials">Supplies the host, user name and password for the SFTP session.</param>
/// <param name="filePath">Path of the CSV file on the remote server.</param>
/// <returns>A task producing the <c>FileContent</c> built for the remote file.</returns>
public async Task<FileContent> ParseRemoteFileAsync(Credentials credentials, string filePath)
{
    // Connect/OpenRead and the CSV reads below are synchronous calls. Running them through
    // Task.Run keeps the caller from blocking and lets several files be processed
    // concurrently when the returned tasks are combined with Task.WhenAll.
    return await Task.Run(() =>
    {
        using (var sftp = new SftpClient(credentials.host, credentials.username, credentials.password))
        {
            sftp.Connect();
            try
            {
                using (var remoteFileStream = sftp.OpenRead(filePath))
                using (var reader = new StreamReader(remoteFileStream))
                using (var csv = new CsvReader(reader))
                {
                    /*
                    // Example of CSV parsing:
                    var records = new List<Foo>();
                    csv.Read();
                    csv.ReadHeader();
                    while (csv.Read())
                    {
                        var record = new Foo
                        {
                            Id = csv.GetField<int>("Id"),
                            Name = csv.GetField("Name")
                        };
                        records.Add(record);
                    }
                    */

                    // Placeholder result: fill it from the parsed records while the
                    // reader (and therefore the remote stream) is still open.
                    return new FileContent();
                }
            }
            finally
            {
                sftp.Disconnect();
            }
        }
    }).ConfigureAwait(false);
}
下面是使用 SftpClient 对象池的修改版本(参见 C# Object Pooling Pattern implementation)。
对象池的实现借鉴自《How to: Create an Object Pool by Using a ConcurrentBag》:
/// <summary>
/// Pool of reusable <typeparamref name="T"/> instances backed by a
/// <see cref="ConcurrentBag{T}"/>. Implementation borrowed from [How to: Create an Object Pool by Using a
/// ConcurrentBag](https://docs.microsoft.com/en-us/dotnet/standard/collections/thread-safe/how-to-create-an-object-pool).
/// </summary>
/// <typeparam name="T">Type of the pooled objects; disposed together with the pool.</typeparam>
public class ObjectPool<T> : IDisposable
    where T : IDisposable
{
    private readonly Func<T> _objectGenerator;
    private readonly ConcurrentBag<T> _objects;

    // Set once by Dispose(); volatile so a PutObject running on another thread observes it.
    private volatile bool _disposed;

    /// <summary>Creates an empty pool.</summary>
    /// <param name="objectGenerator">Factory invoked by <see cref="GetObject"/> when the pool is empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objectGenerator"/> is null.</exception>
    public ObjectPool(Func<T> objectGenerator)
    {
        _objectGenerator = objectGenerator ?? throw new ArgumentNullException(nameof(objectGenerator));
        _objects = new ConcurrentBag<T>();
    }

    /// <summary>
    /// Disposes every object currently held by the pool. Objects handed back through
    /// <see cref="PutObject"/> afterwards are disposed instead of being stored.
    /// </summary>
    public void Dispose()
    {
        _disposed = true;
        DisposePooledObjects();
    }

    /// <summary>
    /// Takes an object out of the pool, or creates a new one with the generator when the pool is empty.
    /// </summary>
    /// <returns>A pooled or newly generated object; the caller hands it back via <see cref="PutObject"/>.</returns>
    public T GetObject()
    {
        return _objects.TryTake(out var item) ? item : _objectGenerator();
    }

    /// <summary>Returns an object to the pool so that a later <see cref="GetObject"/> can reuse it.</summary>
    /// <param name="item">The object to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
    public void PutObject(T item)
    {
        // A stored null would later be handed out by GetObject or crash Dispose.
        if (item == null)
        {
            throw new ArgumentNullException(nameof(item));
        }

        if (_disposed)
        {
            // The pool is gone, so nobody would ever dispose a stored item.
            item.Dispose();
            return;
        }

        _objects.Add(item);

        // Dispose() may have drained the bag between the check above and Add;
        // drain again so the item just added is not left behind.
        if (_disposed)
        {
            DisposePooledObjects();
        }
    }

    // Removes every stored object from the bag and disposes it.
    private void DisposePooledObjects()
    {
        while (_objects.TryTake(out var item))
        {
            item.Dispose();
        }
    }
}
下面是最简单的基于对象池的实现(未考虑完整的异常处理和重试策略):
/// <summary>
/// Sample that reuses connected <c>SftpClient</c> instances through an
/// <c>ObjectPool&lt;SftpClient&gt;</c> instead of opening a new connection per operation.
/// Dispose the instance to dispose the pool it owns.
/// </summary>
internal class SftpclientTest : IDisposable
{
    private readonly ObjectPool<SftpClient> _objectPool;

    /// <summary>Creates the pool; clients are created and connected on demand.</summary>
    /// <param name="credentials">Supplies the host, user name and password for new clients.</param>
    public SftpclientTest(Credentials credentials)
    {
        _objectPool = new ObjectPool<SftpClient>(() =>
        {
            var client = new SftpClient(credentials.host, credentials.username, credentials.password);
            try
            {
                client.Connect();
            }
            catch
            {
                // The caller never receives this client, so it has to be released here.
                client.Dispose();
                throw;
            }
            return client;
        });
    }

    /// <summary>Disposes the client pool owned by this instance.</summary>
    public void Dispose()
    {
        _objectPool.Dispose();
    }

    /// <summary>Borrows a client from the pool for a directory listing and releases it afterwards.</summary>
    public void GetDirectoryList()
    {
        var client = _objectPool.GetObject();
        try
        {
            // client.ListDirectory() ..
        }
        finally
        {
            ReleaseClient(client);
        }
    }

    /// <summary>Starts one <see cref="ParseRemoteFileAsync"/> call per remote file path and awaits all of them.</summary>
    /// <returns>A task that completes once every started parse task has completed.</returns>
    public async Task ProcessRemoteFilesAsync()
    {
        var filePaths = new List<string>();
        // initializing filePaths ..
        var tasks = filePaths
            .Select(f => ParseRemoteFileAsync(f))
            .ToArray();
        var results = await Task.WhenAll(tasks).ConfigureAwait(false);
        // traverse through results..
    }

    /// <summary>
    /// Reads <paramref name="filePath"/> through a pooled client and feeds the remote stream
    /// to a <c>CsvReader</c> without saving the file locally.
    /// </summary>
    /// <param name="filePath">Path of the CSV file on the remote server.</param>
    /// <returns>A task producing the <c>FileContent</c> built for the remote file.</returns>
    public Task<FileContent> ParseRemoteFileAsync(string filePath)
    {
        // OpenRead and the CSV reads are synchronous calls. Task.Run moves them off the
        // caller's thread, lets several files (and pooled clients) be used concurrently,
        // and reports failures through the returned task instead of throwing synchronously.
        return Task.Run(() =>
        {
            var client = _objectPool.GetObject();
            try
            {
                using (var remoteFileStream = client.OpenRead(filePath))
                using (var reader = new StreamReader(remoteFileStream))
                using (var csv = new CsvReader(reader))
                {
                    // ..
                    return new FileContent();
                }
            }
            finally
            {
                ReleaseClient(client);
            }
        });
    }

    // Hands a still-connected client back to the pool; a disconnected one is disposed
    // so that it is not leaked.
    private void ReleaseClient(SftpClient client)
    {
        if (client.IsConnected)
        {
            _objectPool.PutObject(client);
        }
        else
        {
            client.Dispose();
        }
    }
}