如何从SFTP读取CSV文件并使用CSVHelper解析内容而不在本地保存CSV

时间:2019-05-13 18:46:36

标签: c#

如何从SFTP读取CSV文件并使用CSVHelper解析内容而不在本地保存CSV?

这是否可行,还是我们必须将其保存在本地,解析并删除文件?

我正在使用SSH.Net和CSVHelper。

1 个答案:

答案 0 :(得分:1)

这需要以流的方式直接处理远程文件:

    /// <summary>
    ///     Starts one <c>ParseRemoteFileAsync</c> call per remote path and waits until all of them
    ///     have completed.
    /// </summary>
    /// <returns>A task that completes once every file has been parsed.</returns>
    public async Task ProcessRemoteFilesAsync()
    {
        var remotePaths = new List<string>();
        var sftpCredentials = new Credentials("host", "username", "password");

        // initializing remotePaths ..

        // Kick off every parse first, then await them together.
        var pending = new List<Task<FileContent>>();
        foreach (var remotePath in remotePaths)
        {
            pending.Add(ParseRemoteFileAsync(sftpCredentials, remotePath));
        }

        FileContent[] results = await Task.WhenAll(pending).ConfigureAwait(false);

        // traverse through results..
    }

    /// <summary>
    ///     Opens <paramref name="filePath"/> on the SFTP server and feeds the remote stream directly
    ///     into a <c>CsvReader</c>, so the file is never written to local disk.
    /// </summary>
    /// <param name="credentials">Host, user name and password used to open the SFTP connection.</param>
    /// <param name="filePath">Path of the CSV file on the remote server.</param>
    /// <returns>The content parsed from the remote file.</returns>
    /// <remarks>
    ///     The body contains no <c>await</c>, so it runs synchronously on the calling thread;
    ///     the <c>async</c> modifier only ensures that exceptions surface through the returned task.
    /// </remarks>
    public async Task<FileContent> ParseRemoteFileAsync(Credentials credentials, string filePath)
    {
        using (var sftp = new SftpClient(credentials.host, credentials.username, credentials.password))
        {
            sftp.Connect();

            try
            {
                // Each reader wraps the previous one; disposing the outermost disposes the chain.
                using (var remoteFileStream = sftp.OpenRead(filePath))
                using (var reader = new StreamReader(remoteFileStream))
                using (var csv = new CsvReader(reader))
                {
                    // Example of CSV parsing:
                    //
                    // var records = new List<Foo>();
                    // csv.Read();
                    // csv.ReadHeader();
                    // while (csv.Read())
                    // {
                    //     var record = new Foo
                    //     {
                    //         Id = csv.GetField<int>("Id"),
                    //         Name = csv.GetField("Name")
                    //     };
                    //     records.Add(record);
                    // }

                    // The original had no return statement at all, which does not compile for a
                    // Task<FileContent> method.
                    // NOTE(review): populate the result from the parsed records; the members of
                    // FileContent are not visible here.
                    return new FileContent();
                }
            }
            finally
            {
                sftp.Disconnect();
            }
        }
    }

使用 SftpClient 对象池的修改版本:

请参见C# Object Pooling Pattern implementation

对象池的实现借用自 How to: Create an Object Pool by Using a ConcurrentBag:

/// <summary>
///     Simple thread-safe object pool backed by a <see cref="ConcurrentBag{T}"/>.
///     Implementation borrowed from [How to: Create an Object Pool by Using a
///     ConcurrentBag](https://docs.microsoft.com/en-us/dotnet/standard/collections/thread-safe/how-to-create-an-object-pool).
/// </summary>
/// <typeparam name="T">Disposable type of the pooled objects.</typeparam>
public class ObjectPool<T> : IDisposable
    where T : IDisposable
{
    private readonly Func<T> _objectGenerator;
    private readonly ConcurrentBag<T> _objects;

    // Set once by Dispose; volatile so that other threads calling GetObject/PutObject observe it.
    private volatile bool _disposed;

    /// <summary>Creates an empty pool.</summary>
    /// <param name="objectGenerator">Factory invoked by <see cref="GetObject"/> when the pool is empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="objectGenerator"/> is null.</exception>
    public ObjectPool(Func<T> objectGenerator)
    {
        _objectGenerator = objectGenerator ?? throw new ArgumentNullException(nameof(objectGenerator));
        _objects = new ConcurrentBag<T>();
    }

    /// <summary>
    ///     Disposes every object currently held by the pool and marks the pool as disposed.
    ///     Objects handed out by <see cref="GetObject"/> and not yet returned are disposed when
    ///     they are passed to <see cref="PutObject"/>.
    /// </summary>
    public void Dispose()
    {
        _disposed = true;
        DisposePooledObjects();
    }

    /// <summary>Takes an object from the pool, or creates one with the factory if the pool is empty.</summary>
    /// <returns>A pooled or newly generated object.</returns>
    /// <exception cref="ObjectDisposedException">The pool has been disposed.</exception>
    public T GetObject()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(ObjectPool<T>));
        }

        return _objects.TryTake(out var item) ? item : _objectGenerator();
    }

    /// <summary>
    ///     Returns an object to the pool. If the pool has already been disposed the object is
    ///     disposed immediately instead of being stored, so it cannot leak.
    /// </summary>
    /// <param name="item">Object to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
    public void PutObject(T item)
    {
        if (item == null)
        {
            throw new ArgumentNullException(nameof(item));
        }

        if (_disposed)
        {
            item.Dispose();
            return;
        }

        _objects.Add(item);

        // Dispose may have drained the bag between the check above and the Add;
        // drain again so that the item just added does not outlive the pool.
        if (_disposed)
        {
            DisposePooledObjects();
        }
    }

    // Removes and disposes everything currently stored in the bag.
    private void DisposePooledObjects()
    {
        while (_objects.TryTake(out var item))
        {
            item.Dispose();
        }
    }
}

最简单的基于池的实现(它不关心异常处理,重试策略):

/// <summary>
///     Minimal pool-based SFTP example: each operation borrows a connected <c>SftpClient</c> from
///     an <c>ObjectPool</c> and hands it back afterwards. Exception handling and retry policies
///     are deliberately left out.
/// </summary>
internal class SftpclientTest : IDisposable
{
    private readonly ObjectPool<SftpClient> _objectPool;


    /// <summary>Creates the pool; clients are created and connected on demand.</summary>
    /// <param name="credentials">Host, user name and password used for every pooled connection.</param>
    public SftpclientTest(Credentials credentials)
    {
        _objectPool = new ObjectPool<SftpClient>(() =>
        {
            var client = new SftpClient(credentials.host, credentials.username, credentials.password);

            try
            {
                client.Connect();
            }
            catch
            {
                // A client that failed to connect never reaches the pool, so release it here.
                client.Dispose();
                throw;
            }

            return client;
        });
    }


    /// <summary>Disposes the pool and every client currently stored in it.</summary>
    public void Dispose()
    {
        _objectPool.Dispose();
    }


    /// <summary>Borrows a client for a directory listing and releases it afterwards.</summary>
    public void GetDirectoryList()
    {
        var client = _objectPool.GetObject();

        try
        {
            // client.ListDirectory() ..
        }
        finally
        {
            ReleaseClient(client);
        }
    }


    /// <summary>
    ///     Starts one <see cref="ParseRemoteFileAsync"/> call per remote path and waits for all of them.
    /// </summary>
    /// <returns>A task that completes once every file has been parsed.</returns>
    public async Task ProcessRemoteFilesAsync()
    {
        var filePaths = new List<string>();

        // initializing filePaths ..

        var tasks = filePaths
            .Select(f => ParseRemoteFileAsync(f))
            .ToArray();

        var results = await Task.WhenAll(tasks).ConfigureAwait(false);

        // traverse through results..
    }

    /// <summary>
    ///     Opens <paramref name="filePath"/> through a pooled client and parses it as CSV straight
    ///     from the remote stream, without saving it locally.
    /// </summary>
    /// <param name="filePath">Path of the CSV file on the remote server.</param>
    /// <returns>An already-completed task holding the parsed content.</returns>
    /// <remarks>
    ///     The work runs synchronously; failures are thrown directly from this call rather than
    ///     through the returned task.
    /// </remarks>
    public Task<FileContent> ParseRemoteFileAsync(string filePath)
    {
        var client = _objectPool.GetObject();

        try
        {
            using (var remoteFileStream = client.OpenRead(filePath))
            {
                using (var reader = new StreamReader(remoteFileStream))
                {
                    using (var csv = new CsvReader(reader))
                    {
                        // ..
                    }
                }

                return Task.FromResult(new FileContent());
            }
        }
        finally
        {
            ReleaseClient(client);
        }
    }

    // Returns a still-connected client to the pool; a client that lost its connection is
    // disposed instead of being silently dropped.
    private void ReleaseClient(SftpClient client)
    {
        if (client.IsConnected)
        {
            _objectPool.PutObject(client);
        }
        else
        {
            client.Dispose();
        }
    }
}