c# - 如何使用多线程遍历包含 1 000 000 个文件的目录树

时间:2016-08-11 14:12:25

标签: c# multithreading

我需要把某个文件夹及其所有子文件夹中每个文件的名称和大小(以字节为单位)保存到数据库中。该文件夹中共有 1 000 000 个文件。当我使用 MSDN 中的示例时,程序需要运行 4 天,非常缓慢。

     /// <summary>
     /// Entry point. Reads the root directory from the <c>PDX_PathDocFiles</c> app setting,
     /// clears the <c>FileDBRecord</c> set and re-populates it by walking the directory tree.
     /// </summary>
     /// <param name="args">Command-line arguments (unused).</param>
     static void Main(string[] args)
    {
        string pdxPathDocFiles = System.Configuration.ConfigurationManager.AppSettings["PDX_PathDocFiles"];
        if (string.IsNullOrEmpty(pdxPathDocFiles))
        {
            Console.WriteLine("In the configuration file is missing the path to the root directory - PDX_PathDocFiles.");
        }
        else if (!Directory.Exists(pdxPathDocFiles))
        {
            Console.WriteLine("Directory not found");
        }
        else
        {
            try
            {
                Console.WriteLine("rootPath: " + pdxPathDocFiles);
                System.IO.DirectoryInfo rootDir = new DirectoryInfo(pdxPathDocFiles);

                // Dispose the context (and its connection) even when the walk or a save fails.
                using (PayDox_EPD19_T20_RGMEntities db = new PayDox_EPD19_T20_RGMEntities())
                {
                    db.FileDBRecord.RemoveRange(db.FileDBRecord);

                    WalkDirectoryTree(rootDir, rootDir.ToString(), db);
                    db.SaveChanges();
                }

                // Report success only when the whole operation completed without an exception.
                Console.WriteLine("All ok");
            }
            catch (Exception ex)
            {
                // Any failure ends up here (file system as well as database), so print
                // the actual error instead of assuming a connection problem.
                Console.WriteLine(ex.Message);
            }
        }
        Console.WriteLine("Bye, Good Day.");
    }
    /// <summary>
    /// Recursively records every file below <paramref name="root"/> in <paramref name="db"/>.
    /// Each file is stored with its full name minus the <paramref name="rootDir"/> text and its size in bytes.
    /// Directories that cannot be listed are reported on the console and skipped.
    /// </summary>
    /// <param name="root">Directory to process.</param>
    /// <param name="rootDir">Text removed from each file's full name before it is stored.</param>
    /// <param name="db">Context that receives the new <c>FileDBRecord</c> entries.</param>
    static void WalkDirectoryTree(System.IO.DirectoryInfo root, string rootDir, PayDox_EPD19_T20_RGMEntities db)
    {
        System.IO.FileInfo[] files = null;
        try
        {
            files = root.GetFiles("*.*");
        }
        catch (UnauthorizedAccessException e)
        {
            Console.WriteLine(e.Message);
        }
        catch (System.IO.DirectoryNotFoundException e)
        {
            Console.WriteLine(e.Message);
        }

        if (files == null)
        {
            // The directory could not be listed; nothing was added, so there is nothing to save.
            return;
        }

        if (files.Length > 0)
        {
            foreach (System.IO.FileInfo fi in files)
            {
                db.FileDBRecord.Add(new FileDBRecord { FileName = fi.FullName.Replace(rootDir, ""), FileSize = fi.Length });
            }

            // Flush this directory's records before descending, and only when something was added.
            db.SaveChanges();
        }

        System.IO.DirectoryInfo[] subDirs = null;
        try
        {
            subDirs = root.GetDirectories();
        }
        catch (UnauthorizedAccessException e)
        {
            // Same best-effort policy as for GetFiles: report and skip instead of aborting the whole walk.
            Console.WriteLine(e.Message);
        }
        catch (System.IO.DirectoryNotFoundException e)
        {
            Console.WriteLine(e.Message);
        }

        if (subDirs == null)
        {
            return;
        }

        foreach (System.IO.DirectoryInfo dirInfo in subDirs)
        {
            WalkDirectoryTree(dirInfo, rootDir, db);
        }
    }  

当我尝试另一种方式时,它会抛出堆栈溢出异常。

    /// <summary>
    /// Entry point. Reads the root directory from the <c>PDX_PathDocFiles</c> app setting,
    /// empties the <c>FileDBRecord</c> set, lists every file below the root and stores
    /// each file's name (with the configured root text removed) and size in bytes.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Maximum number of new entities attached to one context before it is saved and discarded.
        const int batchSize = 1000;

        string pdxPathDocFiles = System.Configuration.ConfigurationManager.AppSettings["PDX_PathDocFiles"];
        if (string.IsNullOrEmpty(pdxPathDocFiles))
        {
            Console.WriteLine("In the configuration file is missing the path to the root directory - PDX_PathDocFiles.");
        }
        else if (!Directory.Exists(pdxPathDocFiles))
        {
            Console.WriteLine("Directory not found");
        }
        else
        {
            try
            {
                Console.WriteLine("rootPath: " + pdxPathDocFiles);
                using (PayDox_EPD19_T20_RGMEntities cleanupDb = new PayDox_EPD19_T20_RGMEntities())
                {
                    cleanupDb.FileDBRecord.RemoveRange(cleanupDb.FileDBRecord);
                    cleanupDb.SaveChanges();
                }
                Console.WriteLine("Remove data from table");

                System.IO.FileInfo[] files = null;
                System.IO.DirectoryInfo rootDir2 = new DirectoryInfo(pdxPathDocFiles);
                try
                {
                    files = rootDir2.GetFiles("*.*", SearchOption.AllDirectories);
                    Console.WriteLine("Reed {0} fileName", files.Length);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.WriteLine("You do not have permission to access one or more folders in this directory tree.");
                    Console.WriteLine(ex.Message);
                    return;
                }

                // Insert in bounded batches, each through a short-lived context, so that no single
                // context ever has to track the whole file list at once.
                for (int offset = 0; offset < files.Length; offset += batchSize)
                {
                    int count = Math.Min(batchSize, files.Length - offset);
                    FileDBRecord[] batch = new FileDBRecord[count];
                    for (int i = 0; i < count; i++)
                    {
                        System.IO.FileInfo x = files[offset + i];
                        batch[i] = new FileDBRecord { FileName = x.FullName.Replace(pdxPathDocFiles, ""), FileSize = x.Length };
                    }

                    using (PayDox_EPD19_T20_RGMEntities batchDb = new PayDox_EPD19_T20_RGMEntities())
                    {
                        batchDb.FileDBRecord.AddRange(batch);
                        batchDb.SaveChanges();
                    }
                }

                // Report success only when every batch was stored.
                Console.WriteLine("All ok");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
        Console.WriteLine("Bye, Good Day.");
    }

如何让程序运行得更快?是否可以通过添加多线程来实现?

3 个答案:

答案 0 :(得分:1)

首先,您的代码不是异步的。请将其拆分到单独的类中,并把方法改为异步方法。这样在等待 IO 操作时,线程可以去处理其他工作。每当调用数据库或文件系统时,都应使用对应的异步方法。

我要做的第二件事是让每个事务都具有原子性。如果同一件事要重复做很多次,就应该把程序写成每一次操作都可以独立完成的形式。做到这一点之后,您就可以通过创建新任务(Task.Run)来并行运行这些操作。

完成这两项之后,如果任务仍然耗时较长,请查看 https://msdn.microsoft.com/en-us/library/ms824684.aspx。它可以对您的请求进行缓冲,从而优化处理流程。

答案 1 :(得分:0)

我通过使用 TPL(任务并行库)改进了基于 MSDN 的第一个示例。现在它只需运行 4 小时,而不是 4 天。



/// <summary>
/// Entry point. Reads the root directory from the <c>PDX_PathDocFiles</c> app setting,
/// truncates the <c>FileDBRecord</c> table and starts the directory walk that refills it,
/// then waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
        {
            string pdxPathDocFiles = System.Configuration.ConfigurationManager.AppSettings["PDX_PathDocFiles"];
            if (string.IsNullOrEmpty(pdxPathDocFiles))
            {
                Console.WriteLine("In the configuration file is missing the path to the root directory - PDX_PathDocFiles.");
            }
            else if (!Directory.Exists(pdxPathDocFiles))
            {
                Console.WriteLine("Directory not found");
            }
            else
            {
                try
                {
                    Console.WriteLine("rootPath: " + pdxPathDocFiles);
                    System.IO.DirectoryInfo rootDir = new DirectoryInfo(pdxPathDocFiles);

                    // The using block releases the context even if the TRUNCATE fails.
                    // The command is sent to the database directly and nothing is tracked
                    // by the context here, so no SaveChanges call is needed.
                    using (PayDox_EPD19_T20_RGMEntities db = new PayDox_EPD19_T20_RGMEntities())
                    {
                        db.Database.ExecuteSqlCommand("TRUNCATE TABLE [FileDBRecord]");
                    }
                    Console.WriteLine("Remove data from table");

                    WalkDirectoryTree(rootDir, rootDir.ToString());
                    Console.WriteLine("All ok");
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }
            Console.WriteLine("Bye, Good Day.");
            Console.WriteLine("Processing complete. Press any key to exit.");
            Console.ReadKey();
        }
        /// <summary>
        /// Stores the files of <paramref name="root"/> through a context created for this
        /// directory only, then processes the subdirectories with <c>Parallel.ForEach</c>.
        /// A directory whose files cannot be listed is reported on the console and skipped
        /// together with its subdirectories.
        /// </summary>
        /// <param name="root">Directory to process.</param>
        /// <param name="rootDir">Text removed from each file's full name before it is stored.</param>
        static void WalkDirectoryTree(System.IO.DirectoryInfo root, string rootDir)
        {
            System.IO.FileInfo[] files = null;
            try
            {
                files = root.GetFiles("*.*");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }

            if (files == null)
            {
                return;
            }

            // Do not create a context for directories that contain no files.
            if (files.Length > 0)
            {
                // The using block disposes the context even when SaveChanges throws.
                using (PayDox_EPD19_T20_RGMEntities db = new PayDox_EPD19_T20_RGMEntities())
                {
                    foreach (var currentElement in files)
                    {
                        db.FileDBRecord.Add(new FileDBRecord { FileName = currentElement.FullName.Replace(rootDir, ""), FileSize = currentElement.Length });
                    }

                    db.SaveChanges();
                }
            }

            System.IO.DirectoryInfo[] subDirs = root.GetDirectories();

            Parallel.ForEach(subDirs,
            currentElement =>
            {
                try
                {
                    WalkDirectoryTree(currentElement, rootDir);
                }
                catch (Exception e)
                {
                    // A failure in one subtree is reported and must not stop its siblings.
                    Console.WriteLine(e.Message);
                }
            });
        }
    }




答案 2 :(得分:-2)

也许我们可以修复你的第二段代码(未经测试,但可能不会再抛出异常)。如果你测试了,请告诉我它是否更快。

/// <summary>
/// Entry point. Reads the root directory from the <c>PDX_PathDocFiles</c> app setting,
/// empties the <c>FileDBRecord</c> set, converts every file below the root into a
/// <c>FileDBRecord</c> (name with the configured root text removed, size in bytes)
/// and saves the records.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string pdxPathDocFiles = System.Configuration.ConfigurationManager.AppSettings["PDX_PathDocFiles"];
    if (string.IsNullOrEmpty(pdxPathDocFiles))
    {
        Console.WriteLine("In the configuration file is missing the path to the root directory - PDX_PathDocFiles.");
    }
    else if (!Directory.Exists(pdxPathDocFiles))
    {
        Console.WriteLine("Directory not found");
    }
    else
    {
        try
        {
            Console.WriteLine("rootPath: " + pdxPathDocFiles);

            // Dispose the context on every exit path, including the early return below.
            using (PayDox_EPD19_T20_RGMEntities db = new PayDox_EPD19_T20_RGMEntities())
            {
                db.FileDBRecord.RemoveRange(db.FileDBRecord);
                db.SaveChanges();
                Console.WriteLine("Remove data from table");

                IList<FileDBRecord> files = null;
                System.IO.DirectoryInfo rootDir2 = new DirectoryInfo(pdxPathDocFiles);
                try
                {
                    // Select yields a lazy sequence; ToList materializes it so it can be
                    // assigned to IList<> and counted.
                    files = rootDir2.GetFiles("*.*", SearchOption.AllDirectories)
                        .Select(x => new FileDBRecord { FileName = x.FullName.Replace(pdxPathDocFiles, ""), FileSize = x.Length })
                        .ToList();
                    Console.WriteLine("Reed {0} fileName", files.Count);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.WriteLine("You do not have permission to access one or more folders in this directory tree.");
                    Console.WriteLine(ex.Message);
                    return;
                }

                db.FileDBRecord.AddRange(files);
                db.SaveChanges();
            }

            // Report success only when the records were actually saved.
            Console.WriteLine("All ok");
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
    Console.WriteLine("Bye, Good Day.");
}