我有一个应用程序,需要递归遍历一个非常大的文件夹(6 TB)。为了加快速度,我为每一次递归调用都创建了一个新线程。我自己统计的线程数一度超过了12,000。随着任务接近完成,我内部统计的线程数在下降,但任务管理器显示的线程数却一直在增加。我认为这说明线程结束后并没有被垃圾回收。
有一次,我的内部线程数显示5575个线程,而Windows资源监视器显示的任务使用了33,023个线程。
/// <summary>
/// Entry point: reads the root folder from the application settings, sets the
/// internal thread counter to one and starts the first worker thread on that folder.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string rootPath = Properties.Settings.Default.rootFolder;

    // The root worker is the first thread being counted.
    threadcount = 1;

    Thread rootWorker = new Thread(new ParameterizedThreadStart(needle));
    rootWorker.Start(rootPath);
}
/// <summary>
/// Thread entry point: gathers the statistics of one folder and stores them
/// in the database.
/// </summary>
/// <param name="objFolderName">
/// Folder path. Declared as <see cref="object"/> to match
/// <see cref="ParameterizedThreadStart"/>; it is cast to <see cref="string"/>.
/// </param>
static void needle(object objFolderName)
{
    addToDB(getFolderData((string)objFolderName));
    // Author's observation: the statement above does run (the database table
    // gets populated), so the thread was expected to be reclaimed once this
    // method returns, yet the Windows thread count keeps climbing.
}
/// <summary>
/// Collects the statistics of one folder (number of sub-folders, number of files
/// and total size of the files directly inside it) and starts one new thread per
/// sub-folder, each running <c>needle</c> on that sub-folder.
/// </summary>
/// <param name="folderName">Path of the folder to inspect.</param>
/// <returns>
/// A <c>FolderData</c> built from the folder name, the sub-folder count, the file
/// count and the summed file size.
/// </returns>
private static FolderData getFolderData(string folderName)
{
    string[] directories = new string[] { };
    string[] files = new string[] { };

    // Best effort: a folder that cannot be listed is still recorded, with zero
    // entries, but the failure is reported instead of being silently dropped.
    try
    {
        directories = Directory.GetDirectories(folderName);
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine("Cannot list sub-folders of " + folderName + ": " + ex.Message);
    }

    try
    {
        files = Directory.GetFiles(folderName);
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine("Cannot list files of " + folderName + ": " + ex.Message);
    }

    long folderSize = 0;
    foreach (string file in files)
    {
        try
        {
            folderSize += new FileInfo(file).Length;
        }
        catch (Exception ex)
        {
            // Cannot access the file, so it is left out of the total.
            Console.Error.WriteLine("Cannot read size of " + file + ": " + ex.Message);
        }
    }

    FolderData folderData = new FolderData(folderName, directories.Length, files.Length, folderSize);

    // Sort in place; no separate list copy is needed.
    Array.Sort(directories);

    // NOTE: one dedicated thread is started per sub-folder, so the number of
    // threads grows with the size of the tree.
    foreach (string directory in directories)
    {
        Console.Write(" " + threadcount + " ");

        // threadcount is shared by every worker thread. A plain ++ / -- is a
        // non-atomic read-modify-write and loses updates under contention,
        // so the counter is adjusted with Interlocked.
        Interlocked.Increment(ref threadcount);

        Thread eye = new Thread(new ParameterizedThreadStart(needle));
        eye.Start(directory);
    }

    // This folder's own scan is finished, so decrement.
    Interlocked.Decrement(ref threadcount);
    return folderData;
}
答案 0 :(得分:0)
多亏了matt-dot-net的建议,我花了几个小时研究TPL(任务并行库),这是非常值得的。
这是我的新代码。它运行速度极快,不会占满CPU(占用约41%,虽然不算低,但仍能与其他程序和平共处),仅使用约160MB的内存(而不是几乎用光全部4GB可用内存),并且最多只使用约70个线程。
您几乎会以为我很清楚自己在做什么。其实是.NET TPL处理了所有困难的事情,例如确定合适的线程数,并确保线程在结束后自行清理。
class Program {
// Lock target used by addToDB to serialize database writes. It is readonly so
// the object being locked on can never be replaced while threads hold or wait
// for it.
private static readonly object padlock = new object();
/// <summary>
/// Entry point: empties the results table, then processes the configured root
/// folder in a task and blocks until that task has finished.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using blocks release the connection and command even when Open or
    // ExecuteNonQuery throws.
    // NOTE(review): this deletes from SCRPT_APP.S_DRIVE_FOLDERS while addToDB
    // inserts into FOLDERS; confirm both names refer to the same table.
    using (OracleConnection ora = new OracleConnection(Properties.Settings.Default.ora))
    {
        ora.Open();
        using (OracleCommand clearTable = new OracleCommand("DELETE FROM SCRPT_APP.S_DRIVE_FOLDERS", ora))
        {
            clearTable.ExecuteNonQuery();
        }
    }

    string folderName = Properties.Settings.Default.rootFolder;
    Task processRoot = new Task(value => getFolderData(value), folderName);
    processRoot.Start();

    // Wait is like Join: it blocks until this asynchronous task has finished.
    processRoot.Wait();
}
/// <summary>
/// Recursively walks a folder: measures the files directly inside it, processes
/// every sub-folder in its own task, waits for those tasks and then stores this
/// folder's statistics through <c>addToDB</c>.
/// </summary>
/// <param name="objFolderName">
/// Folder path. Declared as <see cref="object"/> so it can be passed as task
/// state; it is cast to <see cref="string"/>.
/// </param>
private static void getFolderData(object objFolderName)
{
    string folderName = (string)objFolderName;
    Console.WriteLine(folderName);

    string[] directories = new string[] { };
    string[] files = new string[] { };

    // Best effort: a folder that cannot be listed is still recorded, with zero
    // entries, but the failure is reported instead of being silently dropped.
    try
    {
        directories = Directory.GetDirectories(folderName);
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine("Cannot list sub-folders of " + folderName + ": " + ex.Message);
    }

    try
    {
        files = Directory.GetFiles(folderName);
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine("Cannot list files of " + folderName + ": " + ex.Message);
    }

    long folderSize = 0;
    foreach (string file in files)
    {
        try
        {
            folderSize += new FileInfo(file).Length;
        }
        catch (Exception ex)
        {
            // Cannot access the file, so it is left out of the total.
            Console.Error.WriteLine("Cannot read size of " + file + ": " + ex.Message);
        }
    }

    FolderData folderData = new FolderData(folderName, directories.Length, files.Length, folderSize);

    // Sort in place; no separate list copy is needed.
    Array.Sort(directories);

    // Create and start one task per sub-directory. The path is passed as task
    // state, so the lambda does not capture the loop variable.
    Task[] dirTasks = new Task[directories.Length];
    for (int d = 0; d < directories.Length; d++)
    {
        dirTasks[d] = Task.Factory.StartNew(value => getFolderData(value), directories[d]);
    }

    // Wait for them to finish: this call returns only after the whole subtree
    // below this folder has been processed.
    Task.WaitAll(dirTasks);
    addToDB(folderData);
}
/// <summary>
/// Inserts one folder's statistics as a row of the FOLDERS table. Calls are
/// serialized through <c>padlock</c>, so only one insert runs at a time.
/// </summary>
/// <param name="folderData">
/// Statistics to store; its FolderName, FolderCount, FileCount and Size values
/// are written to the PATH, FOLDERS, FILES and SPACE_USED columns.
/// </param>
private static void addToDB(FolderData folderData)
{
    lock (padlock)
    {
        // The using blocks release the connection and command even when the
        // insert throws; previously they were leaked on any exception.
        using (OracleConnection ora = new OracleConnection(Properties.Settings.Default.ora))
        {
            ora.Open();
            // NOTE(review): Main deletes from SCRPT_APP.S_DRIVE_FOLDERS but this
            // inserts into FOLDERS; confirm both names refer to the same table.
            using (OracleCommand addFolderData = new OracleCommand(
                "INSERT INTO FOLDERS " +
                "(PATH, FOLDERS, FILES, SPACE_USED) " +
                "VALUES " +
                "(:PATH, :FOLDERS, :FILES, :SPACE_USED) ",
                ora))
            {
                addFolderData.BindByName = true;
                addFolderData.Parameters.Add(":PATH", OracleDbType.Varchar2);
                addFolderData.Parameters.Add(":FOLDERS", OracleDbType.Int32);
                addFolderData.Parameters.Add(":FILES", OracleDbType.Int32);
                addFolderData.Parameters.Add(":SPACE_USED", OracleDbType.Int64);
                addFolderData.Prepare();

                addFolderData.Parameters[":PATH"].Value = folderData.FolderName;
                addFolderData.Parameters[":FOLDERS"].Value = folderData.FolderCount;
                addFolderData.Parameters[":FILES"].Value = folderData.FileCount;
                addFolderData.Parameters[":SPACE_USED"].Value = folderData.Size;
                addFolderData.ExecuteNonQuery();
            }
        }
    }
}
}
}