在PowerShell中进行FileSystemWatcher批量处理

时间:2019-02-05 14:49:54

标签: powershell batch-processing filesystemwatcher

我正在编写一个PowerShell脚本,该脚本处理特定目录中的数千个文件。移动完所有文件后,它将等待使用FileSystemWatcher将更多文件存放到目录中。

我最近更新了脚本以构建多插入SQL语句,以最大程度地减少数据库调用。这样做足够好,但是我很难让FileSystemWatcher以相同的方式处理文件,因为即使添加到目录中的每个文件一次被移动,它也会为添加到目录中的每个文件触发。

我想做的是让它等待几百毫秒,然后批量处理所有新文件。

任何人都可以教我如何操作

  1. 正在进行批量处理时,不处理任何其他文件。
  2. 能够在构建SQL语句时引用用于跟踪文件信息的全局变量。

FileSystemWatcher处理程序位于代码的底部:

[void][System.Reflection.Assembly]::LoadWithPartialName("MySql.Data")

$SOURCE = "F:\In"
$TARGET = "F:\parsed"
$TARGET_CSG="F:\CSG"
#$TARGET_CSG = "\\di-dev\HL7In\"
#$TARGET = "J:"
$DB_NAME = "processing_center"
$SP_NAME = "sp_parser"
#$SP_NAME = "feedParser"
$fileLimit = 12
$errorFolder = "F:\hl7_ERR"
$DirNameStart= '20180901' #directory to start on
$filter = '*.txt'
$batch_size = 10
$global:fileNames    = @{}
$global:folderCache  = @()
$global:inserts      = @()
$global:fileContents = @()
$global:starttime   = (Get-Date)
$global:currentTime = (Get-Date)
$global:counter     = 0


function insertRows() {
    $insertCommand = New-Object MySql.Data.MySqlClient.MySqlCommand
    $insertCommand.Connection=$connection
    $ofs = ',' # after this all casts work this way until $ofs changes!
    $insertCommand.CommandText = 'INSERT INTO msggrp (filename,msgStr)  VALUES ' + $global:inserts
    $insertCommand.Prepare | Out-Null
    for($i = 0; $i -lt $global:fileContents.Count; $i++) {
        $filesCounter = $i + 1
        $insertCommand.Parameters.AddWithValue("@fileName$filesCounter",    $global:fileNames["$filesCounter"])    | Out-Null
        $insertCommand.Parameters.AddWithValue("@fileContent$filesCounter", $global:fileContents[$i]) | Out-Null
    }

    $iRowsAffected = $insertCommand.ExecuteNonQuery()

    $procCommand = New-Object MySql.Data.MySqlClient.MySqlCommand
    $procCommand.Connection=$connection
    $procCommand.CommandText = "CALL $DB_NAME.$SP_NAME()"
    $dataAdapter = New-Object MySql.Data.MySqlClient.MySqlDataAdapter ($procCommand)
    $dataSet     = New-Object System.Data.DataSet
    $recordCount = $dataAdapter.Fill($dataSet,"data") 
    foreach ($Row in $dataset.Tables.Rows) {
        $rowId = $Row.id
        $fileName = $global:fileNames["$rowId"]
        if ($fileName) {
            $inCSG  = $Row.inCSG
            $folder = $global:folderCache[$rowId - 1]
            #regenerate the source path
            $fromPath = Join-Path $SOURCE $folder
            $fromPath = Join-Path $fromPath $fileName

            $toDir  = Join-Path $TARGET $folder
            if (!([System.IO.Directory]::Exists($toDir))) {
                [System.IO.Directory]::CreateDirectory($toDir)
            }
            $toPath = Join-Path $toDir $fileName
            Move-Item -LiteralPath "$fromPath" -destination "$toPath"  -Force -ErrorAction Stop

            #remove the file info from the cache
            $global:fileNames.Remove("$rowId")
        }
        else {
            Write-Host "Can't find ID $rowId in cache!"
        }
    }
    #re-init the global file arrays
    $global:inserts      = @()
    $global:folderCache  = @()
    $global:fileContents = @()
    #make sure folderCache we handled all of our cached files
    if ($global:fileNames.Count -gt 0) {
        Write-Host "There were unhandled files after bulk processing:"
        Write-Host $global:fileNames
    }
    $global:fileNames = @{}
}

function processFile($filePath) {
    $fileName = [System.IO.Path]::GetFileName($filePath)
    $filePath1 =$filePath.Replace("[","?")
    $filePath1 =$filePath1.Replace("]","?")
    $dirName = $filePath.Split("\")[-2]
    $fileContent = Get-Content -Raw $filePath1

    try {
        if ($global:inserts.length -eq $batch_size -or $global:counter -eq $fileLimit) { 
            insertRows
        }
        $filesCounter         = $global:inserts.length + 1
        $global:inserts      += "(@fileName$filesCounter, @fileContent$filesCounter)"
        $global:fileNames.add("$filesCounter", $fileName)
        $global:fileContents += $fileContent
        $global:folderCache  += $dirName
        $global:counter++;
        if ($global:counter -eq $fileLimit -and $global:inserts.length -eq 0) { 
          $filewatcher = Get-EventSubscriber -SourceIdentifier FileCreated -ErrorAction SilentlyContinue
          if ($filewatcher) {
            Write-Host "File limit reached. Unregistering file watcher."
            try {
              Unregister-Event -SourceIdentifier FileCreated
            }
            catch {
               Write-Host "Couldn't unregister the file watcher!"
            }
          }
          break loop 
        }
    }
    catch {
        $ErrorMessage = $_.Exception.Message
        $FailedItem   = $_.Exception.ItemName
        Write-Host "Something went wrong locally: $ErrorMessage"
        Write-Log $_
    }
}

$folders = [IO.Directory]::EnumerateDirectories($SOURCE) 
:loop foreach ($folder in $folders) {  #ForEach-Object

    $dirNameTop = $folder.Split("\")[-1]

    if($dirNameTop -ge $dirNameStart)  #only process directories greater than or equal to $dirNameStart
    {
        $files = [IO.Directory]::EnumerateFiles($folder,"*.txt")
        foreach ($file in $files) 
        {
            processFile($file)
        }
    }
    else {
        Write-Host "Skipped $dirNameTop because it is before $dirNameStart."
    }
}


if ($global:counter -lt $fileLimit) { 
    Write-Host ""
    Write-Host "Listening for more files."
    Write-Host "To stop processing new files, issue command: 'Unregister-Event -SourceIdentifier FileCreated'"


    #THIS IS THE FileSystemWatcher CODE:*******************************
    $fsw = New-Object IO.FileSystemWatcher $SOURCE, $filter -Property @{
      IncludeSubdirectories = $true      
      NotifyFilter = [IO.NotifyFilters]'FileName, LastWrite'
    }
    $onCreated = Register-ObjectEvent $fsw Created -SourceIdentifier FileCreated -Action {
      if ($global:counter -lt $fileLimit) { 
        $path    = $Event.SourceEventArgs.FullPath
        $dirpath = [System.IO.Path]::GetDirectoryName($path)
        $parent  = [System.IO.Path]::GetFileName($dirpath)
        $name    = $Event.SourceEventArgs.Name
        $changeType = $Event.SourceEventArgs.ChangeType
        $timeStamp = $Event.TimeGenerated
        Write-Host "The file '$name' was $changeType at $timeStamp"

        if($parent -ge $dirNameStart) {
            $files = [IO.Directory]::EnumerateFiles($dirpath,"*.txt")
            foreach ($file in $files) 
            {
                processFile($file)
            }
        }
        else {
          Write-Host "Ignoring $name because it was added to $parent, which is before $dirNameStart."
        }
      }
    }
}

0 个答案:

没有答案