我正在尝试在 PowerShell 中用正则表达式解析多个 .txt 文件(这些文件是我用 VBA 导出的电子邮件)。
但是,在导出文件名和 Select-String 的匹配结果时,Matches 一列始终显示为
System.Text.RegularExpressions.Match[]
于是我把 select 部分从
select Filename,Matches
改成了
select Filename,@{Name=’Matches‘;Expression={[string]::join(“;”, ($_.Matches))}}
但是,导出的结果中,同一个匹配项有时会重复出现多次,
例如:
fsisac.com;fsisac.com
BankofAmerica.com;BankofAmerica.com
troweprice.com;troweprice.com
# Scans exported e-mail .txt files for domains, defanged domains and IPv4-looking
# strings, drops rows whose Matches cell equals a whitelisted value, and writes
# the remaining rows (Filename, Matches) to Domains.csv on the desktop.

#Variables
$user = $env:userprofile
$desktop = [Environment]::GetFolderPath("Desktop")
# NOTE(review): $FileBrowser is not defined in the visible script - confirm it is
# created before this point.
$input_path = $FileBrowser.FileNames
$output_fileDomain = "$env:userprofile\Desktop\Domains.csv"
$Removefromlist = @("1","2","3","4","5" )

#Regex
# All patterns are delimited with plain ASCII quotes so the script still parses
# after being saved or pasted with a different encoding.
# "name.tld" or "name-name.tld" for the listed TLDs.
$regexDomain = '(\w+-\w+\.((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net))|\w+\.((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Same shape, but with a bracketed separator such as "name[.]tld".
$regexCleanDomain = '(\w+-\w+\[.]((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net))|\w+\[.]((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Same shape, but with the literal text "dot" as the separator.
$regexDoT= '(\w+-\w+(dot)((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net))|\w+(dot)((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Empty group; not passed to Select-String below.
$RegexProxy = '()'
# Four dot-separated groups of 1-3 digits (octet ranges are not validated).
$regexIP = '\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'

#List of Matches that are white listed
$OKDOM=@("ibm.com", "secunia.com", "f5.com", "zionsbancorp.com", "fsisac.com", "BankofAmerica.com", "BankofUtah.com", "chase.com", "brandprotect.com", "keybank.com", "redhat.com", "virustotal.com", "adp.com", "hp.com", "outlook.com", "scotiabank.com", "dropbox.com", "github.com", "fireeye.com", "barclays.com", "capitalone.com", "53.com", "fsisac.com%fsisac.com", "outlook[.]com", "mitre.org" )

#Grabs the .txt files, runs the regexes, removes white-listed rows and outputs to a .csv
# -AllMatches reports every hit on a line and the calculated property joins them
# with ';', so a value that occurs twice on one line appears twice in the cell
# (e.g. "fsisac.com;fsisac.com").
# The white list is compared against the whole joined cell, so a row is dropped
# only when the complete cell equals one of the $OKDOM entries.
Get-ChildItem 'C:\Users\Powerboy2\Desktop\TestFile\' -Include *.txt -Recurse |
    Select-String -Pattern $regexDomain,$regexCleanDomain,$regexIP,$regexDoT -AllMatches |
    Select-Object Filename,@{Name='Matches';Expression={[string]::join(";", ($_.Matches))}} |
    Where-Object { $OKDOM -notcontains $_.Matches } |
    Export-Csv -Path $output_fileDomain -NoTypeInformation

#Debuging
Import-Csv $output_fileDomain
以上是我的代码。有没有人能建议一种不使用 join、而是直接通过管道配合 select 来实现的方法?
答案 0 :(得分:0)
# Scans exported e-mail .txt files for domains, defanged domains and IPv4-looking
# strings and writes one CSV row (Filename, Matches, Pattern) per distinct match
# value found on a line, skipping values that are on the white list.

#Variables
$user = $env:userprofile
$desktop = [Environment]::GetFolderPath("Desktop")
# NOTE(review): $FileBrowser is not defined in the visible script - confirm it is
# created before this point.
$input_path = $FileBrowser.FileNames
$output_fileDomaintemp = "$env:userprofile\Desktop\Domains.csv"
# Despite its name, this is the folder that is searched for .txt files.
$output_file = "$env:userprofile\Desktop\Testing VBA"
$Removefromlist = @("1","2","3","4","5" )

#Regex that matches the from the .txt files
# All patterns are delimited with plain ASCII quotes so the script still parses
# after being saved or pasted with a different encoding.
# "name-name.tld" / "name.tld" for the listed TLDs.
$regexCDomain = '(\w+-\w+\.((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
$regexCDomain1 = '(\w+\.((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Same shapes with a bracketed separator such as "name[.]tld".
$regexCDomain2 = '(\w+-\w+\[.]((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
$regexCDomain3 = '(\w+\[.]((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Same shapes with the literal text "dot" as the separator.
$regexDoT = '(\w+(dot)((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
$regexDot1 = '(\w+-\w+(dot)((\n(com|ru|eu|pw|biz|info|biz|org|net))|(com|ru|eu|pw|biz|info|biz|org|net)))'
# Four dot-separated groups of 1-3 digits (octet ranges are not validated).
$regexIP = '\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'

#List of Matches that i don't care about
# -notcontains compares whole values, so every entry must be spelled exactly like
# a match (no surrounding whitespace). The "fsisac.com%fsisac.com" entry is a
# joined value and cannot equal a single match; it is kept only so the list
# stays complete.
$OKDOM=@(
    "key[.]com", "trustedsec.com", "autoshun.org", "proofpoint.com", "softpedia.com",
    "net-security.org", "jpmchase.com", "att.net", "SunTrust.com", "ciarmy.com",
    "dragonresearchgroup.org", "cisecurity.org", "gartner.com", "cisecurity.org", "bankunited.com",
    "facebook.com", "twitter.com", "linkedin.com", "foursquare.com", "msasecurity.net",
    "usbank.com", "alienvault.com", "etrade.com", "jpmorgan.com", "tdameritrade.com",
    "ncfta.net", "iseclab.org", "malwr.com", "clean-mx.com", "db.com",
    "miscreantpunchers.net", "securityweek.com", "threatpost.com", "packetmail.net", "dnsdb.info",
    "farsightsecurity.com", "mcafee.com", "surveymonkey.com", "badips.com", "agari.com",
    "key.com", "zurich.com", "morganstanley.com", "blackrock.com", "cisco.com",
    "norton.com", "ibm.com", "secunia.com", "f5.com", "zionsbancorp.com",
    "fsisac.com", "BankofAmerica.com", "BankofUtah.com", "chase.com", "brandprotect.com",
    "keybank.com", "redhat.com", "virustotal.com", "adp.com", "hp.com",
    "outlook.com", "scotiabank.com", "dropbox.com", "github.com", "fireeye.com",
    "barclays.com", "capitalone.com", "53.com", "fsisac.com%fsisac.com", "outlook[.]com",
    "mitre.org"
)

#Grabs the .txt files, runs the regexes and outputs to a .csv
# -AllMatches keeps every hit on a line instead of only the first one. Each hit
# is expanded into its own row, repeated values on the same line are collapsed
# with Select-Object -Unique, and the white list is applied per value before
# anything is written.
Get-ChildItem $output_file *.txt -Recurse |
    Select-String -Pattern $regexCDomain,$regexCDomain1,$regexCDomain2,$regexCDomain3,$regexDoT,$regexDot1,$regexIP -AllMatches |
    ForEach-Object {
        $hit = $_
        $hit.Matches |
            ForEach-Object { $_.Value } |
            Select-Object -Unique |
            Where-Object { $OKDOM -notcontains $_ } |
            ForEach-Object {
                New-Object PSObject -Property @{
                    Filename = $hit.Filename
                    Matches  = $_
                    Pattern  = $hit.Pattern
                }
            }
    } |
    # A hashtable does not guarantee property order; pin the CSV column order here.
    Select-Object Filename, Matches, Pattern |
    Export-Csv -Path $output_fileDomaintemp -NoTypeInformation

#Debuging
Import-Csv $output_fileDomaintemp