我有一点小问题让我困扰了很长一段时间。
我正在尝试从一个 RSS 源获取信息。我已经下载了 XML 并对其进行了遍历,但我只需要其中的部分内容。我使用的是 .document.getElementById().outerText。
问题在于,它能正确提取第一条信息,但之后就全都不对了:它要么取到随机的文本,要么一直保留最开始的那段文本而不刷新变量。另外,PowerShell ISE 还会随机报错:“You cannot call a method on a null-valued expression.”(不能对空值表达式调用方法)。
这是我的代码:
<#
AUTHOR: KOCH,MICHAEL [GRE-IT]
DESCRIPTION: RSS READER
DATE: 28.06.17
DATE LAST WRITTEN: 19.07.17
LAST CHANGE:

Downloads an RSS feed, opens each item's link in Internet Explorer, reads the
text of the element with the ID "main" and mails it when the link contains
"Zigaretten" and the title matches the tag value(s) taken from config.txt.
#>
# --- Settings: debug switch, mail parameters and working file paths ---
$debug = 1 #DEBUG
$receiver="A@MailAdress.com"
$sender="A@MailAdress.com"
$smtp="A.SMTP.SERVER"
$encoding = [System.Text.Encoding]::UTF8
$path_config = "C:\RSS\Zoll\config.txt"
# NOTE(review): $output and $output_edit_path are not referenced anywhere else in this script.
$output = "C:\RSS\Zoll\meldung.html"
$output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
# $nmbr is the index of the config line being read; $count is the number of loop passes so far.
$nmbr=0
$count=0
# Download the feed to disk; -ErrorAction Stop turns a failed download into a terminating error.
Invoke-WebRequest -Uri 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml' -OutFile C:\RSS\Zoll\meldungen.xml -ErrorAction Stop
# Parse the downloaded file as XML and keep the <channel> node, whose items are iterated below.
[xml]$content = Get-Content C:\RSS\Zoll\meldungen.xml
$feed = $content.rss.channel
# $tag is meant to hold the entries of config.txt (one per line).
$tag = @()
if($lines=Get-Content $path_config | Measure-Object -Line) #gets the number of lines
{
while($count -ne $lines.Lines)
{
# The config file is read again on every pass and indexed with $nmbr.
if($entrys=(Get-Content $path_config)[$nmbr]) #gets the entrys from config.txt and goes through line by line
{
# NOTE(review): ">>" is PowerShell's append-redirection operator, not an array append.
# $tag looks like it stays empty after this line - presumably "$tag += $entrys" was intended; confirm.
$entrys >> $tag[$nmbr]
if ($debug -eq 1)
{
# NOTE(review): inside a double-quoted string only "$tag" is expanded; "[$nmbr]" is appended
# as text rather than used as an index. "$($tag[$nmbr])" would be needed to index - confirm intent.
Write-Output "$tag[$nmbr]"
Write-Output "$entrys"
Write-Output "$count"
}
}
$count++
$nmbr++ #jumps into the next line
}
}
# One Internet Explorer COM instance is created and reused for every feed item.
$ie = New-Object -ComObject "InternetExplorer.Application"
Foreach($msg in $feed.Item)
{
$link = ($msg.link)
$subject = ($msg.title)
# NOTE(review): nothing waits for the page to finish loading between navigate() and the read of
# $ie.document below. If navigate() returns before the load completes, the document can be
# missing or still be the previous page - which would fit the stale text and the
# "cannot call a method on a null-valued expression" error. Confirm against the IE COM docs.
$ie.navigate("$link")
#$return = Invoke-WebRequest -Uri $link -OutFile "C:\RSS\Zoll\link.html"
$return = $ie.document
# Text of the element with the ID "main"; this call fails when the element or document is $null.
$innertext = $return.documentElement.document.getElementById("main").outerText
$body = $innertext#.Replace('Ä', 'Ä')
<#
$body = $innertext.Replace('ä', 'ä')
$body = $innertext.Replace('Ö', 'Ö')
$body = $innertext.Replace('ö', 'ö')
$body = $innertext.Replace('Ü', 'Ü')
$body = $innertext.Replace('ü', 'ü')
$body = $innertext.Replace('ß', 'ß')
#>
if ($debug -eq 1)
{
Write-Output "Subject $subject"
Write-Output "Tag $tag"
Write-Output "Link $link"
Write-Output $body
#exit
}
if($link -match "Zigaretten") #searchs in the <link> for the string "Zigaretten"
{
# NOTE(review): $tag is an array used as the right-hand side of -match; if it is empty the
# pattern presumably becomes an empty string, which matches every subject - verify.
if($subject -match $tag) #searches for the specified tag in config.txt !!! only one argument per line !!!
{
# NOTE(review): the mail is sent as a side effect of evaluating this condition. Send-MailMessage
# is not expected to return an object, so the branch below ("Send.") is probably never taken - confirm.
if($mail = Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -encoding $encoding)
{
if($debug -eq 1)
{
Write-Output "$tag"
Write-Output "Send. Tag = $tag"
}
Write-Output "Send."
}
}
}
# This else belongs to the "Zigaretten" link test: "Empty." is printed for every item whose link
# does not contain "Zigaretten".
else
{
Write-Host "Empty."
}
}
# Close Internet Explorer and release the COM reference.
$ie.Quit()
[System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie)
Remove-Variable ie
答案 0 :(得分:1)
添加了一个等待 busy 状态结束的循环,以确保 IE 加载完整的 HTML 文档。这样就解决了问题! :)
<#
AUTHOR: KOCH,MICHAEL [GRE-IT]
DESCRIPTION: RSS READER
DATE: 28.06.17
DATE LAST WRITTEN: 20.07.17
LAST CHANGE: ADDED WAIT IF BUSY !

Downloads the Zoll RSS feed, opens every item's link in Internet Explorer,
waits until the page has finished loading, reads the text of the element with
the ID "main" and mails it when
  * the link contains "Zigaretten" and
  * the title matches at least one tag (regular expression, one per line) from
    config.txt. If no tags are configured, every "Zigaretten" item is mailed.
#>
$debug = 0 #DEBUG
$receiver="A@MailAdress.de"
$sender="A@MailAdress.de"
$smtp="A.SMTP.SERVER"
$encoding = [System.Text.Encoding]::UTF8
$path_config = "C:\RSS\Zoll\config.txt"
# Not referenced by this script; kept so the set of script variables stays unchanged.
$output = "C:\RSS\Zoll\meldung.html"
$output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
$feed_file = "C:\RSS\Zoll\meldungen.xml"
# Upper bound for a single page load so one hanging page cannot block the whole run.
$load_timeout_sec = 30

Invoke-WebRequest -Uri 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml' -OutFile $feed_file -ErrorAction Stop

# XmlDocument.Load reads the file using the encoding declared in the XML itself,
# whereas "[xml](Get-Content ...)" decodes with Get-Content's default encoding,
# which can garble umlauts.
$content = New-Object System.Xml.XmlDocument
$content.Load($feed_file)
$feed = $content.rss.channel

# Read all non-empty config lines once; every line is one tag.
$tag = @()
if (Test-Path -LiteralPath $path_config)
{
    $tag = @(Get-Content -LiteralPath $path_config |
        ForEach-Object { $_.Trim() } |
        Where-Object { $_ })
}
else
{
    Write-Warning "Config file '$path_config' not found - no tag filter is applied."
}
if ($debug -eq 1)
{
    Write-Output "Tags: $($tag -join ', ')"
}

$ie = New-Object -ComObject InternetExplorer.Application #creates new ComObject IE
try
{
    if ($debug -eq 1)
    {
        $ie.Visible = $true
    }

    foreach ($msg in $feed.Item)
    {
        $link = $msg.link
        $subject = $msg.title

        $ie.Navigate("$link") #navigate with Internetexplorer to the website

        # Wait while IE is busy OR the document is not complete (ReadyState 4).
        # The do/while sleeps at least once so a state left over from the
        # previous page is not mistaken for the new page being ready.
        $deadline = (Get-Date).AddSeconds($load_timeout_sec)
        do
        {
            Start-Sleep -Milliseconds 200
        }
        while (($ie.Busy -or $ie.ReadyState -ne 4) -and ((Get-Date) -lt $deadline))

        if ($ie.Busy -or $ie.ReadyState -ne 4)
        {
            Write-Warning "Timed out after $load_timeout_sec s while loading '$link' - item skipped."
            continue
        }

        # Guard against a missing document or a page without the "main" element
        # instead of calling a method on $null.
        $doc = $ie.Document
        $main = $null
        if ($null -ne $doc)
        {
            $main = $doc.IHTMLDocument3_getElementById("main")
        }
        if ($null -eq $main)
        {
            Write-Warning "No element with ID 'main' found at '$link' - item skipped."
            continue
        }

        $innertext = $main.outerText #gets the outer text from the div with the element ID "main"
        $body = $innertext

        if ($debug -eq 1)
        {
            Write-Output "Subject $subject"
            Write-Output "Tag $tag"
            Write-Output "Link $link"
            Write-Output "INNERTEXT $innertext"
            Write-Output "BODY $body"
            #exit
        }

        if ($link -match "Zigaretten") #searchs in the <link> for the string "Zigaretten"
        {
            # Test the subject against each tag separately; -match expects a
            # single pattern on its right-hand side, not an array.
            $tag_hit = ($tag.Count -eq 0) -or
                (@($tag | Where-Object { $subject -match $_ }).Count -gt 0)

            if ($tag_hit)
            {
                # Send-MailMessage emits no output, so success is detected by the
                # absence of an error (-ErrorAction Stop makes failures catchable).
                try
                {
                    Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -Encoding $encoding -ErrorAction Stop
                    Write-Output "Send."
                }
                catch
                {
                    Write-Warning "Mail for '$subject' could not be sent: $($_.Exception.Message)"
                }
            }
        }
        else
        {
            Write-Host "Empty."
        }
    }
}
finally
{
    # Always quit and release IE, even after an error; otherwise orphaned
    # iexplore.exe processes pile up across runs.
    $ie.Quit()
    [void][System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie)
    Remove-Variable ie
}