我正在使用 VB.NET，并用下面的代码从一个 URL 获取 XML 文件：
Dim PMIDList As String = "25241892,25451079"
' JSON writer chain (StringBuilder -> StringWriter -> JsonTextWriter).
' Nothing in the visible snippet writes to it.
Dim sb As New StringBuilder
Dim sw As New StringWriter(sb)
Dim writer As JsonWriter = New JsonTextWriter(sw)
' Request URL: the comma-separated PMIDList is spliced into the "id" query parameter.
Dim url As String = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + PMIDList + "&rettype=fasta&retmode=xml"
' Identifier values that the parsing loop below assigns.
Dim pmid As String = ""
Dim pmcid As String = ""
Dim nihmsid As String = ""
Dim inStream As StreamReader
Dim webRequest As WebRequest
Dim webresponse As WebResponse
' Create is invoked through the local variable, which shares its name with the WebRequest type.
webRequest = webRequest.Create(url)
webresponse = webRequest.GetResponse()
' Read the whole response body into a single string before parsing.
inStream = New StreamReader(webresponse.GetResponseStream())
Dim response As String = inStream.ReadToEnd
' Declared empty here; assigned later from the reader.
Dim pubXML As String = ""
' Parse the downloaded text with an XmlTextReader over a StringReader.
Using reader As XmlTextReader = New XmlTextReader(New StringReader(response))
' One iteration per "PubmedArticle" element found by ReadToFollowing.
While reader.ReadToFollowing("PubmedArticle") 'Read till citation
这样我就可以把我想要的元素提取出来：
reader.ReadToFollowing(" ArticleIds")' Go to First ArticlesId
While reader.Read()
' Each check below compares the current node's Value with an id-type string;
' on a match it advances to the next element named "Value" and stores that
' element's inner XML in the corresponding variable.
' NOTE(review): which node kind is expected to carry these values is not visible here - confirm.
If reader.Value = "pubmed" Then 'PubMed id
reader.ReadToFollowing("Value")
pmid = reader.ReadInnerXml()
End If
If reader.Value = "pmc" Then 'PMC id
reader.ReadToFollowing("Value")
pmcid = reader.ReadInnerXml()
End If
If reader.Value = "mid" Then 'stored in nihmsid
reader.ReadToFollowing("Value")
nihmsid = reader.ReadInnerXml()
End If
' Stop scanning as soon as a node named "History" is reached.
If reader.Name = "History" Then Exit While 'Exit loop End of ArticleIds
End While
但我还想保存整个 PubmedArticle 节点。我知道 XmlTextReader 只能向前读取，那么有没有办法用下面的 pubXML 字符串再创建一个读取器（reader）？
' Wrap the reader's inner XML in literal PubmedArticle start/end tags to form the saved string.
pubXML = "<PubmedArticle>" + reader.ReadInnerXml() + "</PubmedArticle>"
最后我用了一个变通办法（hack）：
''' <summary>
''' Downloads the PubMed records for a fixed list of PMIDs and, for every
''' PubmedArticle element in the response, shows a message box containing the
''' article's PubMed, PMC and NIHMS ("mid") identifiers followed by the
''' complete XML of that article.
''' </summary>
''' <remarks>
''' Each article is captured once with ReadOuterXml and then queried through an
''' XmlDocument, so the identifiers come from the article's own ArticleIdList
''' instead of from whichever text line happens to contain a matching tag.
''' Network and XML errors propagate to the caller.
''' </remarks>
Private Sub parseXMLPMID()
    Dim PMIDList As String = "25241892,25451079"
    Dim url As String = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" + PMIDList + "&rettype=fasta&retmode=xml"

    ' Fetch the whole response body; both the response and the stream reader
    ' are released as soon as the text has been read.
    Dim response As String
    Dim request As WebRequest = WebRequest.Create(url)
    Using reply As WebResponse = request.GetResponse()
        Using inStream As New StreamReader(reply.GetResponseStream())
            response = inStream.ReadToEnd()
        End Using
    End Using

    Using reader As XmlTextReader = New XmlTextReader(New StringReader(response))
        reader.MoveToContent()
        While Not reader.EOF
            If reader.NodeType = XmlNodeType.Element AndAlso reader.Name = "PubmedArticle" Then
                ' ReadOuterXml leaves the reader on the node that follows the
                ' element, so no extra Read() is issued here; otherwise an
                ' immediately adjacent PubmedArticle would be skipped.
                Dim pubXML As String = reader.ReadOuterXml()

                Dim article As New XmlDocument()
                article.LoadXml(pubXML)

                ' Looked up fresh for every article, so an id missing from one
                ' record never shows the previous record's value.
                Dim pmid As String = ReadArticleId(article, "pubmed")
                Dim pmcid As String = ReadArticleId(article, "pmc")
                Dim nihmsid As String = ReadArticleId(article, "mid")

                MessageBox.Show(pmid + " " + pmcid + " " + nihmsid + " " + pubXML)
            Else
                reader.Read()
            End If
        End While
    End Using
End Sub

''' <summary>
''' Returns the text of the ArticleId with the given IdType from the article's
''' own PubmedData/ArticleIdList, or an empty string when it is absent.
''' </summary>
''' <param name="article">Document whose root element is a PubmedArticle.</param>
''' <param name="idType">Value of the IdType attribute to look for.</param>
Private Function ReadArticleId(article As XmlDocument, idType As String) As String
    ' The absolute path keeps ArticleId elements located elsewhere in the
    ' record from being picked up.
    Dim node As XmlNode = article.SelectSingleNode("/PubmedArticle/PubmedData/ArticleIdList/ArticleId[@IdType='" & idType & "']")
    If node Is Nothing Then
        Return ""
    End If
    Return node.InnerText.Trim()
End Function
Strip_Line 只是把标签内部的文本提取出来。我更希望代码能写得更干净一些。