我在Excel上有一个包含产品名称列表的电子表格。我想要做的是(1)将每个产品名称分隔5行,(2)设置一个网站搜索,从给定的网站(clinicaltrials.gov)中提取数据,并将其填入每个电子表格下面的行中。 / p>
(2)对我来说更重要,更具挑战性。我知道我必须运行一个遍历所有产品名称的循环。但在关注循环之前,我需要帮助找出如何编写执行网站搜索的代码。
我收到了一些帮助:
以下Excel VBA代码snipet将使用以下形式构建URL的Cell:
="URL;http://clinicaltrials.gov/ct2/show?term="& [Cell Reference to Drug name here] &"&rank=1"
输出4行,如:
Estimated Enrollment: 40
Study Start Date: Jan-11
Estimated Study Completion Date: Apr-12
Estimated Primary Completion Date: April 2012 (Final data collection date for primary outcome measure)
With ActiveSheet.QueryTables.Add(Connection:= _ ActiveCell.Text, Destination:=Cells(ActiveCell.Row, ActiveCell.Column + 1)) .Name = "Clinical Trials" .FieldNames = True .RowNumbers = False .FillAdjacentFormulas = False .PreserveFormatting = True .RefreshOnFileOpen = False .BackgroundQuery = True .RefreshStyle = xlInsertDeleteCells .SavePassword = False .SaveData = True .AdjustColumnWidth = True .RefreshPeriod = 0 .WebSelectionType = xlSpecifiedTables .WebFormatting = xlWebFormattingNone .WebTables = "12" .WebPreFormattedTextToColumns = True .WebConsecutiveDelimitersAsOne = True .WebSingleBlockTextImport = False .WebDisableDateRecognition = False .WebDisableRedirections = False .Refresh BackgroundQuery:=False End With
答案 0 :(得分:1)
您提供的网址无效。您需要NCT ID才能到达正确的页面,而不是药物名称。假设您在A1:B2中列出了两种药物,并且正确的NCT ID在B列中
celebrex NCT00571701
naproxen NCT00586365
要使用此代码,请设置对Microsoft XML 5.0库和Microsoft Forms 2.0库的引用。
Sub GetClinical()
Dim i As Long
Dim lLast As Long
Dim oHttp As MSXML2.XMLHTTP50
Dim sHtml As String
Dim lDataStart As Long, lTblStart As Long, lTblEnd As Long
Dim doClip As DataObject
'Find the last cell in column A
lLast = Sheet1.Cells(Sheet1.Rows.Count, 1).End(xlUp).Row
Set oHttp = New MSXML2.XMLHTTP50
'Loop from the last cell to row 1 in column A
For i = lLast To 1 Step -1
'Insert 5 rows below
Sheet1.Cells(i, 1).Offset(1, 0).Resize(5).EntireRow.Insert
'get the web page
oHttp.Open "GET", "http://clinicaltrials.gov/ct2/show/" & Sheet1.Cells(i, 2).Value & "?rank=1"
oHttp.send
sHtml = oHttp.responseText
'Find the start and end to the table
lDataStart = InStr(1, sHtml, "Estimated Enrollment:")
lTblStart = InStr(lDataStart - 200, sHtml, "<table")
lTblEnd = InStr(lDataStart, sHtml, "</table>") + 8
'put the table in the clipboard
Set doClip = New DataObject
doClip.SetText Mid$(sHtml, lTblStart, lTblEnd - lTblStart)
doClip.PutInClipboard
'paste the table as text
Sheet1.Cells(i, 1).Offset(1, 0).Select
Sheet1.PasteSpecial "Text", , , , , , True
Next i
End Sub
如果您没有NCT号码,我认为您无法构建可行的URL。另请注意,我通过查找特定字符串找到该表(估计注册: - 注意其间的两个空格)并备份200个字符。 200是任意的,但对Celebrex和naproxen都有效。我不能保证他们的格式会一致。他们不使用表格ID,因此很难找到正确的表格。
在运行改变数据的代码之前,请始终备份数据。
答案 1 :(得分:0)
如果您运行搜索并查看结果页面的底部,您会看到可以选择以各种格式下载结果。例如,此网址将以制表符分隔的格式下载所有氟西汀结果:
http://clinicaltrials.gov/ct2/results/download?down_stds=all&down_flds=all&down_fmt=tsv&term=fluoxetine
唯一的复杂因素是结果是压缩的,因此您需要先保存文件并解压缩。幸运的是,我已经不得不这样做了......在与工作簿相同的文件夹中创建一个名为“files”的文件夹,然后添加此代码并测试它。对我来说工作正常。
Option Explicit
Sub Tester()
FetchUnzipOpen "fluoxetine"
End Sub
Sub FetchUnzipOpen(DrugName As String)
Dim s, sz 'don't dim these as strings-must be variants!
s = ThisWorkbook.Path & "\files"
sz = s & "\test.zip"
FetchFile "http://clinicaltrials.gov/ct2/results/download?down_stds=all&" & _
"down_flds=all&down_fmt=tsv&term=" & DrugName, sz
Unzip s, sz
'now you just need to open the data file (files/search_result.txt)
End Sub
Sub FetchFile(sURL As String, sPath)
Dim oXHTTP As Object
Dim oStream As Object
Set oXHTTP = CreateObject("MSXML2.XMLHTTP")
Set oStream = CreateObject("ADODB.Stream")
Application.StatusBar = "Fetching " & sURL & " as " & sPath
oXHTTP.Open "GET", sURL, False
oXHTTP.send
With oStream
.Type = 1 'adTypeBinary
.Open
.Write oXHTTP.responseBody
.SaveToFile sPath, 2 'adSaveCreateOverWrite
.Close
End With
Set oXHTTP = Nothing
Set oStream = Nothing
Application.StatusBar = False
End Sub
Sub Unzip(sDest, sZip)
Dim o
Set o = CreateObject("Shell.Application")
o.NameSpace(sDest).CopyHere o.NameSpace(sZip).Items
End Sub