我想获取世界银行(WB)网页上所有新闻的标题和简短描述,但结果返回的是 `character(0)`。
这是我所做的:
def wen1(df):
    """Unnest the list-valued column ``B`` into one row per list element.

    Each sequence in ``B`` is expanded into positional columns with
    ``apply(pd.Series)``, those columns are stacked back into rows, and the
    ``A`` index level is turned back into a regular column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a key column ``A`` and a column ``B`` holding sequences.

    Returns
    -------
    pandas.DataFrame
        Columns ``A`` and ``B``; the index is each element's position
        within its source sequence.
    """
    # One column per position in the sequence, rows keyed by A.
    expanded = df.set_index('A').B.apply(pd.Series)
    # stack() folds the positional columns back into rows: (A, position).
    stacked = expanded.stack()
    # Move A out of the index; the unnamed stacked values land in column 0.
    return stacked.reset_index(level=0).rename(columns={0: 'B'})
def wen2(df):
    """Unnest the list-valued column ``B`` by repeating ``A`` per element.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a key column ``A`` and a column ``B`` holding sequences.

    Returns
    -------
    pandas.DataFrame
        Columns ``A`` and ``B`` with one row per element of ``B``. The index
        is the original row label repeated once per element, because it is
        inherited from ``df.A.repeat(...)``.
    """
    # Number of elements in each row's sequence.
    lengths = df.B.str.len()
    return pd.DataFrame({
        'A': df.A.repeat(lengths),
        # Flatten all sequences into one 1-D array, in row order.
        'B': np.concatenate(df.B.values),
    })
def wen3(df):
    """Unnest the list-valued column ``B`` and re-attach the other columns.

    The flattened values of ``B`` are placed in a new frame indexed by the
    original row labels (repeated once per element); every other column of
    ``df`` is then joined back on that index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a column ``B`` holding sequences.

    Returns
    -------
    pandas.DataFrame
        ``B`` first, followed by the remaining columns of ``df``, with one
        row per element of ``B``.
    """
    lengths = df.B.str.len()
    flat = pd.DataFrame(
        {'B': np.concatenate(df.B.values)},
        index=df.index.repeat(lengths),
    )
    # Use the ``columns=`` keyword: passing ``axis`` positionally
    # (``df.drop('B', 1)``) raises TypeError on pandas >= 2.0.
    return flat.join(df.drop(columns='B'), how='left')
def wen4(df):
    """Unnest a two-column frame whose second column holds sequences.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with exactly two columns: each row of ``df.values`` is
        unpacked as ``(key, sequence)``.

    Returns
    -------
    pandas.DataFrame
        Same column labels as ``df``, one row per ``(key, element)`` pair,
        with a fresh default index.
    """
    # Pair every element of the sequence with its row's key.
    rows = [[key, item] for key, items in df.values for item in items]
    return pd.DataFrame(rows, columns=df.columns)
def chris1(df):
    """Unnest column ``B`` when every row's list has the same length.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns ``A`` and ``B``. The lists in ``B`` are turned
        into a 2-D array and ``vals.shape[1]`` is used as the repeat count,
        so they must all have the same length.

    Returns
    -------
    pandas.DataFrame
        Same column labels as ``df``, one row per element of ``B``, with a
        fresh default index. Both columns pass through a single
        ``np.column_stack`` array, so they share that array's dtype.
    """
    # Shape (n_rows, list_length); only valid for equal-length lists.
    vals = np.array(df.B.values.tolist())
    # Repeat each key once per element of its list.
    keys = np.repeat(df.A, vals.shape[1])
    stacked = np.column_stack((keys, vals.ravel()))
    return pd.DataFrame(stacked, columns=df.columns)
def chris2(df):
    """Unnest column ``B`` using a per-row repeat count.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns ``A`` and ``B``, where ``B`` holds sequences.

    Returns
    -------
    pandas.DataFrame
        Same column labels as ``df``, one row per element of ``B``, with a
        fresh default index. Both columns pass through a single
        ``np.column_stack`` array, so they share that array's dtype.
    """
    vals = df.B.values.tolist()
    # Per-row element counts drive how often each key is repeated.
    lengths = [len(row) for row in vals]
    keys = np.repeat(df.A.values, lengths)
    stacked = np.column_stack((keys, np.concatenate(vals)))
    return pd.DataFrame(stacked, columns=df.columns)
我尝试了一些 CSS 类选择器、XPath 等,但都没有结果。请指教!非常感谢!
答案 0 :(得分:0)
页面上那个很大的加载中转圈图标(spinner)其实就是一个提示:该网站是通过 XHR 请求异步加载内容的(如果你对网页抓取做过任何调研,就会知道这是网上和 SO 上被广泛讨论的主题)。
如果在浏览器上打开开发人员工具并重新加载该网站,则可以看到请求:
如果您右键单击突出显示的 URL,还会看到一个方便的“复制 URL”选项(或类似的名称,不同浏览器的叫法略有不同)。
不过,大多数浏览器还提供“复制为 cURL”选项,您可以配合 curlconverter
包,自动生成对应的 httr
HTTP 动词函数调用(请阅读其文档并动手尝试,或者在 SO 上搜索示例,我已经发布过很多):
# Replay the request shown in the browser's developer tools against the
# World Bank news search API, sending the same headers and query parameters.
res <- httr::POST(
  url = "http://search.worldbank.org/api/v2/news",
  httr::add_headers(
    `User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0",
    Accept = "application/json, text/plain, */*",
    `Accept-Language` = "en-US,en;q=0.7,fr-BE;q=0.3",
    Referer = "http://www.worldbank.org/en/news/all?displayconttype_exact=Speeches+and+Transcripts&lang_exact=English&qterm=",
    Origin = "http://www.worldbank.org",
    DNT = "1",
    Connection = "keep-alive",
    `Cache-Control` = "max-age=0"
  ),
  # Query-string parameters; the request body itself is empty.
  query = list(
    format = "json",
    rows = "20",
    fct = "displayconttype_exact,topic_exact,lang_exact,count_exact,countcode_exact,admreg_exact",
    src = "cq55",
    apilang = "en",
    displayconttype_exact = "Speeches+and+Transcripts",
    lang_exact = "English",
    qterm = ""
  ),
  body = list()
)

# Inspect the parsed response: print the `documents` element two levels deep.
str(httr::content(res)$documents, 2)
## List of 21
## $ 0f3dcce9e42a42509f35c1a3744f6768954e82ff:List of 20
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/11/05/world-bank-group-president-jim-yong-kims-remarks-at-the-open"| __truncated__
## ..$ id : chr "0f3dcce9e42a42509f35c1a3744f6768954e82ff"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/11/05/world-bank-group-president-jim-yong-kims-remarks-at-the-opening-cere"| __truncated__
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ admreg : chr "East Asia and Pacific"
## ..$ displayconttype : chr "Speeches and Transcripts"
## ..$ country : chr "China"
## ..$ originating_unit: chr "East Asia and Pacific, EAP"
## ..$ countcode : chr "CN"
## ..$ count : chr "China"
## ..$ regionname : chr "East Asia and Pacific"
## ..$ topic : chr "Trade"
## ..$ conttype : chr "Speeches and Transcripts"
## ..$ lang : chr "English"
## ..$ keywd : chr "regions:East Asia and Pacific,country:China,subject:trade"
## ..$ lnchdt : chr "2018-11-05T17:11:00Z"
## $ 82c9d8ffa337303aa54b7cf23fabc062c76a3fab:List of 13
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/11/05/remarks-at-eu-high-level-conference-the-single-market-as-a-d"| __truncated__
## ..$ id : chr "82c9d8ffa337303aa54b7cf23fabc062c76a3fab"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/11/05/remarks-at-eu-high-level-conference-the-single-market-as-a-driver-of"| __truncated__
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ displayconttype : chr "Speeches and Transcripts"
## ..$ originating_unit: chr "External and Corporate Relations, ECR"
## ..$ conttype : chr "Speeches and Transcripts"
## ..$ lang : chr "English"
## ..$ lnchdt : chr "2018-11-05T09:59:00Z"
## $ 527850d45d63a534497c985b0ff71970812668f9:List of 20
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/11/01/world-bank-group-president-jim-yong-kim-remarks-at-the-inter"| __truncated__
## ..$ id : chr "527850d45d63a534497c985b0ff71970812668f9"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/11/01/world-bank-group-president-jim-yong-kim-remarks-at-the-international"| __truncated__
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ admreg : chr "East Asia and Pacific"
## ..$ displayconttype : chr "Speeches and Transcripts"
## ..$ country : chr "China"
## ..$ originating_unit: chr "East Asia and Pacific, EAP"
## ..$ countcode : chr "CN"
## ..$ count : chr "China"
## ..$ regionname : chr "East Asia and Pacific"
## ..$ topic : chr "Poverty,Economic Growth"
## ..$ conttype : chr "Speeches and Transcripts"
## ..$ lang : chr "English"
## ..$ keywd : chr "subject:poverty,People:Jim Yong Kim,subject:economic growth,regions:East Asia and Pacific,country:China"
## ..$ lnchdt : chr "2018-11-01T15:15:00Z"
## $ 5bf8d923c6defbb00be5eb454a0505168ce38d4f:List of 13
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/10/31/opening-remarks-by-country-director-for-western-balkans-lind"| __truncated__
## ..$ id : chr "5bf8d923c6defbb00be5eb454a0505168ce38d4f"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/10/31/opening-remarks-by-country-director-for-western-balkans-linda-van-ge"| __truncated__
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ displayconttype : chr "Speeches and Transcripts"
## ..$ originating_unit: chr "Europe and Central Asia, ECA"
## ..$ conttype : chr "Speeches and Transcripts"
## ..$ lang : chr "English"
## ..$ lnchdt : chr "2018-10-31T13:51:00Z"
## $ 3c00ec85c4ba38a7952a1a13b2c00a789f7eebd3:List of 13
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/10/31/remarks-by-cyril-muller-on-investing-in-human-capital-for-inclusive-growth"
## ..$ id : chr "3c00ec85c4ba38a7952a1a13b2c00a789f7eebd3"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/10/31/remarks-by-cyril-muller-on-investing-in-human-capital-for-inclusive-growth"
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ displayconttype : chr "Speeches and Transcripts"
## ..$ originating_unit: chr "Europe and Central Asia, ECA"
## ..$ conttype : chr "Speeches and Transcripts"
## ..$ lang : chr "English"
## ..$ lnchdt : chr "2018-10-31T08:04:00Z"
## $ 2b85fb0a48c1693f79b79803f659d3449a4c4348:List of 13
## ..$ url : chr "http://www.worldbank.org/en/news/speech/2018/10/24/opening-remarks-at-sovereign-debt-management-forum"
## ..$ id : chr "2b85fb0a48c1693f79b79803f659d3449a4c4348"
## ..$ title :List of 1
## ..$ descr :List of 1
## ..$ wcmsource : chr "cq5"
## ..$ cqpath : chr "/content/wb-home/en/news/speech/2018/10/24/opening-remarks-at-sovereign-debt-management-forum"
## ..$ content :List of 1
## ..$ content_1000 :List of 1
## ..$ displayconttype : chr "Speeches and Transcripts"
## ... GOES ON A LONG TIME ..
您(大多数情况下)也可以直接使用普通 URL,但 httr
/ curlconverter
的方式便于您把请求封装成函数并进行参数化。