When I read a file of 1000 records, an error message appears every 10 to 20 records:
console.log(exampleCategory)
Here is my code:
elems = driver.find_elements_by_xpath(alllinks)
previousTitle = ''
for elem in elems:
    url = elem.get_attribute("href")
    links.append(url)
    driver.get(url)
    # make sure to wait until the title is changed (no issue until 2 urls have same title)
    wait.until_not(EC.title_is(previousTitle))
    previousTitle = driver.title
Answer 0 (score: 1)
It looks like your server is closing the connection. Check whether there is a timeout and why the request takes so long (see the server-timeout sketch after the code below). You could process scanner.Text() asynchronously, so the scan is not blocked waiting for the searchAPI response and the request body does not stay open for too long.
resp.Header().Set("Content-Type", "text/plain")
scanner := bufio.NewScanner(req.Body)
ctx := context.Background()
var wg sync.WaitGroup
var mu sync.Mutex // http.ResponseWriter is not safe for concurrent writes
for scanner.Scan() {
    itemID := scanner.Text()
    wg.Add(1)
    go func(itemID string) {
        defer wg.Done()
        category := api.SearchAPI.FindCategory(itemID, lang, ctx)
        mu.Lock()
        _, _ = fmt.Fprintf(resp, "%v,%v \n", itemID, category)
        count++ // the counter is shared across goroutines, so keep the increment under the lock (or use sync/atomic)
        mu.Unlock()
    }(itemID)
}
// wait for the in-flight lookups before finishing the response
wg.Wait()
if err := scanner.Err(); err != nil {
    logger.Errorf("scan file error: %v", err)
    http.Error(resp, err.Error(), http.StatusBadRequest)
    return
}
//.....
}
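On the timeout point: if the server's write deadline is shorter than the time it takes to stream all 1000 result lines, the connection is closed part-way through the response, which would show up as intermittent errors after some number of records. Below is a minimal sketch of where those limits are usually configured, assuming the standard net/http server; the address, handler, and durations are placeholders, not values from the question.

package main

import (
    "net/http"
    "time"
)

func main() {
    srv := &http.Server{
        Addr:    ":8080",              // placeholder address
        Handler: http.DefaultServeMux, // your mux/handler here
        // ReadTimeout covers reading the entire request, including the body;
        // WriteTimeout covers writing the whole response back.
        ReadTimeout:  1 * time.Minute,
        WriteTimeout: 5 * time.Minute,
        IdleTimeout:  60 * time.Second,
    }
    _ = srv.ListenAndServe()
}

Any reverse proxy or load balancer in front of the server has its own read/idle timeouts and can close the connection in the same way, so those are worth checking too.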
As an alternative to the goroutine approach above, you can simply collect all the itemIDs into a slice, close the request body, and then process them one by one.
resp.Header().Set("Content-Type", "text/plain")
scanner := bufio.NewScanner(req.Body)
ctx := context.Background()
itemIDs := make([]string, 0)
for scanner.Scan() {
    itemID := scanner.Text()
    itemIDs = append(itemIDs, itemID)
}
if err := scanner.Err(); err != nil {
    logger.Errorf("scan file error: %v", err)
    http.Error(resp, err.Error(), http.StatusBadRequest)
    return
}
// the request body has been fully consumed at this point; now process the items one by one
for _, itemID := range itemIDs {
    category := api.SearchAPI.FindCategory(itemID, lang, ctx)
    _, _ = fmt.Fprintf(resp, "%v,%v \n", itemID, category)
    count++
}
//.....
}