我正在编写一个Web爬网程序,使用30个goroutine并行爬网程序URL,该程序运行一段时间后会出现紧急情况,这是示例代码和紧急情况详细信息。 没有ioutil.ReadAll会读取httpResponse正文。
示例代码
import (
"io/ioutil"
"log"
"net/http"
"golang.org/x/sync/semaphore"
)
func main(){
urls:=make([]string,100)
crawl(urls)
}
func crawl(urls []string){
sem := semaphore.NewWeighte(30)
for _,u:=range urls {
c, _ := context.WithTimeout(context.Background(), 10*time.Second)
sem.Acquire(c, 1)
go func(u string){
defer sem.Release(1)
b,e:=down(u)
if e==nil{
log.Println(string(b))
}else{
log.Println("Error ",e)
}
}(u)
}
}
func down(u string) ([]byte,error){
resp,err:=http.Get(u)
if err!=nil {
return nil,err
}
defer resp.Body.Close()
return ioutil.ReadAll(resp.Body)
}
panic:
goroutine 14509 [sync.Cond.Wait]:
runtime.goparkunlock(...)
runtime/proc.go:307
sync.runtime_notifyListWait(0xc0000c2400, 0xc000000068)
runtime/sema.go:510 +0xf9
sync.(*Cond).Wait(0xc0000c23f0)
sync/cond.go:56 +0x9e
net/http.(*http2pipe).Read(0xc0000c23e8, 0xc011471bef, 0x90211, 0x90211, 0x0, 0x0, 0x0)
net/http/h2_bundle.go:3511 +0x8a
net/http.http2transportResponseBody.Read(0xc0000c23c0, 0xc011471bef, 0x90211, 0x90211, 0x0, 0x0, 0x0)
net/http/h2_bundle.go:8375 +0x95
bytes.(*Buffer).ReadFrom(0xc00046ee60, 0x7ff1d838ff40, 0xc0000c23c0, 0xc000136000, 0xc0008b02d0, 0xc004c05e88)
bytes/buffer.go:207 +0xbd
io/ioutil.readAll(0x7ff1d838ff40, 0xc0000c23c0, 0x200, 0x0, 0x0, 0x0, 0x0, 0x0)
io/ioutil/ioutil.go:36 +0xe5
io/ioutil.ReadAll(...)
io/ioenter code hereutil/ioutil.go:45