这是我打算用于针对特定页面的网络爬虫的代码的精简版本。我的想法是编写一个函数:它接收一个URL,处理HTTP请求,并返回一个用于读取响应正文(http.Response 的 Body)的 Reader
:
package main
import (
"io"
"log"
"net/http"
"os"
)
// main downloads the xkcd front page and writes it to out.html.
//
// getPageContent hands back a plain io.Reader, which has no Close method,
// so main has no way to close the HTTP response body it is reading from.
func main() {
	const pageURL = "https://xkcd.com/"

	body, err := getPageContent(pageURL)
	if err != nil {
		log.Fatal(err)
	}

	out, err := os.Create("out.html")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	// Stream the body to the file; the byte count and error are discarded.
	_, _ = io.Copy(out, body)
}
// getPageContent issues an HTTP GET for url and returns the response body
// as an io.Reader. On failure it returns a nil reader and the error from
// http.Get.
//
// Because the result is exposed only as io.Reader, the caller is not given
// a Close method for the underlying response body.
func getPageContent(url string) (io.Reader, error) {
	var body io.Reader

	resp, err := http.Get(url)
	if err == nil {
		body = resp.Body
	}
	return body, err
}
响应主体永远不会被关闭,这很糟糕。当然,在 getPageContent
函数内部关闭它也行不通,因为那样 io.Copy
<script src="http://code.jquery.com/jquery-1.11.0.min.js"></script>
<script>
// Open a WebSocket to the blockchain.info notification endpoint.
var btcs = new WebSocket('wss://ws.blockchain.info/inv');

// Once connected, subscribe to events for the templated address.
btcs.onopen = function () {
    btcs.send(JSON.stringify({"op": "addr_sub", "addr": "{{$sendto}}"}));
};

// For every incoming message, report each output that pays the templated address.
btcs.onmessage = function (onmsg) {
    var response = JSON.parse(onmsg.data);
    var getOuts = response.x.out;
    var countOuts = getOuts.length;
    var specAdd = "{{$sendto}}";

    // The original loop header mixed "$usd"/"usd", used "usd+++", and the body
    // indexed with an undefined "i"; a single counter is used throughout now.
    for (var i = 0; i < countOuts; i++) {
        // Check every output to see if it matches the specified address.
        var outAdd = getOuts[i].addr;
        if (outAdd === specAdd) {
            var amount = getOuts[i].value;
            // NOTE(review): presumably converts satoshi to BTC - confirm the unit of "value".
            var calAmount = amount / 100000000;
            $('#messages').prepend("Received " + calAmount + " BTC");
        }
    }
};
</script>
无法从已关闭的资源中读取任何内容。
我的问题与其说是针对这个特定用例,不如说是一个更普遍的问题:如何用函数来抽象外部资源的获取,而又不必把整个资源先存入临时缓冲区?还是说我最好避免这种抽象?
答案 0 :(得分:0)
正如用户 leaf bebop 在评论区中指出的,函数 getPageContent
应该返回 io.ReadCloser
而不仅仅是 io.Reader,这样调用方就可以在读取完成后关闭响应正文:
package main
import (
"io"
"log"
"net/http"
"os"
)
// main downloads the xkcd front page and streams it into out.html.
//
// Any failure (request, file creation, copy, or closing the file) is fatal.
// Note that log.Fatal exits without running deferred calls; that is
// acceptable here because the process terminates immediately afterwards.
func main() {
	const url = "https://xkcd.com/"

	r, err := getPageContent(url)
	if err != nil {
		log.Fatal(err)
	}
	// The caller owns the response body and must close it.
	defer r.Close()

	f, err := os.Create("out.html")
	if err != nil {
		log.Fatal(err)
	}

	// Stream the body straight to disk instead of buffering the whole page.
	_, copyErr := io.Copy(f, r)
	// Close explicitly rather than via defer so that a failed close, which
	// can mean the data never reached the disk, is not silently dropped.
	closeErr := f.Close()

	if copyErr != nil {
		log.Fatal(copyErr)
	}
	if closeErr != nil {
		log.Fatal(closeErr)
	}
}
// getPageContent issues an HTTP GET for url and returns the response body.
//
// The body is returned as an io.ReadCloser, so the caller is responsible
// for closing it once it has finished reading. If the request fails, a nil
// body and the error from http.Get are returned.
func getPageContent(url string) (io.ReadCloser, error) {
	resp, fetchErr := http.Get(url)
	if fetchErr == nil {
		return resp.Body, nil
	}
	return nil, fetchErr
}
答案 1 :(得分:0)
另一种解决方案是直接返回整个响应,并在 main 函数中关闭其正文。这样一来,如果以后有新的需求,还可以对响应的 StatusCode 等进行检查。以下是更新后的代码:
package main
import (
"io"
"log"
"net/http"
"os"
)
// main downloads the xkcd front page and streams it into out.html.
//
// Any failure (request, unexpected status, file creation, copy, or closing
// the file) is fatal. Note that log.Fatal exits without running deferred
// calls; that is acceptable here because the process terminates immediately.
func main() {
	const url = "https://xkcd.com/"

	r, err := getPageContent(url)
	if err != nil {
		log.Fatal(err)
	}
	// The caller owns the response and must close its body.
	defer r.Body.Close()

	// Refuse to save the body of a non-200 response as if it were the
	// requested page.
	if r.StatusCode != http.StatusOK {
		log.Fatalf("GET %s: unexpected status %s", url, r.Status)
	}

	f, err := os.Create("out.html")
	if err != nil {
		log.Fatal(err)
	}

	// Stream the body straight to disk instead of buffering the whole page.
	_, copyErr := io.Copy(f, r.Body)
	// Close explicitly rather than via defer so that a failed close, which
	// can mean the data never reached the disk, is not silently dropped.
	closeErr := f.Close()

	if copyErr != nil {
		log.Fatal(copyErr)
	}
	if closeErr != nil {
		log.Fatal(closeErr)
	}
}
// getPageContent issues an HTTP GET for url and returns the full response,
// leaving status inspection and closing of the body to the caller.
//
// If the request fails, the response is always reported as nil alongside
// the error, regardless of what the HTTP client handed back.
func getPageContent(url string) (*http.Response, error) {
	resp, err := http.DefaultClient.Get(url)
	if err != nil {
		resp = nil
	}
	return resp, err
}