为什么通过HttpURLConnection的getInputStream获取的内容,与用Linux命令curl或wget获取的内容不同?

时间:2014-11-06 05:00:12

标签: java xml curl wget

代码:

/** URL of the Stack Overflow feed (questions tagged "android", sorted by newest) to download. */
private static final String HTTP_URL =
        "http://stackoverflow.com/feeds/tag?tagnames=android&sort=newest";
/**
 * Downloads the document at {@code HTTP_URL} and writes the raw response bytes,
 * unmodified, to the file {@code tmp.xml} (resolved against the working directory).
 *
 * <p>Errors are not rethrown: a malformed URL or any I/O failure is reported by
 * printing its stack trace, after which the method returns normally.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    try {
        java.net.URL url = new java.net.URL(HTTP_URL);
        URLConnection conn = url.openConnection();
        conn.setConnectTimeout(15000); // milliseconds
        conn.setReadTimeout(10000);    // milliseconds
        conn.setDoInput(true);
        conn.connect();

        // try-with-resources guarantees both streams are closed even if the copy
        // loop throws; the original leaked them on any IOException and never
        // closed the input stream at all.
        try (InputStream in = new BufferedInputStream(conn.getInputStream());
             FileOutputStream fos = new FileOutputStream(new File("tmp.xml"))) {
            byte[] buffer = new byte[2 * 1024];
            int bytesRead;
            // read() returns -1 at end of stream
            while ((bytesRead = in.read(buffer)) > -1) {
                fos.write(buffer, 0, bytesRead);
            }
            fos.flush();
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

获取内容:

<?xml version="1.0" encoding="utf-8"?>

<feed xmlns="http://www.w3.org/2005/Atom" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule" xmlns:re="http://purl.org/atompub/rank/1.0">
    <title type="text">Newest questions tagged android - Stack Overflow</title>
    <link rel="self" href="http://stackoverflow.com/feeds/tag?tagnames=android&amp;sort=newest" type="application/atom+xml" />
    <link rel="alternate" href="http://stackoverflow.com/questions/tagged/?tagnames=android&amp;sort=newest" type="text/html" />
    <subtitle>most recent 30 from stackoverflow.com</subtitle>
    <updated>2014-11-06T03:40:16Z</updated>
    <id>http://stackoverflow.com/feeds/tag?tagnames=android&amp;sort=newest</id>
    <creativeCommons:license>http://www.creativecommons.org/licenses/by-sa/3.0/rdf</creativeCommons:license>
    <entry>
        <id>http://stackoverflow.com/q/26771304</id>
        <re:rank scheme="http://stackoverflow.com">0</re:rank>
        <title type="text">What is the correct data/operation flow of the new Camera2 api?</title>
            <category scheme="http://stackoverflow.com/tags" term="android" />
            <category scheme="http://stackoverflow.com/tags" term="multithreading" />
            <category scheme="http://stackoverflow.com/tags" term="opengl" />
            <category scheme="http://stackoverflow.com/tags" term="android-camera" />
            <category scheme="http://stackoverflow.com/tags" term="android-5.0-lollipop" />
        <author>
            <name>user977289</name>
            <uri>http://stackoverflow.com/users/977289</uri>
        </author>
        <link rel="alternate" href="http://stackoverflow.com/questions/26771304/what-is-the-correct-data-operation-flow-of-the-new-camera2-api" />
        <published>2014-11-06T03:36:53Z</published>
        <updated>2014-11-06T03:36:53Z</updated>
        <summary type="html">

使用wget获取内容:

<!DOCTYPE html>
<html itemscope itemtype="http://schema.org/QAPage">
<head>
    <title>Newest &#39;android&#39; Questions - Stack Overflow</title>
    <link rel="shortcut icon" href="//cdn.sstatic.net/stackoverflow/img/favicon.ico?v=038622610830">
    <link rel="apple-touch-icon image_src" href="//cdn.sstatic.net/stackoverflow/img/apple-touch-icon.png?v=fd7230a85918">
    <link rel="search" type="application/opensearchdescription+xml" title="Stack Overflow" href="/opensearch.xml">
    <meta name="twitter:card" content="summary">
    <meta name="twitter:domain" content="stackoverflow.com"/>
    <meta property="og:type" content="website" />
    <meta property="og:image" itemprop="image primaryImageOfPage" content="http://cdn.sstatic.net/stackoverflow/img/apple-touch-icon@2.png?v=fde65a5a78c6" />
    <meta name="twitter:title" property="og:title" itemprop="title name" content="Newest &amp;#39;android&amp;#39; Questions" />
    <meta name="twitter:description" property="og:description" itemprop="description" content="Q&amp;A for professional and enthusiast programmers" />
    <meta property="og:url" content="http://stackoverflow.com/questions/tagged/?tagnames=android&amp;sort=newest"/>

    <script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
    <script src="//cdn.sstatic.net/Js/stub.en.js?v=7bd5359278db"></script>
    <link rel="stylesheet" type="text/css" href="//cdn.sstatic.net/stackoverflow/all.css?v=847253e69e95">

    <link rel="alternate" type="application/atom+xml" title="newest android questions feed" href="/feeds/tag?tagnames=android&amp;sort=newest" />

    <script>
        StackExchange.init({"locale":"en","stackAuthUrl":"https://stackauth.com","serverTime":1415242159,"networkMetaHostname":"meta.stackexchange.com","routeName":"Questions/ListByTag","styleCode":true,"enableUserHovercards":true,"snippets":{"enabled":true,"domain":"stacksnippets.net"},"site":{"name":"Stack Overflow","description":"Q&A for professional and enthusiast programmers","isNoticesTabEnabled":true,"recaptchaPublicKey":"6LdchgIAAAAAAJwGpIzRQSOFaO0pU6s44Xt8aTwc","recaptchaAudioLang":"en","enableNewTagCreationWarning":true,"nonAsciiTags":true,"enableSocialMediaInSharePopup":true},"user":{"fkey":"43e8d647b5e1e33123144dc39f3efa41","isAnonymous":true,"ab":{"tutorial_tooltips":{"v":"b","g":1},"homepage_recommended_algo":{"v":"c","g":1}}}});
        StackExchange.using.setCacheBreakers({"js/prettify-full.en.js":"73a6d05be8f7","js/moderator.en.js":"3bf88d4bfb80","js/full-anon.en.js":"f36061371874","js/full.en.js":"91631f8e1ecf","js/wmd.en.js":"bd137db5e76f","js/third-party/jquery.autocomplete.min.js":"e5f01e97f7c3","js/third-party/jquery.autocomplete.min.en.js":"","js/mobile.en.js":"6f90cb09d971","js/help.en.js":"56d0a3a560ae","js/tageditor.en.js":"fb6ca50266c2","js/tageditornew.en.js":"f6e3914262e8","js/inline-tag-editing.en.js":"1a5cabd8e30a","js/revisions.en.js":"989c474e85a9","js/review.en.js":"cce796af5ac7","js/tagsuggestions.en.js":"bb4721d888d2","js/post-validation.en.js":"ecf3ea435618","js/explore-qlist.en.js":"4e6f969d97ee","js/events.en.js":"e3f42c90a83b","js/keyboard-shortcuts.en.js":"feb9597a8451","js/external-editor.en.js":"ea8052d19ea8","js/external-editor.en.js":"ea8052d19ea8","js/snippet-javascript.en.js":"f32a62fbdc0b","js/snippet-javascript-codemirror.en.js":"7e172e0797e2"});
        StackExchange.using("gps", function() {
             StackExchange.gps.init(true);
        });
    </script>

    <script>
        StackExchange.ready(function () {
            $('#nav-tour').click(function () {
                StackExchange.using("gps", function() {
                    StackExchange.gps.track("aboutpage.click", { aboutclick_location: "headermain" }, true);
                });
            });
        });
    </script>
</head>

<body class="tagged-questions-page new-topbar" lang="">
    <noscript><div id="noscript-padding"></div></noscript>
    <div id="notify-container"></div>
    <div id="overlay-header"></div>
    <div id="custom-header"></div>

两者的内容完全不同,这是为什么?怎样才能用这些工具获取到XML格式的内容?

1 个答案:

答案 0 :(得分:1)

唯一可能导致stackoverflow服务器返回不同结果的,是您通过Java或wget发送的HTTP请求头(例如Accept或User-Agent)。因此,您需要分别检查这两种方式实际发送的HTTP头。要在wget中查看它们,可以使用以下参数运行它:

wget -d "http://stackoverflow.com/feeds/tag?tagnames=android&sort=newest"

-d参数强制wget命令打印请求和响应HTTP标头,以便您可以将它们与Java版本中的相应值进行比较。