我写了一些代码
/**
 * Opens an HTTP connection to a hard-coded URL, picks a charset from the
 * Content-Type response header (defaulting to UTF-8), scans the body for a
 * line mentioning "charset", and then tries to read the body a second time
 * with the charset found there, printing every line.
 *
 * NOTE(review): this is the version whose second read loop misbehaves; the
 * comments below point at the statements involved and do not change the code.
 *
 * @param args not used
 */
public static void main(String[] args) throws HttpException, IOException,
JSONException {
// TODO Auto-generated method stub
try {
URL murl = new URL(
"http://www.baidu.com/link?url=NaethV_J2hSPVx_OdPlHk73964mU4LcwWkJmVUV4vIkuCXRf1y09ufRZVwkHJqSAa2mMSCoTLYVhGv2AyV_04_");
HttpURLConnection conn = (HttpURLConnection) murl.openConnection();
conn.setRequestProperty(
"User-Agent",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36");
conn.setConnectTimeout(10000);
conn.connect();
// First guess at the charset: look for "charset=" in the Content-Type header.
String strencoding = null;
Map<String, List<String>> map = conn.getHeaderFields();
Set<String> keys = map.keySet();
Iterator<String> iterator = keys.iterator();
String key = null;
String tmp = null;
while (iterator.hasNext()) {
key = iterator.next();
// tmp is the lower-cased toString() of the header's value list, e.g. "[text/html; charset=...]".
tmp = map.get(key).toString().toLowerCase();
// The header name is compared case-sensitively against "Content-Type".
if (key != null && key.equals("Content-Type")) {
System.out.println(tmp);
int m = tmp.indexOf("charset=");
if (m != -1) {
// m + 8 skips "charset="; the replace drops the "]" left over from List.toString().
strencoding = tmp.substring(m + 8).replace("]", "");
}
}
}
// Fall back to UTF-8 when the header did not name a charset.
strencoding = strencoding == null ? "UTF-8" : strencoding;
// The returned status code is discarded.
conn.getResponseCode();
// conn.connect();
String href = conn.getURL().toString();
System.out.println(href);
// Reduce the URL to its host part; href is not read again after this point
// (its only consumer is the commented-out block below).
href = href.replace("http://", "");
try {
href = href.split("/")[0];
} catch (Exception eee) {
// Any exception from the split is silently ignored.
}
/*
* ParseDomainName pdn = new ParseDomainName(href);
* System.out.println("Your host IP is: " +
* pdn.getMyIP().getHostAddress());
* System.out.println("The Server IP is :" +
* pdn.getServerIP().getHostAddress()); // InputStream inputstream =
* conn.getInputStream();
*/
// First pass: read the body with the header charset until a line mentioning "charset" is found.
BufferedReader reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
String lines;
int i = 1;
while ((lines = reader.readLine()) != null) {
// "> 0" (not ">= 0") means a match at the very start of the line is skipped.
if (lines.toLowerCase().indexOf("charset") > 0) {
System.out.println(lines);
String strtmp = lines;
// This second search runs on the original (not lower-cased) line, so it is case-sensitive.
int inttmp = strtmp.indexOf("charset");
if (inttmp > -1) {
System.out.println(strtmp.length());
// Take everything after "charset" to the end of the line and strip
// '=', '/', quotes, spaces and angle brackets to obtain the charset name.
strencoding = strtmp
.substring(inttmp + 7, strtmp.length())
.replace("=", "").replace("/", "")
.replace("\"", "").replace("\'", "")
.replace(" ", "").replace("<", "")
.replace(">", "");
break;
}
}
i++;
}
// NOTE(review): mark() is called only here, after the first loop has already
// consumed input, so the reset() that follows returns to this same position
// rather than to the start of the body.
reader.mark(0);
reader.reset();
// A second reader is wrapped around the same connection stream; the first
// reader is never closed. NOTE(review): whatever the first reader already
// pulled from the stream (including its read-ahead buffer) is not delivered
// again, which matches the symptom described for the second loop.
reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
// Second pass: print every remaining line; i keeps counting from the first loop.
while ((lines = reader.readLine()) != null) {
System.out.println(i + " " + lines);
if (lines.toLowerCase().indexOf("icp") > 0) {
// System.out.println(i + " " + lines);
}
i++;
}
System.out.println(i + "---" + strencoding);
reader.close();
conn.disconnect();
} catch (Exception e2) {
// Every failure above ends up here and is only printed.
e2.printStackTrace();
}
}
问题出在最后的 while 循环:在第一个循环中我检查页面的 charset 并 break,然后我重置了 reader,再次调用 readLine,但是在第二个循环中,它是从第一个循环结束的位置开始读取的。
有时它会打印结果:
有时在第二个循环中读取不到任何内容,如下所示:
那么问题是什么?
答案 0 :(得分:1)
你必须在第一个while循环之前调用reader.mark(); reader.mark()基本上保存了阅读器的当前位置,以便您在调用reader.reset()时可以返回到该位置。
您也不想将0传递给reader.mark()。请参阅以下参数的java规范:
readAheadLimit - 限制在保留标记的同时可以读取的字符数。尝试在读取字符达到此限制或更高时重置流可能会失败。大于输入缓冲区大小的限制值将导致分配一个大小不小于limit的新缓冲区。因此,应谨慎使用大值。
(换句话说,传入 0 是没有用的。你需要传入一个大于 mark() 和 reset() 之间所读取字符数的数字。)
答案 1 :(得分:0)
reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
这一行让你的代码创建了一个新的 reader,这会使 reader 从头开始读取。
答案 2 :(得分:0)
最后,我发现问题出在 conn.getInputStream() 上:它返回的输入流在第一个循环之后已经被改变(读取过)了,所以我做了一些修改,把输入流复制了一份,现在没问题了:
/**
 * Downloads a page, detects the charset declared in its markup, then decodes
 * the same bytes again with that charset and prints every line containing
 * "icp" together with its line number.
 *
 * <p>The response body is buffered into memory once, because the stream
 * returned by {@code conn.getInputStream()} cannot be read a second time;
 * each pass decodes its own {@code ByteArrayInputStream} over those bytes.
 *
 * <p>Failures are printed to standard error rather than propagated, as before.
 *
 * @param args not used
 */
public static void main(String[] args) throws HttpException, IOException,
        JSONException {
    HttpURLConnection conn = null;
    try {
        URL murl = new URL(
                "http://www.baidu.com/link?url=NaethV_J2hSPVx_OdPlHk73964mU4LcwWkJmVUV4vIkuCXRf1y09ufRZVwkHJqSAa2mMSCoTLYVhGv2AyV_04_");
        conn = (HttpURLConnection) murl.openConnection();
        conn.setRequestProperty(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36");
        conn.setConnectTimeout(10000);
        conn.connect();
        // Ask for the status first so the request has been sent before the URL is
        // printed (presumably so it reflects any redirect that was followed).
        conn.getResponseCode();
        System.out.println(conn.getURL().toString());

        byte[] body = readFully(conn);

        // Pass 1: decode as UTF-8 only to find the charset declared in the markup.
        String strencoding = detectCharset(body, "UTF-8");

        // Pass 2: decode the same bytes with the detected charset. The line counter
        // starts again at 1 so the printed numbers are real line numbers.
        int i = 1;
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(body), strencoding));
        try {
            String lines;
            while ((lines = reader.readLine()) != null) {
                // contains() also matches at column 0; Locale.ROOT keeps the
                // lower-casing independent of the default locale.
                if (lines.toLowerCase(java.util.Locale.ROOT).contains("icp")) {
                    System.out.println(i + " " + lines);
                }
                i++;
            }
        } finally {
            reader.close();
        }
        System.out.println(i + "---" + strencoding);
    } catch (Exception e2) {
        e2.printStackTrace();
    } finally {
        // Release the connection even when reading failed.
        if (conn != null) {
            conn.disconnect();
        }
    }
}

/** Reads the whole response body of {@code conn} into memory and closes the stream. */
private static byte[] readFully(HttpURLConnection conn) throws IOException {
    InputStream inputStream = conn.getInputStream();
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int len;
        while ((len = inputStream.read(buffer)) > -1) {
            baos.write(buffer, 0, len);
        }
        return baos.toByteArray();
    } finally {
        inputStream.close();
    }
}

/**
 * Returns the first supported charset named by a {@code charset=...} declaration
 * in {@code body}, or {@code fallback} when none is found.
 *
 * <p>The match is case-insensitive and captures only the charset token, so
 * trailing markup on the same line cannot end up in the name. Names the JVM
 * does not support are skipped, so the result is always usable for decoding.
 */
private static String detectCharset(byte[] body, String fallback) throws IOException {
    java.util.regex.Pattern declaration = java.util.regex.Pattern.compile(
            "charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9][A-Za-z0-9._:+-]*)",
            java.util.regex.Pattern.CASE_INSENSITIVE);
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            new ByteArrayInputStream(body), fallback));
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            java.util.regex.Matcher matcher = declaration.matcher(line);
            while (matcher.find()) {
                String candidate = matcher.group(1);
                // The pattern only admits legal charset-name characters, so
                // isSupported() is not expected to reject the syntax.
                if (java.nio.charset.Charset.isSupported(candidate)) {
                    return candidate;
                }
            }
        }
    } finally {
        reader.close();
    }
    return fallback;
}