通过 Twitter 的 REST API 获取推文时,服务器每次只返回有限数量的推文。我正尝试按照 API 文档建议的方式,用 max_id 循环请求来取回用户的全部历史记录。但是,每次请求的响应都带有一个新的 XML 声明(XML 头),这些响应被追加到同一个文件后会导致 SAX 解析器报错,因此我无法读取 max_id。我查看了 Java API 中的 Transformer 和 OMIT_XML_DECLARATION,但不知道应该把它放在代码的什么位置:是在建立连接时、在打印(写文件)方法中,还是在处理文档时去掉 XML 声明。尽管我已经仔细研究过,仍然没弄明白 Transformer 的输出是怎么回事。
public class DataGrabber {
File destFile;
int qcount = 0;
public void getRuserHx() throws ParserConfigurationException, IOException, InterruptedException, SAXException {
int downNodes = 0;
Integer statTot = 10;
String maxId = null;
File filename = new File(MyIds.hoopoeData + "/" + MyIds.rootUser + "Hx.xml");
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc;
//loop and download based on the id of the last tweet
while(downNodes < statTot){
System.out.println("Getting user status history...");
String filex = MyIds.rootUser + "Hx.xml";
String https_url = "https://twitter.com/statuses/user_timeline.xml?screen_name=" + MyIds.rootUser + "&count=300";
makeConnection(https_url, filex);
//validate the xml before you parse
doc = builder.parse(filename);
doc.getDocumentElement().normalize();
//set up for the loop
downNodes = HooUtil.nodeCount(filename, "status");
statTot = Integer.parseInt(HooUtil.nodeValue(filename, "user", "statuses_count", 0));
Long loopMax = (Long.valueOf(HooUtil.nodeValue(filename, "status", "id", downNodes - 1)) - 1);
maxId = loopMax.toString();
https_url = "https://twitter.com/statuses/user_timeline.xml?screen_name=" + MyIds.rootUser + "&count=300&max_id=" + maxId;
Thread.sleep(4000);
qcount ++;
}
System.out.println("Finished downloading user status history.");
}
//connect with the input query
public void makeConnection(String https_url, String filex){
URL url;
try {
url = new URL(https_url);
HttpsURLConnection con = (HttpsURLConnection)url.openConnection();
//dump all the content into an xml file
print_content(con, filex);
}
catch (MalformedURLException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
}
//the print method for the xml file
private void print_content(HttpsURLConnection con, String filex){
if(con!=null){
try {
BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream()));
destFile = new File("/" + filex);
PrintWriter out = new PrintWriter(new FileWriter(MyIds.hoopoeData + destFile, true));
String input;
while ((input = br.readLine()) != null){
out.println(input);
}
out.flush();
out.close();
br.close();
}
catch (IOException e) {
e.printStackTrace();
}
}