要点: 我的Jsoup解析器单独运行时完全正常,但是一旦复制粘贴到我的Android应用程序的AsyncTask类中,就无法收集到任何值。返回的二维数组中只有空值(null)。
长版: 我一直致力于通过Jsoup使用页面抓取来从各种博客中提取和显示内容的应用程序。到目前为止,我已经编写了一些解析器,并且似乎都按预期工作。不幸的是,我最近的解析器(为nyc-shows.brooklynvegan.com编写)一直存在问题。
这是解析器方法本身,由一个添加了print语句的main方法调用。你可以自己运行试试。它能工作(并不完美,但确实能用)。
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Stand-alone console driver for the nyc-shows.brooklynvegan.com scraper.
 *
 * <p>Fetches the listing page with jsoup, extracts one row per event entry
 * and prints every column of every row to standard output.
 *
 * <p>Each row has six columns: 0 title, 1 date, 2 time, 3 price label,
 * 4 location, 5 ticket URL. Columns that could not be extracted stay
 * {@code null}.
 */
public class Main {
    /** CSS selector matching one event entry on the listing page. */
    static final String TAG_EVENT = "li.ds-entry";
    /** CSS selector for the title element inside an event entry. */
    static final String TAG_TITLE = ".ds-entry-title";
    /** CSS selector for the location element inside an event entry. */
    static final String TAG_LOCATION = ".location";
    /** CSS selector for the combined date/time element inside an event entry. */
    static final String TAG_DATE_AND_TIME = ".ds-date";
    /** CSS selector for the ticket-link container inside an event entry. */
    static final String TAG_TICKET_URL = ".ds-buy-tickets";
    /** Address of the page that is scraped. */
    static final String FEED_URL = "http://nyc-shows.brooklynvegan.com/";

    /** Number of rows in the returned table; further events are dropped. */
    private static final int MAX_EVENTS = 50;
    /** Number of columns in every row. */
    private static final int FIELD_COUNT = 6;
    /** Words whose presence in a title marks the entry as a day header, not a show. */
    private static final String[] DAY_WORDS = {
        "Today", "Tomorrow", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday", "Sunday"
    };

    /**
     * Fetches the feed and prints every column of every row, followed by a
     * separator line after each row.
     *
     * @param args unused
     * @throws IOException declared because {@link #getFeedItems()} declares it
     */
    public static void main(String[] args) throws IOException {
        String[][] values = getFeedItems();
        for (String[] row : values) {
            for (String field : row) {
                System.out.println(field);
            }
            System.out.println("-----------------");
        }
    }

    /**
     * Downloads the listing page and extracts up to {@value #MAX_EVENTS} events.
     *
     * <p>An {@link IOException} raised while fetching is printed and the
     * (possibly empty) table is still returned.
     *
     * @return a {@code [50][6]} table; unused rows and missing columns are {@code null}
     * @throws IOException kept in the signature for existing callers
     */
    public static String[][] getFeedItems() throws IOException {
        String[][] values = new String[MAX_EVENTS][FIELD_COUNT];
        try {
            // timeout(0): per the jsoup documentation a zero timeout means "wait indefinitely".
            Document doc = Jsoup.connect(FEED_URL).timeout(0).get();
            Elements events = doc.select(TAG_EVENT);
            int next = 0;
            for (Element event : events) {
                // The table has a fixed size; stop instead of overflowing it.
                if (next >= values.length) {
                    break;
                }
                if (fillRow(event, values[next])) {
                    next++;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return values;
    }

    /**
     * Copies the fields of one event entry into {@code row}.
     *
     * @param event the event entry element
     * @param row   destination row with at least six slots
     * @return {@code false} if the entry is a day header and nothing was written,
     *         {@code true} otherwise
     */
    private static boolean fillRow(Element event, String[] row) {
        Element title = event.select(TAG_TITLE).first();
        // Only read the text once we know the element exists.
        if (title != null) {
            String titleString = title.text();
            if (checkFake(titleString)) {
                return false;
            }
            row[0] = titleString;
        }

        Element dateAndTime = event.select(TAG_DATE_AND_TIME).first();
        if (dateAndTime != null) {
            String[] tokens = dateAndTime.text().split("[ ]");
            // The date is read from the second token and the time from the
            // fourth; shorter strings simply leave the column empty.
            if (tokens.length > 1) {
                row[1] = tokens[1];
            }
            if (tokens.length > 3) {
                row[2] = tokens[3];
            }
        }

        Element location = event.select(TAG_LOCATION).first();
        if (location != null) {
            row[4] = location.text();
        }

        Element ticketContainer = event.select(TAG_TICKET_URL).first();
        if (ticketContainer != null) {
            row[3] = "See Ticket";
            row[5] = ticketContainer.select("a").attr("href");
        } else {
            // No ticket container on the entry is treated as a free show.
            row[3] = "Free";
        }
        return true;
    }

    /**
     * Tells whether a title is a day header rather than a real show.
     *
     * @param s the title text; must not be {@code null}
     * @return {@code true} if {@code s} contains "Today", "Tomorrow" or a weekday name
     */
    public static boolean checkFake(String s) {
        for (String day : DAY_WORDS) {
            if (s.contains(day)) {
                return true;
            }
        }
        return false;
    }
}
下面是完全相同的方法,只是它运行在我的应用程序的AsyncTask中:在显示加载界面的同时于后台执行。
package com.example.nylist;
import java.io.IOException;
import android.app.Activity;
import android.content.Context;
import android.os.AsyncTask;
import android.util.Log;
import android.widget.Toast;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Background task that scrapes nyc-shows.brooklynvegan.com with jsoup and
 * hands the result to the owning {@code EventList} activity.
 *
 * <p>Each row has six columns: 0 title, 1 date, 2 time, 3 price label,
 * 4 location, 5 ticket URL. Columns that could not be extracted stay
 * {@code null}.
 */
public class BVParser extends AsyncTask<Void, Void, String[][]> {
    /** CSS selector matching one event entry on the listing page. */
    static final String TAG_EVENT = "li.ds-entry";
    /** CSS selector for the title element inside an event entry. */
    static final String TAG_TITLE = ".ds-entry-title";
    /** CSS selector for the location element inside an event entry. */
    static final String TAG_LOCATION = ".location";
    /** CSS selector for the combined date/time element inside an event entry. */
    static final String TAG_DATE_AND_TIME = ".ds-date";
    /** CSS selector for the ticket-link container inside an event entry. */
    static final String TAG_TICKET_URL = ".ds-buy-tickets";
    /** Address of the page that is scraped. */
    static final String FEED_URL = "http://nyc-shows.brooklynvegan.com/";

    /**
     * Desktop browser User-Agent sent with the request.
     *
     * <p>NOTE(review): the same selectors match on a desktop JVM but matched
     * nothing when this ran on Android. Presumably the server returns
     * different markup for Android's default User-Agent; confirm by logging
     * {@code doc.html()} with and without this header.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            + "(KHTML, like Gecko) Chrome/120.0 Safari/537.36";

    /** Number of rows in the returned table; further events are dropped. */
    private static final int MAX_EVENTS = 50;
    /** Number of columns in every row. */
    private static final int FIELD_COUNT = 6;
    /** Words whose presence in a title marks the entry as a day header, not a show. */
    private static final String[] DAY_WORDS = {
        "Today", "Tomorrow", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday", "Sunday"
    };

    /** Application context, used for the toast and for building rows. */
    final Context context;
    /** Activity that receives the rows; it is cast to {@code EventList} on delivery. */
    final Activity activity;

    /**
     * @param context the activity that starts the task and receives the result
     */
    public BVParser(Activity context) {
        this.context = context.getApplicationContext();
        this.activity = context;
    }

    /** Shows a "Fetching..." toast before the background work starts. */
    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        Toast.makeText(context, "Fetching...", Toast.LENGTH_LONG).show();
    }

    /**
     * Runs the scrape and returns the table of events.
     *
     * @param param unused
     * @return the table produced by {@link #getFeedItems()}, or an empty
     *         {@code [50][6]} table if that call throws
     */
    @Override
    protected String[][] doInBackground(Void... param) {
        String[][] values = new String[MAX_EVENTS][FIELD_COUNT];
        try {
            values = getFeedItems();
        } catch (IOException e) {
            // The throwable is passed to Log.d, so the stack trace is logged once.
            Log.d("ASSERT", "Exception occured during doInBackground", e);
        }
        Log.d("ASSERT", ("values successfully returned by doInBackground, first title is: "+values[0][0]));
        return values;
    }

    /**
     * Wraps every row of the table in a {@code ListRow} and passes the array
     * to the owning activity.
     *
     * <p>The table has a fixed number of rows, so rows that were never filled
     * are wrapped as well, with {@code null} fields.
     *
     * @param result the table returned by {@link #doInBackground(Void...)}
     */
    @Override
    protected void onPostExecute(String[][] result) {
        super.onPostExecute(result);
        int eventCount = result.length;
        Log.d("ASSERT", ("event count in onPostExecute is: "+eventCount));
        ListRow[] listrow_data = new ListRow[eventCount];
        for (int i = 0; i < eventCount; i++) {
            String[] row = result[i];
            if (row != null) {
                listrow_data[i] = new ListRow(context, row[0], row[1], row[2],
                        row[3], row[4], row[5], i);
            }
        }
        ((EventList) activity).setList(listrow_data);
    }

    /**
     * Downloads the listing page and extracts up to {@value #MAX_EVENTS} events.
     *
     * <p>An {@link IOException} raised while fetching is logged and the
     * (possibly empty) table is still returned.
     *
     * @return a {@code [50][6]} table; unused rows and missing columns are {@code null}
     * @throws IOException kept in the signature for existing callers
     */
    public String[][] getFeedItems() throws IOException {
        String[][] values = new String[MAX_EVENTS][FIELD_COUNT];
        try {
            Log.d("ASSERT","Made it to try block");
            // timeout(0): per the jsoup documentation a zero timeout means "wait indefinitely".
            Document doc = Jsoup.connect(FEED_URL)
                    .userAgent(USER_AGENT)
                    .timeout(0)
                    .get();
            Elements events = doc.select(TAG_EVENT);
            Log.d("ASSERT","printing events, whatever it is: "+events);
            int next = 0;
            for (Element event : events) {
                // The table has a fixed size; stop instead of overflowing it.
                if (next >= values.length) {
                    break;
                }
                Log.d("ASSERT","Made it to getFeedItems's main for loop");
                if (fillRow(event, values[next])) {
                    next++;
                }
            }
        } catch (IOException e) {
            Log.d("ASSERT","Exception during getFeedItems", e);
        }
        Log.d("ASSERT","The first title in getFeedItems before returning is: "+values[0][0]);
        return values;
    }

    /**
     * Copies the fields of one event entry into {@code row}.
     *
     * @param event the event entry element
     * @param row   destination row with at least six slots
     * @return {@code false} if the entry is a day header and nothing was written,
     *         {@code true} otherwise
     */
    private static boolean fillRow(Element event, String[] row) {
        Element title = event.select(TAG_TITLE).first();
        // An entry without a title element must not crash the whole task.
        if (title != null) {
            String titleString = title.text();
            Log.d("ASSERT","This title is: "+titleString);
            if (checkFake(titleString)) {
                return false;
            }
            row[0] = titleString;
        }

        Element dateAndTime = event.select(TAG_DATE_AND_TIME).first();
        if (dateAndTime != null) {
            String[] tokens = dateAndTime.text().split("[ ]");
            // The date is read from the second token and the time from the
            // fourth; shorter strings simply leave the column empty.
            if (tokens.length > 1) {
                row[1] = tokens[1];
            }
            if (tokens.length > 3) {
                row[2] = tokens[3];
            }
        }

        Element location = event.select(TAG_LOCATION).first();
        if (location != null) {
            row[4] = location.text();
        }

        Element ticketContainer = event.select(TAG_TICKET_URL).first();
        if (ticketContainer != null) {
            row[3] = "See Ticket";
            row[5] = ticketContainer.select("a").attr("href");
        } else {
            // No ticket container on the entry is treated as a free show.
            row[3] = "Free";
        }
        return true;
    }

    /**
     * Tells whether a title is a day header rather than a real show.
     *
     * @param s the title text; must not be {@code null}
     * @return {@code true} if {@code s} contains "Today", "Tomorrow" or a weekday name
     */
    private static boolean checkFake(String s) {
        for (String day : DAY_WORDS) {
            if (s.contains(day)) {
                return true;
            }
        }
        return false;
    }
}
调试尝试:
我在整个代码中添加了日志语句,以便调试问题。如果你运行它,你会发现问题似乎发生在getFeedItems()本身的某个地方,特别是在“try”块中。虽然try块开头的日志语句会被打印,但遍历events的for循环根本没有运行,因为循环开头的日志语句从未打印。
问题:
有人可以解释为什么遍历events的循环没有开始吗?events是否为空?如果是,为什么?为什么该方法单独运行时与在我的AsyncTask中运行时会有差异?我为此抓破了头。这个解析器的逻辑与我编写的其他(可正常工作的)解析器几乎完全相同,但它返回的却是一个只有空值的二维数组。我想不出逻辑错误可能出在哪里,也找不到任何拼写错误。
PS: 如果将此与我的其他解析器进行比较会有所帮助,请告诉我,我将发布源代码。提前谢谢。