我正在尝试创建一个Android应用程序,在网格/列表中解析来自在线的图片但是我想出了一些运行时错误..它说我正在解析我的家庭狗的错误。有谁知道我在哪里犯错误?我知道为什么数组会超出界限但我不知道如何修复它!
我正在尝试解析http://www.dogbreedslist.info/family-dog-breeds/这个网站数据..但是我在
的这些部分遇到了运行时错误DogActivity.class
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
}
Parser.class
public class Parser {
Document doc;
Context context;
Elements dogRows;
public Parser(String url, Context context) {
this.context = context;
try {
doc = Jsoup.connect(url).get();
} catch (IOException e) {
Log.e("Page", "Wrong URL or network problems", e);
}
}
public ArrayList<Dog> parseDogsPage(Breed.Name breedName, Context context) {
ArrayList<Dog> dogs = new ArrayList<>();
try {
Element dogContainer;
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogContainer = doc.getElementsByClass("familybreed").get(0);
} else {
dogContainer = doc.getElementsByClass("toybreed").get(0);
}
Log.i("Page", "A page has been parsed successfully");
dogRows = dogContainer.getElementsByTag("a");
for (Element dogRow : dogRows) {
String dogName, dogURL;
Dog dog;
dogURL = dogRow.getElementsByTag("a").get(0).absUrl("href");
String dogThumbnailURL = dogRow.
getElementsByTag("img").get(0).absUrl("src");
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogName = dogRow.getElementsByTag("span").get(0).text();
dog = new Dog(dogName, dogURL, dogThumbnailURL, breedName);
} else {
dogName = dogRow.getElementsByTag("strong").get(0).text();
Element details = dogContainer.getElementsByClass("details").get(0);
Elements children = details.children();
if (breedName == Breed.Name.TOY_DOG_BREED || breedName == Breed.Name.HOUND_DOG_BREED) {
String origin = children.get(1).text();
String lifespan = children.get(3).text();
dog= new Dog(dogName, origin , lifespan, dogURL, dogThumbnailURL, breedName);
} else {
//for herding
String sizetype = children.get(1).text();
dog = new Dog(dogName, sizetype, dogThumbnailURL, dogURL, breedName);
}
}
dogs.add(dog);
}
} catch (Exception e) {
Log.e("Breed activity", "Wrong parsing for " + breedName, e);
}
return dogs;
}
public Dog parseProfile(Dog dog) {
if (!dog.isDetailDataReady()) {
//coaches already read the data in the coaches page
try {
Element dogContainer = doc.getElementById("dogscontainer");
Element bioContainer = dogContainer.getElementById("biocontainer");
Element bioDetails = bioContainer.getElementById("biodetails");
dog.setOtherNames(bioDetails.getElementsByTag("h1").text());
ArrayList<Dog.Detail> dogDetails = new ArrayList<>();
Elements rows = bioDetails.getElementsByTag("tr");
for (Element row : rows) {
Elements tds = row.getElementsByTag("td");
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//coaches, manager and legends use th and td
Elements ths = row.getElementsByTag("th");
dogDetails.add(new Dog.Detail(ths.get(0).text(), tds.get(0).text()));
} else {
//dogs use two tds
dogDetails.add(new Dog.Detail(tds.get(0).text(), tds.get(1).text()));
}
}
dog.setDetails(dogDetails);
Element articleText = dogContainer.getElementsByClass("dogarticletext").get(0);
Elements paragraphs = articleText.getElementsByTag("p");
String text = "";
for (Element p : paragraphs) {
text = text + "\n\n\n" + p.text();
}
dog.setArticleText(dog.getArticleText() + text);
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//get main image url
dog.setMainImageURL(bioContainer.getElementsByTag("img").get(0).absUrl("src"));
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setThumbnailURL(dog.getMainImageURL());
//only need first name
dog.setName(dog.getOtherNames().split(" ")[1]);
}
} else {
dog.setMainImageURL(bioContainer.getElementsByClass("mainImage").get(0).absUrl("src"));
}
} catch (Exception e) {
Log.e("Profile activity", "Wrong parsing for " + dog.getUrl(), e);
}
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setBasicDataReady(true);
}
dog.setDetailDataReady(true);
}
return dog;
}
}
RetrieveDogTask:
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
logcat的:
Wrong parsing for FAMILY_DOG_BREED
java.lang.IndexOutOfBoundsException: Invalid index 0, size is 0
at java.util.ArrayList.throwIndexOutOfBoundsException(ArrayList.java:255)
at java.util.ArrayList.get(ArrayList.java:308)
at org.jsoup.select.Elements.get(Elements.java:544)
at com.example.shannon.popular.Parser.parseDogsPage(Parser.java:35)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:140)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:131)
at android.os.AsyncTask$2.call(AsyncTask.java:288)
at java.util.concurrent.FutureTask.run(FutureTask.java:237)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1112)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:587)
at java.lang.Thread.run(Thread.java:818)
Breed.class:
public class Breed implements Serializable {
private Name name;
private String url;
Breed(Name name, String url) {
this.name = name;
this.url = url;
}
public Name getName() {
return name;
}
public String getNameString(Context context) {
String nameString = "";
switch (name) {
case FAMILY_DOG_BREED:
nameString = context.getString(R.string.family_breed);
break;
case TOY_DOG_BREED:
nameString = context.getString(R.string.toy_breed);
break;
case HOUND_DOG_BREED:
nameString = context.getString(R.string.hound_breed);
break;
case TERRIER_DOG_BREED:
nameString = context.getString(R.string.terrier_breed);
break;
case WORKING_DOG_BREED:
nameString = context.getString(R.string.working_breed);
break;
case HERDING_DOG_BREED:
nameString = context.getString(R.string.herding_breed);
break;
}
return nameString;
}
public String getURL() {
return url;
}
public enum Name {FAMILY_DOG_BREED, TOY_DOG_BREED, HOUND_DOG_BREED, TERRIER_DOG_BREED, WORKING_DOG_BREED, HERDING_DOG_BREED}
}
答案 0 :(得分:0)
您可能正在使用严格的XML解析器来处理格式错误的HTML文档。我只是尝试对您正在解析的URL进行XML验证,并且它失败了,因为<link>
元素永远不会关闭(在严格的XML中,它应该以{{1}}标记结束,但在该页面中缺少它)。
这对于HTML页面来说非常常见,因为今天的浏览器倾向于自动纠正这些类型的错误。
由于您使用严格的XML解析器,因此解析器很可能会失败。
我建议切换到不同的解析器。我使用PULL解析器(例如http://www.xmlpull.org) - 这种技术允许使用较低级别的控件进行解析,这意味着您可以轻松地忽略HTML中不需要的内容 - 比如这些链接元素或任何其他内容。 / p>
所以你可以这样做:
</link>