Question

我正在使用一个 RSS 解析器来读取 RSS 条目。对于英文内容它工作正常，但如果读取的 XML 中包含非英文内容，得到的就是乱码。以下是我的代码：

 package com.medic.online;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import android.util.Log;

public class RSSParser {

// RSS XML document CHANNEL tag
private static String TAG_CHANNEL = "channel";
private static String TAG_TITLE = "title";
private static String TAG_LINK = "link";
private static String TAG_DESRIPTION = "description";
private static String TAG_LANGUAGE = "language";
private static String TAG_ITEM = "item";
private static String TAG_PUB_DATE = "pubDate";
private static String TAG_GUID = "guid";

// constructor
public RSSParser() {

}

/***
 * Get RSS feed from url
 * 
 * @param url - is url of the website 
 * @return RSSFeed class object
 */
public RSSFeed getRSSFeed(String url) {
    RSSFeed rssFeed = null;
    String rss_feed_xml = null;

    // getting rss link from html source code
    String rss_url = this.getRSSLinkFromURL(url);

    // check if rss_link is found or not
    if (rss_url != null) {
        // RSS url found
        // get RSS XML from rss ulr
        rss_feed_xml = this.getXmlFromUrl(rss_url);
        // check if RSS XML fetched or not
        if (rss_feed_xml != null) {
            // successfully fetched rss xml
            // parse the xml
            try {
                Document doc = this.getDomElement(rss_feed_xml);
                NodeList nodeList = doc.getElementsByTagName(TAG_CHANNEL);
                Element e = (Element) nodeList.item(0);

                // RSS nodes
                String title = this.getValue(e, TAG_TITLE);
                String link = this.getValue(e, TAG_LINK);
                String description = this.getValue(e, TAG_DESRIPTION);
                String language = this.getValue(e, TAG_LANGUAGE);

                // Creating new RSS Feed
                rssFeed = new RSSFeed(title, description, link, rss_url, language);
            } catch (Exception e) {
                // Check log for errors
                e.printStackTrace();
            }

        } else {
            // failed to fetch rss xml
        }
    } else {
        // no RSS url found
    }
    return rssFeed;
}

/**
 * Getting RSS feed items <item>
 * 
 * @param - rss link url of the website
 * @return - List of RSSItem class objects
 * */
public List<RSSItem> getRSSFeedItems(String rss_url){
    List<RSSItem> itemsList = new ArrayList<RSSItem>();
    String rss_feed_xml;

    // get RSS XML from rss url
    rss_feed_xml = this.getXmlFromUrl(rss_url);

    // check if RSS XML fetched or not
    if(rss_feed_xml != null){
        // successfully fetched rss xml
        // parse the xml
        try{
            Document doc = this.getDomElement(rss_feed_xml);
            NodeList nodeList = doc.getElementsByTagName(TAG_CHANNEL);
            Element e = (Element) nodeList.item(0);

            // Getting items array
            NodeList items = e.getElementsByTagName(TAG_ITEM);

            // looping through each item
            for(int i = 0; i < items.getLength(); i++){
                Element e1 = (Element) items.item(i);

                String title = this.getValue(e1, TAG_TITLE);
                Log.v("title", title);
                String link = this.getValue(e1, TAG_LINK);
                String description = this.getValue(e1, TAG_DESRIPTION);
                String pubdate = this.getValue(e1, TAG_PUB_DATE);
                String guid = this.getValue(e1, TAG_GUID);

                RSSItem rssItem = new RSSItem(title, link, description, pubdate, guid);

                // adding item to list
                itemsList.add(rssItem);
            }
        }catch(Exception e){
            // Check log for errors
            e.printStackTrace();
        }
    }

    // return item list
    return itemsList;
}

/**
 * Getting RSS feed link from HTML source code
 * 
 * @param ulr is url of the website
 * @returns url of rss link of website
 * */
public String getRSSLinkFromURL(String url) {
    // RSS url
    String rss_url = null;

    try {
        // Using JSoup library to parse the html source code
        org.jsoup.nodes.Document doc = Jsoup.connect(url).get();
        // finding rss links which are having link[type=application/rss+xml]
        org.jsoup.select.Elements links = doc
                .select("link[type=application/rss+xml]");

        Log.d("No of RSS links found", " " + links.size());

        // check if urls found or not
        if (links.size() > 0) {
            rss_url = links.get(0).attr("href").toString();
        } else {
            // finding rss links which are having link[type=application/rss+xml]
            org.jsoup.select.Elements links1 = doc
                    .select("link[type=application/atom+xml]");
            Log.d("No of RSS links1 found", " " + links1.size());
            if(links1.size() > 0){
                rss_url = links1.get(0).attr("href").toString();    
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

    // returing RSS url
    return rss_url;
}

/**
 * Method to get xml content from url HTTP Get request
 * */
public String getXmlFromUrl(String url) {
    String xml = null;

    try {
        // request method is GET
        DefaultHttpClient httpClient = new DefaultHttpClient();
        HttpGet httpGet = new HttpGet(url);

        HttpResponse httpResponse = httpClient.execute(httpGet);
        HttpEntity httpEntity = httpResponse.getEntity();
        xml = EntityUtils.toString(httpEntity);

    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // return XML
    return xml;
}

/**
 * Getting XML DOM element
 * 
 * @param XML string
 * */
public Document getDomElement(String xml) {
    Document doc = null;
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    try {

        DocumentBuilder db = dbf.newDocumentBuilder();

        InputSource is = new InputSource();
        is.setCharacterStream(new StringReader(xml));
        doc = (Document) db.parse(is);

    } catch (ParserConfigurationException e) {
        Log.e("Error: ", e.getMessage());
        return null;
    } catch (SAXException e) {
        Log.e("Error: ", e.getMessage());
        return null;
    } catch (IOException e) {
        Log.e("Error: ", e.getMessage());
        return null;
    }

    return doc;
}

/**
 * Getting node value
 * 
 * @param elem element
 */
public final String getElementValue(Node elem) {
    Node child;
    if (elem != null) {
        if (elem.hasChildNodes()) {
            for (child = elem.getFirstChild(); child != null; child = child
                    .getNextSibling()) {
                if (child.getNodeType() == Node.TEXT_NODE || ( child.getNodeType() == Node.CDATA_SECTION_NODE)) {
                    return child.getNodeValue();
                }
            }
        }
    }
    return "";
}

/**
 * Getting node value
 * 
 * @param Element node
 * @param key  string
 * */
public String getValue(Element item, String str) {
    NodeList n = item.getElementsByTagName(str);
    return this.getElementValue(n.item(0));
}

}

android中的RSS阅读器不支持其他语言

0 个答案: