在Android中检测rss feed的编码

时间:2012-03-29 11:28:19

标签: android xml encoding rss

我正在尝试使用XmlPullParser解析XML。

我希望获得Feed的编码

例如

    <?xml version="1.0" encoding="ISO-8859-1"?>

   <?xml version="1.0" encoding="UTF-8"?>

上面是两种不同的编码声明,我想检测某个 Feed 实际使用的是哪一种编码。

有人能帮忙吗?我已经试过 getInputEncoding()。

3 个答案:

答案 0 :(得分:4)

//URL_FEED example: http://jovemnerd.ig.com.br/feed/rss/
final HttpGet httpget = new HttpGet(URL_FEED);

//Execute the GET request (httpclient is created elsewhere)
final HttpResponse response = httpclient.execute(httpget);

//Get the response entity
HttpEntity entity = response.getEntity();

//Get the response body as an InputStream
InputStream feed = entity.getContent();

...

//Wrap the InputStream in an InputSource
final InputSource source = new InputSource(feed);

//If the InputSource reports no encoding, take it from the entity's Content-Type header
if(source.getEncoding()==null){ //THIS IS THE PROBLEM
    //The encoding is null here, but the entity's Content-Type header may carry a charset
    source.setEncoding(getEncondingFromEntity(entity));
}

/*The InputSource now carries whatever encoding the header declared (null if none); pass "source" to the parser. Ex:*/
final XMLReader xmlreader = parser.getXMLReader();
final RSSHandler handler = new RSSHandler(config);
xmlreader.setContentHandler(handler);
xmlreader.parse(source);

...

/**
 * Extracts the charset declared in the entity's Content-Type header.
 *
 * Example header values:
 *   Content-Type: text/xml; charset=ISO-8859-1
 *   Content-Type: text/xml; charset="UTF-8"
 *
 * @param entity HTTP response entity whose Content-Type header is inspected; may be null
 * @return the lower-cased charset name, or null when there is no entity, no Content-Type
 *         header, or no non-empty charset parameter in the header
 */
private String getEncondingFromEntity(HttpEntity entity){
  if(entity == null || entity.getContentType() == null){
    return null;
  }
  final String contentType = entity.getContentType().getValue();
  if(contentType == null){
    return null;
  }
  for(String param : contentType.split(";")){
    final int eq = param.indexOf('=');
    // Match the parameter NAME exactly, so "charset = x" works and "xcharset=y" does not.
    if(eq < 0 || !"charset".equalsIgnoreCase(param.substring(0, eq).trim())){
      continue;
    }
    // The value may be a quoted-string; drop quotes and any stray whitespace.
    final String value = param.substring(eq + 1)
        .replace("\"", "").replace("'", "").replace(" ", "").trim();
    if(value.length() > 0){
      // Locale.ROOT keeps "ISO-..." from becoming "ıso-..." under a Turkish default locale.
      return value.toLowerCase(java.util.Locale.ROOT);
    }
  }
  return null;
}

答案 1 :(得分:0)

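你可以这样做:先通过 XmlPullParserFactory(或 Android 上的 Xml.newPullParser())获得一个 XmlPullParser 实例(XmlPullParser 是接口,不能直接 new),然后调用 parser.setInput(in, null);。编码参数传 null 时,解析器会自动检测编码,之后可以用 parser.getInputEncoding() 读取检测结果。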

答案 2 :(得分:0)

//Open Connection with URL XML Content
        URL url;
        InputStream feedStream = null;
        HttpURLConnection urlConnection = null;
//This URL have enconding ISO-8859-1
        url = new URL("http://feeds.feedburner.com/99vidaspodcast");
        urlConnection = (HttpURLConnection) url.openConnection();
        feedStream = new BufferedInputStream(urlConnection.getInputStream());

//Read and Parse XML with correct Enconding

        RSSFeed feed= parser.parse(feedStream,getEncondingFromEntity(urlConnection.getContentType()));

----------------------
//Detect encoding from the Content-Type header value

        /**
         * Extracts the charset declared in a Content-Type header value.
         *
         * Example inputs: "text/xml; charset=ISO-8859-1", "text/xml; charset=\"UTF-8\"".
         *
         * @param contentType raw Content-Type header value; may be null
         * @return the lower-cased charset name, or null when contentType is null or
         *         carries no non-empty charset parameter
         */
        private String getEncondingFromEntity(String contentType){
          if(contentType == null){
            return null;
          }
          for(String param : contentType.split(";")){
            final int eq = param.indexOf('=');
            // Match the parameter NAME exactly, so "charset = x" works and "xcharset=y" does not.
            if(eq < 0 || !"charset".equalsIgnoreCase(param.substring(0, eq).trim())){
              continue;
            }
            // The value may be a quoted-string; drop quotes and any stray whitespace.
            final String value = param.substring(eq + 1)
                .replace("\"", "").replace("'", "").replace(" ", "").trim();
            if(value.length() > 0){
              // Locale.ROOT keeps "ISO-..." from becoming "ıso-..." under a Turkish default locale.
              return value.toLowerCase(java.util.Locale.ROOT);
            }
          }
          return null;
        }

------------------------
//Apply the encoding and parse the XML
    /**
     * Parses an RSS stream with the given SAX parser, applying an explicit
     * character encoding when one is supplied.
     *
     * @param parser    SAX parser that supplies the XMLReader; must not be null
     * @param feed      stream holding the RSS document; must not be null
     * @param enconding charset name to set on the input source, or null to leave it unset
     * @return the feed produced by the RSSHandler after parsing
     * @throws IllegalArgumentException if parser or feed is null
     * @throws SAXException             if the XMLReader cannot be obtained or parsing fails
     * @throws IOException              if reading the stream fails
     */
    private RSSFeed parse(SAXParser parser, InputStream feed, String enconding)
          throws SAXException, IOException {
        // Guard clauses: reject missing collaborators up front.
        if (parser == null) {
          throw new IllegalArgumentException("RSS parser must not be null.");
        }
        if (feed == null) {
          throw new IllegalArgumentException("RSS feed must not be null.");
        }

        final InputSource input = new InputSource(feed);
        // Only apply the caller's encoding when one was given and none is set yet.
        final boolean applyEncoding = enconding != null && input.getEncoding() == null;
        if (applyEncoding) {
            input.setEncoding(enconding);
        }

        final XMLReader reader = parser.getXMLReader();
        final RSSHandler rssHandler = new RSSHandler(config);
        reader.setContentHandler(rssHandler);
        reader.parse(input);

        return rssHandler.feed();
      }

    -------------------------