Question

我从RSS feed中获取“description”标签。但是根据日志，我可以看到描述并不完整：它被截断了，而如果我在浏览器中粘贴该网址，则会显示完整的描述。我的代码中也没有任何地方截断或过滤内容。可能是什么原因，以及如何获得完整内容？

Once upon a time, a Guru was giving darshan to a congregation. People 
were coming and bowing down, seeking blessings. Gurudev was silent most of the 
time and when somebody would come and share their troubles, looking for a response, 
he would say only one thing. One person came to him and said, “I failed [...]

以下是Feed的网址：

http://srisriravishankar.org/feed/

谢谢

针对asatanballa编辑后的问题［如何在XML解析器中检索“content:encoded”标签］：

RssFeedStructure.java

/**
 * Plain mutable value holder for one entry of an RSS/Atom feed.
 *
 * <p>Every property is stored exactly as handed to its setter; no setter
 * validates, trims or otherwise transforms its argument. All properties are
 * {@code null} until they are set.
 */
public class RssFeedStructure {

    private String title;
    private URL url;
    private String description;
    private String pubDate;
    private String encodedContent;
    private String imgLink;
    private String link;
    private String content;
    private String published;

    /** @return the entry title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the entry title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the URL associated with the entry, or {@code null} if none was set */
    public URL getUrl() {
        return this.url;
    }

    /** @param url the URL to associate with the entry */
    public void setUrl(URL url) {
        this.url = url;
    }

    /** @return the description text, or {@code null} if none was set */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description text to store, kept unmodified */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the publication date string, or {@code null} if none was set */
    public String getPubDate() {
        return this.pubDate;
    }

    /** @param pubDate the publication date string to store */
    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }

    /** @return the encoded content, or {@code null} if none was set */
    public String getEncodedContent() {
        return this.encodedContent;
    }

    /** @param encodedContent the encoded content to store */
    public void setEncodedContent(String encodedContent) {
        this.encodedContent = encodedContent;
    }

    /** @return the image link, or {@code null} if none was set */
    public String getImgLink() {
        return this.imgLink;
    }

    /** @param imgLink the image link to store */
    public void setImgLink(String imgLink) {
        this.imgLink = imgLink;
    }

    /** @return the entry link, or {@code null} if none was set */
    public String getLink() {
        return this.link;
    }

    /** @param link the entry link to store */
    public void setLink(String link) {
        this.link = link;
    }

    /** @return the content text, or {@code null} if none was set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content text to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the published timestamp string, or {@code null} if none was set */
    public String getPublished() {
        return this.published;
    }

    /** @param published the published timestamp string to store */
    public void setPublished(String published) {
        this.published = published;
    }
}

XmlHandler.java

/**
 * SAX content handler that collects the entries of a feed as
 * {@link RssFeedStructure} objects.
 *
 * <p>Text is buffered per element; when an element ends, its text is copied
 * into the matching property of the entry currently being built. When an
 * {@code item} element ends, the entry is appended to the result list and a
 * fresh entry is started. Parsing is aborted once {@code ARTICLES_LIMIT}
 * entries have been collected.
 *
 * <p>The result list and the entry counter are instance fields that are never
 * cleared, so calling {@link #getLatestArticles(String)} repeatedly on the
 * same instance keeps appending to the same list.
 */
public class XmlHandler extends DefaultHandler {

    // Number of articles to download
    private static final int ARTICLES_LIMIT = 55;

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(XmlHandler.class.getName());

    private RssFeedStructure feedStr = new RssFeedStructure();
    private List<RssFeedStructure> rssList = new ArrayList<RssFeedStructure>();

    private int articlesAdded = 0;

    // Set right before parsing is aborted on purpose, so that the resulting
    // SAXException can be told apart from a genuine parse failure.
    private boolean limitReached = false;

    StringBuffer chars = new StringBuffer();

    /**
     * Returns the element name without its namespace prefix.
     *
     * <p>A parser that is not namespace-aware reports an empty local name and
     * only fills in the qualified name, so the qualified name (minus any
     * {@code prefix:}) is used as a fallback.
     */
    private static String elementName(String localName, String qName) {
        if (localName != null && localName.length() > 0) {
            return localName;
        }
        if (qName == null) {
            return "";
        }
        int colon = qName.indexOf(':');
        return colon < 0 ? qName : qName.substring(colon + 1);
    }

    /**
     * Resets the text buffer for the new element and, for an
     * {@code enclosure} element, records its {@code url} attribute as the
     * image link of the current entry. A missing attribute, or the literal
     * text "null", is stored as an empty string.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) {
        chars = new StringBuffer();

        if ("enclosure".equalsIgnoreCase(elementName(localName, qName))) {
            String enclosureUrl = (atts == null) ? null : atts.getValue("url");
            if (enclosureUrl != null && !enclosureUrl.equalsIgnoreCase("null")) {
                feedStr.setImgLink(enclosureUrl);
            } else {
                feedStr.setImgLink("");
            }
        }
    }

    /**
     * Copies the buffered text into the property matching the element that
     * just ended, and finishes the current entry when an {@code item} ends.
     *
     * @throws SAXException to abort parsing once {@code ARTICLES_LIMIT}
     *         entries have been collected
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        String name = elementName(localName, qName);
        String text = chars.toString();

        if (name.equalsIgnoreCase("title")) {
            feedStr.setTitle(text);
        } else if (name.equalsIgnoreCase("description")) {
            feedStr.setDescription(text);
        } else if (name.equalsIgnoreCase("pubDate")) {
            feedStr.setPubDate(text);
        } else if (name.equalsIgnoreCase("encoded")) {
            // <content:encoded> carries the full article body.
            feedStr.setEncodedContent(text);
        } else if (name.equalsIgnoreCase("published")) {
            feedStr.setPublished(text);
        } else if (name.equalsIgnoreCase("content")) {
            // <media:content> is deliberately ignored; it must not overwrite
            // the text of a plain <content> element.
            if (!"media:content".equalsIgnoreCase(qName)) {
                feedStr.setContent(text);
            }
        } else if (name.equalsIgnoreCase("link")) {
            feedStr.setLink(text);
        }

        if (name.equalsIgnoreCase("item")) {
            rssList.add(feedStr);

            feedStr = new RssFeedStructure();
            articlesAdded++;
            if (articlesAdded >= ARTICLES_LIMIT) {
                limitReached = true;
                throw new SAXException("article limit reached");
            }
        }
    }

    /** Appends a chunk of character data to the current element's buffer. */
    @Override
    public void characters(char ch[], int start, int length) {
        chars.append(ch, start, length);
    }

    /**
     * Downloads and parses the feed at {@code feedUrl} with this handler.
     *
     * <p>This is best-effort: I/O, parser-configuration and parse failures
     * are logged and whatever entries were collected so far are returned.
     *
     * @param feedUrl address of the feed to read
     * @return the list of entries collected by this handler (never
     *         {@code null}; possibly empty)
     */
    public List<RssFeedStructure> getLatestArticles(String feedUrl) {
        java.io.InputStream in = null;
        limitReached = false;
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            // Needed so that prefixed elements such as <content:encoded>
            // are reported with a usable local name.
            spf.setNamespaceAware(true);
            SAXParser sp = spf.newSAXParser();
            XMLReader xr = sp.getXMLReader();
            xr.setContentHandler(this);

            URL url = new URL(feedUrl);
            in = url.openStream();
            xr.parse(new InputSource(in));
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.WARNING,
                    "Could not read feed " + feedUrl, e);
        } catch (SAXException e) {
            // The handler aborts parsing on purpose once the article limit
            // is hit; only other SAX failures are worth reporting.
            if (!limitReached) {
                LOG.log(java.util.logging.Level.WARNING,
                        "Could not parse feed " + feedUrl, e);
            }
        } catch (ParserConfigurationException e) {
            LOG.log(java.util.logging.Level.WARNING,
                    "Could not create SAX parser", e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }

        return rssList;
    }

}

Answer 1

尝试再添加一个else分支，但要匹配“content:encoded”。根据我看到的标签，我认为“content”不会起作用。

// Suggested extra branch for the if/else chain in XmlHandler.endElement:
// when the element just closed is named "content:encoded", store the
// buffered text as the entry's encoded content.
// NOTE(review): SAX normally reports a prefixed name such as
// "content:encoded" through qName, while localName holds only "encoded"
// (or is empty when the parser is not namespace-aware) — confirm which
// argument should be compared here.
else if (localName.equalsIgnoreCase("content:encoded")) {
       feedStr.setEncodedContent(chars.toString());
}

- 主题的原始答案 -

这通常由Feed的生成方控制，而不是由Feed的使用方控制。在为博客配置RSS源时，所有者决定是包含全文，还是只包含其中一部分作为摘要。某些Feed所有者可能会只提供摘要，以促使您实际访问该网站，而不是仅通过Feed阅读内容。

Answer 2

我刚刚了解到，SAX解析器无法从RSS Feed中获取像“content:encoded”这样的标签。您可以改用DOM解析器。

在RSS源中获取不完整的描述

2 个答案: