是否有可能在hpple
中递归获取子元素的所有文本内容。 TFHppleElement
类中的任何方法?
比如javascript
document.getElementById("testdiv").textContent
答案 0 :(得分:1)
我使用此代码获取新闻标题的所有内容
NSURL *newURL = [NSURL URLWithString:@"http://somesite"];
NSData *newsData = [NSData dataWithContentsOfURL: newURL];
TFHpple *newsParser = [TFHpple hppleWithHTMLData: newsData];
NSString *newsXpathQueryString = @"//div[@class='item column-1']";
NSArray *newsNodes = [newsParser searchWithXPathQuery: newsXpathQueryString];
NSMutableArray *newNews = [[NSMutableArray alloc] initWithCapacity: 0];
for (TFHppleElement *element in newsNodes)
{
News *news = [[News alloc] init];
[newNews addObject: news];
news.title = [[element content] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
news.photo_url = [element objectForKey:@"src"];
_allNews = newNews;
[self.tableView reloadData];
}
}
你可以使用
news.title = [[element firstChild]content] to get children elements content
答案 1 :(得分:0)
我想要这样的东西 - 快速的锅炉板代码,它不是static contents
的优雅解决方案。请让我知道,如何改进:)
#pragma mark - Hpple XML parser
/* The documents contents lots of nested div, table, span, style etc. */
- (NSString *) extractDefinition
{
NSString *html = [self.webView stringByEvaluatingJavaScriptFromString: @"document.getElementById('innerframe').innerHTML"];
if ([Resources stringIsEmpty:html]) {
return nil;
}
return [self extractSubDiv:html];
}
- (NSString *)extractSubDiv:(NSString *)html
{
TFHpple *hppleParser = [TFHpple hppleWithHTMLData:[html dataUsingEncoding:NSUTF8StringEncoding]];
NSString * xpathQuery;
xpathQuery = @"//div[@id='columnboth']";
NSArray * defNodes = [hppleParser searchWithXPathQuery:xpathQuery];
NSString * text = nil;
if ([defNodes count] > 0) {
TFHppleElement * element = [defNodes objectAtIndex:0];
text = [self parseContents:element];
} else {
xpathQuery = @"//div[@id='columnsingle']";
defNodes = [hppleParser searchWithXPathQuery:xpathQuery];
if ([defNodes count] > 0) {
TFHppleElement * element = [defNodes objectAtIndex:0];
text = [self parseContents:element];
}
}
return text;
}
- (NSString *) parseContents:(TFHppleElement *)element {
NSArray * innhold = [element searchWithXPathQuery:@"//div[contains(@class,'articlecontents')]"];
return [self getTextFromArray:innhold];
}
static NSMutableString * contents;
- (NSString *) getTextFromArray:(NSArray *)hppleElments {
NSMutableString * text = [[NSMutableString new] autorelease];
contents = nil;
contents = [[NSMutableString new] autorelease];
for (TFHppleElement * e in hppleElments) {
[text appendFormat:@"%@ ", [self getText:e]];
}
return text;
}
/* Here are more nested div and then span for text. */
- (NSString *) getText:(TFHppleElement *)element
{
if ([element isTextNode]) {
[contents appendFormat:@" %@", element.content];
}
for (TFHppleElement * e in element.children) {
[self getText:e];
}
return contents;
}