这个问题可能已经被问过很多次了,但我还是毫无头绪。我需要解析从Google Reader的API获取的XML文件。基本上,它包含如下对象:
<object>
<string name="id">feed/http://developer.apple.com/news/rss/news.rss</string>
<string name="title">Apple Developer News</string>
<list name="categories">
<object>
<string name="id">user/17999068807557229152/label/Apple</string>
<string name="label">Apple</string>
</object>
</list>
<string name="sortid">DB67AFC7</string>
<number name="firstitemmsec">1317836072018</number>
<string name="htmlUrl">http://developer.apple.com/news/</string>
</object>
我尝试过使用NSXMLParser,它确实可以工作,但真的很慢。也许我的代码不是最高效的,但解析并将对象保存到Core Data中仍然需要10秒以上。我还看了其他几个库,但对于这么小的XML文件来说,它们用起来显得有些复杂和笨重。
您认为我应该使用什么?
谢谢。
修改
这里是解析器代码:
/**
 * NSXMLParser delegate callback invoked for every opening tag.
 *
 * Raises the state flags that parser:didEndElement:... consumes when the
 * matching closing tag arrives:
 *  - <list name="subscriptions"> switches subscription-list handling on;
 *  - <list name="categories"> starts a category and resets its buffers;
 *  - <object> outside a category starts a subscription and resets its buffers;
 *  - <string name="id|title|label|htmlUrl"> marks which buffer the element's
 *    text should be appended to.
 *
 * The "name" attribute and the element name are each examined once instead
 * of being re-compared in every condition.
 *
 * @param parser        The parser reporting the element (unused).
 * @param elementName   Tag name of the element that just opened.
 * @param namespaceURI  Unused.
 * @param qName         Unused.
 * @param attributeDict Attributes of the element; only "name" is read.
 */
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName attributes:(NSDictionary *)attributeDict {
    // A missing attribute yields nil, and messaging nil returns NO, so no
    // explicit nil check is needed in the comparisons below.
    NSString *name = [attributeDict objectForKey:@"name"];
    BOOL isList = [elementName isEqualToString:@"list"];

    if (isList && [name isEqualToString:@"subscriptions"]) {
        subscriptionListFound = YES;
    }
    // Nothing outside the subscriptions list is of interest.
    if (!subscriptionListFound) {
        return;
    }

    if (isList) {
        if ([name isEqualToString:@"categories"]) {
            categoryFound = YES;
            // NOTE(review): these autoreleased strings are stored straight into
            // ivars without a retain; confirm they cannot outlive the enclosing
            // autorelease pool, or assign through retaining properties instead.
            currentCategoryId = [[[NSMutableString alloc] init] autorelease];
            currentCategoryLabel = [[[NSMutableString alloc] init] autorelease];
        }
    }
    else if ([elementName isEqualToString:@"object"]) {
        // <object> elements inside a category list describe the category,
        // not a new subscription.
        if (!subscriptionFound && !categoryFound) {
            subscriptionFound = YES;
            currentSubscriptionTitle = [[[NSMutableString alloc] init] autorelease];
            currentSubscriptionId = [[[NSMutableString alloc] init] autorelease];
            currentSubscriptionHtmlURL = [[[NSMutableString alloc] init] autorelease];
        }
    }
    else if ([elementName isEqualToString:@"string"]) {
        if ([name isEqualToString:@"id"]) {
            // The same attribute name is used for category and subscription ids.
            if (categoryFound) {
                categoryIdFound = YES;
            }
            else {
                subscriptionIdFound = YES;
            }
        }
        else if ([name isEqualToString:@"title"]) {
            subscriptionTitleFound = YES;
        }
        else if ([name isEqualToString:@"label"]) {
            categoryLabelFound = YES;
        }
        else if ([name isEqualToString:@"htmlUrl"]) {
            subscriptionHtmlURLFound = YES;
        }
    }
}
/**
 * NSXMLParser delegate callback invoked for every closing tag.
 *
 * Consumes the flags raised in parser:didStartElement:...:
 *  - </list> ends the current category if one is open, otherwise it ends the
 *    subscriptions list;
 *  - </object> outside a category saves the finished subscription and posts
 *    "currentSubscriptionNotification" with its title;
 *  - </string> moves the accumulated character data into whichever buffer
 *    was flagged when the element opened.
 * The character buffer is emptied after every element.
 *
 * @param parser       The parser reporting the element (unused).
 * @param elementName  Tag name of the element that just closed.
 * @param namespaceURI Unused.
 * @param qName        Unused.
 */
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName {
    if ([elementName isEqualToString:@"list"]) {
        if (categoryFound) {
            categoryFound = NO;
        }
        else {
            subscriptionListFound = NO;
        }
    }
    else if ([elementName isEqualToString:@"object"]) {
        if (!categoryFound && subscriptionFound) {
            [self saveSubscription];
            [[NSNotificationCenter defaultCenter] postNotificationName:@"currentSubscriptionNotification" object:currentSubscriptionTitle];
            subscriptionFound = NO;
        }
    }
    else if ([elementName isEqualToString:@"string"]) {
        // BOOL flags are tested for truthiness rather than compared to YES.
        if (subscriptionIdFound) {
            [currentSubscriptionId appendString:self.currentParsedCharacterData];
            subscriptionIdFound = NO;
        }
        if (subscriptionTitleFound) {
            [currentSubscriptionTitle appendString:self.currentParsedCharacterData];
            subscriptionTitleFound = NO;
        }
        if (subscriptionHtmlURLFound) {
            [currentSubscriptionHtmlURL appendString:self.currentParsedCharacterData];
            subscriptionHtmlURLFound = NO;
        }
        if (categoryIdFound) {
            [currentCategoryId appendString:self.currentParsedCharacterData];
            categoryIdFound = NO;
        }
        if (categoryLabelFound) {
            [currentCategoryLabel appendString:self.currentParsedCharacterData];
            categoryLabelFound = NO;
        }
    }

    // Start the next element with an empty text buffer.
    [self.currentParsedCharacterData setString:@""];
}
/**
 * NSXMLParser delegate callback carrying text content.
 *
 * Appends the reported characters to the shared buffer that
 * parser:didEndElement:... reads and then clears.
 *
 * @param parser The parser reporting the text (unused).
 * @param string The characters found in the current element.
 */
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string {
    NSMutableString *pendingText = self.currentParsedCharacterData;
    [pendingText appendString:string];
}
这里是通过CoreData保存的代码:
/**
 * Looks up one managed object by its `id` attribute.
 *
 * The predicate already pins `id`, so no sort descriptor is applied, and the
 * fetch is limited to a single row because only the first match is used.
 *
 * @param entityName Name of the Core Data entity to search.
 * @param identifier Value the entity's `id` attribute must equal.
 * @return The first matching object, or nil when nothing matches or the
 *         fetch fails (the failure is logged).
 */
- (id)existingObjectForEntityName:(NSString *)entityName identifier:(NSString *)identifier {
    NSFetchRequest *fetchRequest = [[[NSFetchRequest alloc] init] autorelease];
    [fetchRequest setEntity:[NSEntityDescription entityForName:entityName inManagedObjectContext:context]];
    [fetchRequest setPredicate:[NSPredicate predicateWithFormat:@"(id == %@)", identifier]];
    [fetchRequest setFetchLimit:1];

    NSError *fetchError = nil;
    NSArray *matches = [context executeFetchRequest:fetchRequest error:&fetchError];
    if (matches == nil) {
        // A nil result (as opposed to an empty array) signals a failed fetch.
        NSLog(@"Whoops, couldn't fetch %@ with id %@: %@", entityName, identifier, [fetchError localizedDescription]);
        return nil;
    }
    return ([matches count] > 0) ? [matches objectAtIndex:0] : nil;
}

/**
 * Persists the subscription that has just been parsed.
 *
 * Finds or creates the Group identified by currentCategoryId, finds or
 * creates the Subscription identified by currentSubscriptionId (filling in
 * title, URL, favicon path and group only when it is newly created), then
 * saves the managed object context.
 *
 * NOTE(review): the context is saved once per subscription, and saveFavicon:url:
 * presumably performs I/O; both are likely contributors to the slow import.
 * Consider saving once after parsing finishes and fetching favicons lazily.
 */
- (void) saveSubscription {
    id existingGroup = [self existingObjectForEntityName:@"Group" identifier:self.currentCategoryId];
    if (existingGroup != nil) {
        self.currentGroupObject = existingGroup;
    }
    else {
        self.currentGroupObject = [NSEntityDescription insertNewObjectForEntityForName:@"Group" inManagedObjectContext:context];
        [self.currentGroupObject setId:self.currentCategoryId];
        [self.currentGroupObject setLabel:self.currentCategoryLabel];
    }

    id existingSubscription = [self existingObjectForEntityName:@"Subscription" identifier:self.currentSubscriptionId];
    if (existingSubscription != nil) {
        self.currentSubscriptionObject = existingSubscription;
    }
    else {
        self.currentSubscriptionObject = [NSEntityDescription insertNewObjectForEntityForName:@"Subscription" inManagedObjectContext:context];
        [self.currentSubscriptionObject setId:self.currentSubscriptionId];
        [self.currentSubscriptionObject setTitle:self.currentSubscriptionTitle];
        [self.currentSubscriptionObject setHtmlURL:self.currentSubscriptionHtmlURL];
        NSString *faviconURL = [self favIconUrlStringFromURL:self.currentSubscriptionHtmlURL];
        NSString *faviconPath = [self saveFavicon:self.currentSubscriptionTitle url:faviconURL];
        [self.currentSubscriptionObject setFaviconPath:faviconPath];
        [self.currentSubscriptionObject setGroup:self.currentGroupObject];
        [self.currentGroupObject addSubscriptionObject:self.currentSubscriptionObject];
    }

    NSError *saveError = nil;
    if (![context save:&saveError]) {
        NSLog(@"Whoops, couldn't save: %@", [saveError localizedDescription]);
    }
}
答案 0 :(得分:8)
您的解析逻辑非常低效——您一遍又一遍地做着相同的测试,写成:
if (string and x) do this
if (string and y) do this
if (string and z) do this
而不是
if (string)
if (x) do this
if (y) do this
if (z) do this
所有这些不必要的字符串比较很可能就是解析如此缓慢的原因。所有的对象查找也是同样的道理:如果某个值需要多次使用,请只获取一次,然后把它存储在变量中。
Objective-C的方法调用相对较慢,而且编译器无法将其优化掉,因此如果某个值不会改变,就应该只调用一次该方法,然后把结果保存起来。
例如,这个:
// "Before" example, quoted from the question's parser:didStartElement:...
// Every condition below repeats the same elementName comparison and the same
// attributeDict lookup, so a single element can trigger up to four of each.
if([elementName isEqualToString:@"string"] && [[attributeDict objectForKey:@"name"] isEqualToString:@"id"]){
// An id belongs to the category while one is open, otherwise to the subscription.
if(categoryFound){
categoryIdFound = YES;
}
else{
subscriptionIdFound = YES;
}
}
// Same element-name test and attribute lookup again, now for "title".
if([elementName isEqualToString:@"string"] && [[attributeDict objectForKey:@"name"] isEqualToString:@"title"]){
subscriptionTitleFound = YES;
}
// ...and again for "label".
if([elementName isEqualToString:@"string"] && [[attributeDict objectForKey:@"name"] isEqualToString:@"label"]){
categoryLabelFound = YES;
}
// ...and again for "htmlUrl".
if([elementName isEqualToString:@"string"] && [[attributeDict objectForKey:@"name"] isEqualToString:@"htmlUrl"]){
subscriptionHtmlURLFound = YES;
}
可以改写为:
// "After" example: the element name is tested once, the "name" attribute is
// fetched once, and the else-if chain stops at the first match.
if ([elementName isEqualToString:@"string"]) {
    NSString *attributeName = [attributeDict objectForKey:@"name"];

    if ([attributeName isEqualToString:@"id"]) {
        // An id belongs to the category while one is open, otherwise to the subscription.
        if (categoryFound) {
            categoryIdFound = YES;
        } else {
            subscriptionIdFound = YES;
        }
    } else if ([attributeName isEqualToString:@"title"]) {
        subscriptionTitleFound = YES;
    } else if ([attributeName isEqualToString:@"label"]) {
        categoryLabelFound = YES;
    } else if ([attributeName isEqualToString:@"htmlUrl"]) {
        subscriptionHtmlURLFound = YES;
    }
}
这样效率要高得多。
答案 1 :(得分:1)
我建议你使用GDataXML。它使用起来非常简单,速度非常快。有关详细信息,请阅读how-to-read-and-write-xml-documents-with-gdataxml。
我已经在这个Stack Overflow主题中回答了关于如何使用GDataXML读取属性的类似问题:get-xml-response-value-with-gdataxml。
答案 2 :(得分:0)
我认为,在iOS上解析XML的最佳库是TouchXML。它允许您使用xPaths解析XML并具有高级元素解析选项。您也可以使用此解析XHTML文档。
解析非常简单:
// Load the XML into memory first. xmlFilePath is a placeholder for wherever
// your XML lives; data you have already downloaded works just as well.
NSData *xmlData = [NSData dataWithContentsOfFile:xmlFilePath];
CXMLDocument *doc = [[CXMLDocument alloc] initWithData:xmlData options:0 error:nil];

// Select every <object> element in the document with an XPath query.
NSArray *objects = [doc nodesForXPath:@"//object" error:nil];
for (CXMLElement *object in objects) {
    NSArray *children = [object children];
    for (CXMLElement *child in children) {
        if ([[child name] isEqualToString:@"string"]) {
            // you are parsing <string> element.
            // you can obtain element attribute by:
            NSString *name = [[child attributeForName:@"name"] stringValue];
            // you can obtain string between <></> tags via:
            NSString *value = [child stringValue];
        } else if ([[child name] isEqualToString:@"list"]) {
            // you are parsing <list> element.
        }
        // ...add further `else if` branches for the other element types
        // (for example <number>) in the same way.
    }
}
// NOTE: without ARC, release `doc` once you have finished with it.
答案 3 :(得分:0)
在开发了一些与您的需求相似的应用之后,我会全心全意地推荐AQToolkit
我解析XML的常用设置或多或少是这样的:
示例代码:
// Build an HTTP/1.1 GET request message for `url`.
HTTPMessage *message = [HTTPMessage requestMessageWithMethod:@"GET" url:url version:HTTPVersion1_1];
// Turn on the message's gzip-encoding option.
[message setUseGzipEncoding:YES];
// Wrap the message's input stream in an AQGzipInputStream, created from the
// compressed stream (presumably so the parser reads decompressed bytes as
// they arrive; confirm against AQToolkit).
AQGzipInputStream *inputstream = [[AQGzipInputStream alloc] initWithCompressedStream: [message inputStream]];
初始化上下文和添加合并通知的示例代码:
/**
 * AQXMLParser delegate callback invoked when parsing begins.
 *
 * Creates a managed object context for this parse that shares the
 * application's persistent store coordinator, registers for that context's
 * did-save notifications so saves are forwarded to mergeContextChanges:, and
 * resets the parsed-element counter.
 *
 * @param parser The parser that is starting (unused).
 */
-(void)parserDidStartDocument:(AQXMLParser *)parser
{
    NSManagedObjectContext *importContext = [[NSManagedObjectContext alloc] init];
    self.ctx = importContext;

    // On a merge conflict, the in-memory object's property values win.
    [importContext setMergePolicy: NSMergeByPropertyObjectTrumpMergePolicy];
    [importContext setPersistentStoreCoordinator: [Database db].persistentStoreCoordinator];

    // Only saves made by this context are observed.
    [[NSNotificationCenter defaultCenter] addObserver:self
                                             selector:@selector(mergeContextChanges:)
                                                 name:NSManagedObjectContextDidSaveNotification
                                               object:importContext];

    parsedElements = 0;
}
/**
 * Handler for the parse context's did-save notification.
 *
 * Forwards the notification to mergeHelper: on the main thread and blocks
 * until that call has finished.
 *
 * @param notification The did-save notification to merge.
 */
- (void)mergeContextChanges:(NSNotification *)notification{
    [self performSelectorOnMainThread:@selector(mergeHelper:)
                           withObject:notification
                        waitUntilDone:YES];
}
/**
 * Merges a background save into [Database db].managedObjectContext.
 *
 * Each object listed under the notification's "updated" key is first looked
 * up in that context and touched with willAccessValueForKey:nil, and then the
 * notification itself is merged.
 *
 * @param saveNotification The did-save notification from the parse context.
 */
- (void)mergeHelper:(NSNotification*)saveNotification
{
    // Fault in all updated objects (visited last-to-first).
    NSSet *updatedObjects = [saveNotification.userInfo objectForKey:@"updated"];
    NSArray *updates = [updatedObjects allObjects];
    for (NSManagedObject *updatedObject in [updates reverseObjectEnumerator])
    {
        NSManagedObjectContext *targetContext = [Database db].managedObjectContext;
        NSManagedObject *counterpart = [targetContext objectWithID:[updatedObject objectID]];
        [counterpart willAccessValueForKey:nil];
    }

    // Merge
    [[Database db].managedObjectContext mergeChangesFromContextDidSaveNotification:saveNotification];
}
在我看来,选择正确的解析器对于庞大的数据集更为重要。如果您的数据集规模尚可管理,那么一个像样的实现就能带来很大的收益。使用任何基于libxml的解析器,并在数据到达时逐块解析,与等到数据全部下载完成后再解析相比,可以显著提高性能。
根据您的数据源,libz可能会抛出Z_BUF_ERROR(至少在模拟器中是这样)。我已经向AQToolkit提交了一个包含解决方法的pull request,不过我相信一定还有更好的解决方案!