使用Webview,我想在此页面中找到链接。
-(void)webView:(WebView *)sender didFinishLoadForFrame:(WebFrame *)frame {
DOMDocument *myDOMDocument = [[self.webview mainFrame] DOMDocument];
这看起来是一个很好的起点,但我发现WebScriptObject类参考文档有点晦涩。显然,我不想通过执行Javascript来获取链接,而是想直接读取DOM。
我如何找到DOM中的哪些节点是链接,并获取它们指向的地址?
答案 0 :(得分:2)
遍历DOMNodes以查找图像
请参阅walkNodeTree @ http://cocoadev.com/wiki/DOMCore
- 完整示例:查找图像节点,获取其src并创建NSImage
/// Demo application delegate: loads a page into a WebView and, once the main
/// frame has finished loading, walks its DOM to collect <img> elements and
/// loads the image referenced by each element's `src` attribute.
@implementation DDAppDelegate

/// Starts loading the demo page into the web view's main frame at launch.
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
    NSURL *pageURL = [NSURL URLWithString:@"http://dominik.pich.info/Home.html"];
    [self.webview.mainFrame loadRequest:[NSURLRequest requestWithURL:pageURL]];
}

/// Frame-load delegate callback. Collects every <img> node of the main frame's
/// document, resolves its `src` against the page URL and logs the resulting
/// array of NSImage objects.
///
/// @param sender The web view that finished loading a frame.
/// @param frame  The frame that finished loading.
- (void)webView:(WebView *)sender didFinishLoadForFrame:(WebFrame *)frame {
    // This callback is delivered for sub-frames as well; only the main frame's
    // document is of interest, so everything else is ignored instead of
    // re-processing the main document once per frame.
    if (frame != [sender mainFrame]) {
        return;
    }

    DOMDocument *myDOMDocument = [frame DOMDocument];
    NSMutableArray *imgs = [NSMutableArray array];
    [self walkNodeTree:myDOMDocument imgsCollected:imgs];

    // Relative `src` values are resolved against the URL of the page that was
    // actually loaded rather than a hard-coded site address.
    NSString *pageURLString = [sender mainFrameURL];
    NSURL *baseURL = (pageURLString.length > 0) ? [NSURL URLWithString:pageURLString] : nil;

    // Demo only: -initWithContentsOfURL: fetches synchronously on the calling thread.
    NSMutableArray *nsImages = [NSMutableArray array];
    for (DOMNode *img in imgs) {
        DOMNamedNodeMap *attributes = img.attributes;
        unsigned attributeCount = attributes.length;
        for (unsigned i = 0; i < attributeCount; i++) {
            DOMNode *attr = [attributes item:i];
            NSLog(@"%@", attr.nodeName);
            if (![attr.nodeName.lowercaseString isEqualToString:@"src"]) {
                continue;
            }

            NSString *urlstring = [attr nodeValue];
            NSURL *url = (urlstring != nil) ? [NSURL URLWithString:urlstring relativeToURL:baseURL] : nil;
            if (url != nil) {
                NSImage *nsimg = [[NSImage alloc] initWithContentsOfURL:url];
                if (nsimg) {
                    [nsImages addObject:nsimg];
                }
            }
            // An element carries at most one `src` attribute; stop scanning.
            break;
        }
    }
    NSLog(@"%@", nsImages);
}

/// Recursively walks the children of `parent`, appending every node whose name
/// is "img" (compared case-insensitively) to `imgs`. Nodes that are not images
/// are descended into.
///
/// @param parent The node whose subtree is searched.
/// @param imgs   Mutable array that receives the matching nodes.
- (void)walkNodeTree:(DOMNode*)parent imgsCollected:(NSMutableArray*)imgs {
    DOMNodeList *nodeList = [parent childNodes];
    unsigned i, length = [nodeList length];
    for (i = 0; i < length; i++) {
        DOMNode *node = [nodeList item:i];
        NSLog(@"%@", node.nodeName);
        if ([node.nodeName.lowercaseString isEqualToString:@"img"]) {
            [imgs addObject:node];
        }
        else {
            // recurse into non-image nodes
            [self walkNodeTree:node imgsCollected:imgs];
        }
    }
}

@end
答案 1 :(得分:1)
我一直在参考XPath规范(XPath Introduction)。
做法是将网址对应的HTML传给NSXMLDocument,然后通过NSXMLNode的nodesForXPath:error:
获取我想要的值。在这种情况下,我使用主框架(main frame)的URL,但任何有效的网址都应该可以。
这两个NSXML类解析HTML似乎和解析XML一样没有问题。
您可以搜索到大量XPath查询字符串的语法示例;我发现,一旦了解了HTML标记和class的写法,就很容易深入到DOM树中。
我在这里使用了一个非常简单的 a href 查询。
但是我已经添加了一个注释掉的例子以显示更多内容。
/// Registers this object as the web view's frame-load delegate and starts
/// loading http://example.com into the web view's main frame.
-(void)applicationDidFinishLaunching:(NSNotification *)aNotification
{
    NSURLRequest *pageRequest =
        [NSURLRequest requestWithURL:[NSURL URLWithString:@"http://example.com"]];

    [theWebView setFrameLoadDelegate:self];
    [[theWebView mainFrame] loadRequest:pageRequest];
}
/// Frame-load delegate callback. Once the main frame has finished loading,
/// fetches the page at the web view's main-frame URL into an NSXMLDocument,
/// runs an XPath query for all <a> elements and logs each node together with
/// the value of its `href` attribute.
///
/// Note: this file uses manual reference counting, so the document is
/// released explicitly on every exit path.
///
/// @param sender The web view that finished loading a frame.
/// @param frame  The frame that finished loading.
-(void)webView:(WebView *)sender didFinishLoadForFrame:(WebFrame *)frame {
    // Delivered for sub-frames too; the page would otherwise be downloaded
    // and parsed again for every frame it contains.
    if (frame != [sender mainFrame]) {
        return;
    }

    NSString *pageURLString = [theWebView mainFrameURL];
    NSURL *pageURL = ([pageURLString length] > 0) ? [NSURL URLWithString:pageURLString] : nil;
    if (pageURL == nil) {
        return;
    }

    NSError *err_p = nil;
    NSXMLDocument *xmlDoc = [[NSXMLDocument alloc] initWithContentsOfURL:pageURL
                                                                 options:(NSXMLNodePreserveWhitespace |
                                                                          NSXMLNodePreserveCDATA)
                                                                   error:&err_p];
    if (xmlDoc == nil) {
        // Strict parsing failed; retry and let the parser tidy the markup first.
        err_p = nil;
        xmlDoc = [[NSXMLDocument alloc] initWithContentsOfURL:pageURL
                                                      options:NSXMLDocumentTidyXML
                                                        error:&err_p];
    }
    if (xmlDoc == nil) {
        // Both attempts failed: report the parse error instead of querying nil.
        if (err_p != nil) {
            [[NSAlert alertWithError:err_p] runModal];
        }
        return;
    }

    // Must start out nil: the out-parameter is only written on failure.
    NSError *error2 = nil;
    NSString *xpathQueryTRTest = @"//a";//--query string for all <a> elements
    //-- for example 2 --NSString *xpathQueryTRTest =@"//div/p[1]";//--query string for the first <p> of every <div>
    NSArray *newItemsNodesTRTEST = [xmlDoc nodesForXPath:xpathQueryTRTest error:&error2];//--xpath node results returned in an array

    // Success is judged by the return value, not by the error pointer.
    if (newItemsNodesTRTEST == nil) {
        if (error2 != nil) {
            [[NSAlert alertWithError:error2] runModal];
        }
        [xmlDoc release];
        return;
    }

    for (NSXMLElement *node in newItemsNodesTRTEST)//--parse the nodes in the array
    {
        NSLog(@"\nThe Node = %@\nThe node href value = %@", node, [[node attributeForName:@"href"] stringValue]);
        //--for example 2 -- NSLog(@"\nThe Node value = %@\n", [node stringValue]);
    }

    // Release only after the nodes have been used, so the owning document
    // is still alive while they are logged.
    [xmlDoc release];
}
答案 2 :(得分:0)