我想解析html数据。我想删除标签中我不关心的所有数据以及标签本身。我只想保留特定span类中的数据。但是也要删除span标签。
网上是否已经有现成的脚本可以使用?如果没有,最好的方法是什么?谢谢。
答案 0 :(得分:1)
好的,为什么不呢?我还清楚地记得当初所有文档都在脑海里打转的那段日子,就像醉汉眼中天旋地转的房间。
有很多不同的方法可以解决这个问题,我在上面的评论中只提了一个。我能看到的三种方式是:
下面以 Google 搜索结果页面为例,快速演示一下:
使用Javascript:
/**
 * Calls `func` once for every entry of an array-like collection.
 *
 * DOM lookups such as getElementsByTagName and getElementsByClassName hand back
 * collections that are indexed with [] like an array but are not Array objects;
 * this helper lets callers walk them in much the same way as an array.
 *
 * @param {Object} nodeList - Array-like object exposing `length` and numeric indices.
 * @param {Function} func - Callback invoked as func(item, index, nodeList).
 */
function forEachNode(nodeList, func)
{
    var total = nodeList.length; // read once, before the first callback runs
    var index = 0;
    while (index < total)
    {
        func(nodeList[index], index, nodeList);
        index += 1;
    }
}
/**
 * Demonstrates three ways of collecting the text of the elements marked with
 * the 'st' class in the current document. Each approach builds one string with
 * a "\n" after every matching element's innerText and shows it with alert(),
 * so calling this function raises three alerts in sequence.
 *
 * Uses the globals `document`, `alert` and forEachNode(); takes no arguments
 * and returns nothing.
 */
function grabTextFromSpans()
{
    // Concatenates the innerText of every element in an array-like collection,
    // terminating each entry with a newline.
    function joinText(elements)
    {
        var text = '';
        forEachNode(elements, function (elem) { text += elem.innerText + "\n"; });
        return text;
    }

    //
    // 1. - assume elements other than span may also possess the target class:
    //      only keep elements that are (a) a span and (b) carry the class.
    //
    var matchingSpans = [];
    forEachNode(document.getElementsByTagName('span'), function (span) {
        // Test the class as a token rather than comparing the whole className
        // string, so a span such as class="st extra" is still picked up.
        if (span.classList.contains('st')) {
            matchingSpans.push(span);
        }
    });
    alert(joinText(matchingSpans));

    //
    // 2. - assume only the target elements will possess the target className
    //
    alert(joinText(document.getElementsByClassName('st')));

    //
    // 3. - assume that tgt elements are spans and have a specific className
    //
    alert(joinText(document.querySelectorAll('span.st')));
}
HTML:
<ol eid="S-WYUPKzGI7JmAWAr4CwBw" id="rso"><!--m--><li class="g"><div class="vsc" sig="u1z"> <div data-ved="0CCQQkgowAA"> <div data-ved="0CCUQkQowAA"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://bebraw.github.com/colorjs/" class="l" onmousedown="return rwt(this,'','','','1','AFQjCNFuHzNJryRAA9QBjqC1250RDZAMCQ','','0CCYQFjAA',null,event)">Colorjs by bebraw</a></h3><div class="s"><div class="f kv"><cite>bebraw.github.com/colorjs/</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:eSrglCqgBAkJ:bebraw.github.com/colorjs/+&cd=1&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','1','AFQjCNEm0e9Xr7p0eOh7TWkQ81JoqWQNfQ','','0CCcQIDAA',null,event)">Cached</a></span></div><div class="esc slp" id="poS0" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">colorjs provides simple API that may be used to create colors (<em>RGBA</em>, <em>HSVA</em>, <em>HSLA</em>) and perform various color related operations (conversions and such).<br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="kDj"> <div data-ved="0CCkQkgowAQ"> <div data-ved="0CCoQkQowAQ"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="https://github.com/bebraw/colorjs" class="l" onmousedown="return rwt(this,'','','','2','AFQjCNFRF9AfGrmbF5E6IhyKId0ztwA7sQ','','0CCsQFjAB',null,event)">bebraw/colorjs · GitHub</a></h3><div class="s"><div class="f kv"><cite>https://github.com/bebraw/colorjs</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:WtA6gOF2ZqEJ:https://github.com/bebraw/colorjs+&cd=2&hl=en&ct=clnk&gl=au" onmousedown="return 
rwt(this,'','','','2','AFQjCNHqdXwTalbWocgzhnaRA1EKTkyMzQ','','0CCwQIDAB',null,event)">Cached</a></span></div><div class="esc slp" id="poS1" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">colorjs provides simple API that may be used to create colors (<em>RGBA</em>, <em>HSVA</em>, <em>HSLA</em>) and perform various color related operations (conversions and such).<br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="PWh"> <div data-ved="0CC4QkgowAg"> <div data-ved="0CC8QkQowAg"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://www.pygame.org/docs/ref/color.html" class="l" onmousedown="return rwt(this,'','','','3','AFQjCNFFK5xPcE_2Yl9D8NhHmBbb_Y8UJg','','0CDAQFjAC',null,event)">color - Pygame Documentation</a></h3><div class="s"><div class="f kv"><cite>www.pygame.org/docs/ref/color.html</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:zXwLCILEH14J:www.pygame.org/docs/ref/color.html+&cd=3&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','3','AFQjCNGwtfM-FoMMusa6z3-GjN68_lw5BQ','','0CDEQIDAC',null,event)">Cached</a> - <a href="/search?hl=en&safe=off&q=related:www.pygame.org/docs/ref/color.html+hlsa+hsva+rgba&tbo=1&sa=X&ei=S-WYUPKzGI7JmAWAr4CwBw&ved=0CDIQHzAC">Similar</a></span></div><div class="esc slp" id="poS2" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">Color.<em>hsva</em> - Gets or sets the <em>HSVA</em> representation of the Color. Gets or sets the <b>...</b> Color.<em>hsla</em> - Gets or sets the <em>HSLA</em> representation of the Color. Gets or sets <b>...</b> Color.normalize - Returns the normalized <em>RGBA</em> values of the Color. 
Returns the <b>...</b><br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="8Ft"> <div data-ved="0CDQQkgowAw"> <div data-ved="0CDUQkQowAw"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://softimage.wiki.softimage.com/sdkdocs/sicppsdk/html/classXSI_1_1MATH_1_1CColor4f.html" class="l" onmousedown="return rwt(this,'','','','4','AFQjCNFu-Fgfc5DxRWUiz6rFQcmtu8VxQw','','0CDYQFjAD',null,event)">CColor4f Class Reference</a></h3><div class="s"><div class="f kv"><cite>softimage.wiki.softimage.com/.../classXSI_1_1MATH_1_1CColor4f....</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:a3m9eQIkVpcJ:softimage.wiki.softimage.com/sdkdocs/sicppsdk/html/classXSI_1_1MATH_1_1CColor4f.html+&cd=4&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','4','AFQjCNE2tLroL_B4pfp_-yHsgN77HgygkA','','0CDcQIDAD',null,event)">Cached</a></span></div><div class="esc slp" id="poS3" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">Detailed Description. A color class represented by single floating values. 
CColor4f supports the <em>RGBA</em>, <em>HSVA</em> and <em>HLSA</em> color models.<br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="E2M"> <div data-ved="0CDkQkgowBA"> <div data-ved="0CDoQkQowBA"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://api.call-cc.org/doc/imlib2" class="l" onmousedown="return rwt(this,'','','','5','AFQjCNHJCEk0145qLjFUDow7uz6--d9ecQ','','0CDsQFjAE',null,event)">imlib2 | chickadee</a></h3><div class="s"><div class="f kv"><cite>api.call-cc.org/doc/imlib2</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:f3rEHKk7xdQJ:api.call-cc.org/doc/imlib2+&cd=5&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','5','AFQjCNFIOeaytJwKaUCFahhv0rZYsCrtNA','','0CDwQIDAE',null,event)">Cached</a></span></div><div class="esc slp" id="poS4" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">Create a color specifier for the given <em>RGBA</em> values. (color/<em>hsva h s v a</em>) => color procedure. Create a color specifier for the given <em>HSVA</em> values. 
(color/<em>hlsa h l s a</em>) <b>...</b><br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="QIQ"> <div data-ved="0CD4QkgowBQ"> <div data-ved="0CD8QkQowBQ"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://download.autodesk.com/global/docs/softimage2013/en_us/userguide/files/shaderpresets690.htm" class="l" onmousedown="return rwt(this,'','','','6','AFQjCNEcMDDSe_MGltcAPZgdz_Xba5qhrA','','0CEAQFjAF',null,event)">Softimage User's Guide: Scalar Matte</a></h3><div class="s"><div class="f kv"><cite>download.autodesk.com/global/docs/.../en.../shaderpresets690.htm</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:7EBn3t17As0J:download.autodesk.com/global/docs/softimage2013/en_us/userguide/files/shaderpresets690.htm+&cd=6&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','6','AFQjCNH9A6Y1UtnSCaNB5fq9oEMv6l6dIQ','','0CEEQIDAF',null,event)">Cached</a></span></div><div class="esc slp" id="poS5" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">Determines which color model you will select a color from: <em>RGBA</em>, <em>HLSA</em>, or <em>HSVA</em>. Channel. Selects the color channel. 
You can only select from this parameter if <b>...</b><br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="ziI"> <div data-ved="0CEMQkgowBg"> <div data-ved="0CEQQkQowBg"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://packages.python.org/python-igraph/igraph.drawing.colors-module.html" class="l" onmousedown="return rwt(this,'','','','7','AFQjCNFXOQ9ZwDTXEYanvZb2l-be3NWrvg','','0CEUQFjAG',null,event)">igraph.drawing.colors.color_name_to_rgba</a></h3><div class="s"><div class="f kv"><cite>packages.python.org/python.../igraph.drawing.colors-module.html</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:YiCWholHT38J:packages.python.org/python-igraph/igraph.drawing.colors-module.html+&cd=7&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','7','AFQjCNE-O44T9i6KfHzrDu2_TN6q5khIDg','','0CEYQIDAG',null,event)">Cached</a></span></div><div class="esc slp" id="poS6" style="display:none">You +1'd this publicly. 
<a href="#" class="fl">Undo</a></div><span class="f">10+ items – </span><span class="st">Converts a color given by its <em>RGBA</em> coordinates to <em>HSVA</em> <b>...</b><br></span><table class="tsnip"><tbody><tr><td>hsla_to_rgba</td><td>l</td><td>1.0</td><td>Converts a color given by its <em>HSLA</em> coordinates (hue <b>...</b></td></tr><tr><td>rgba_to_hsva</td><td>b</td><td>1.0</td><td>Converts a color given by its <em>RGBA</em> coordinates to <b>...</b></td></tr></tbody></table></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="ELy"> <div data-ved="0CEkQkgowBw"> <div data-ved="0CEoQkQowBw"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://xrvg.rubyforge.org/rdoc/classes/XRVG/Color.html" class="l" onmousedown="return rwt(this,'','','','8','AFQjCNFa4U6Y0BceJtaHJrjPYBL9uluwiQ','','0CEsQFjAH',null,event)">Class: XRVG::Color</a></h3><div class="s"><div class="f kv"><cite>xrvg.rubyforge.org/rdoc/classes/XRVG/Color.html</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:xzEzVZQqjlsJ:xrvg.rubyforge.org/rdoc/classes/XRVG/Color.html+&cd=8&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','8','AFQjCNHAiQ1acFXS45OCe2dh8t04dv79Zw','','0CEwQIDAH',null,event)">Cached</a></span></div><div class="esc slp" id="poS7" style="display:none">You +1'd this publicly. 
<a href="#" class="fl">Undo</a></div><span class="st"><b>...</b> blue complement format255 g g= green grey hsl <em>hsla hsla</em> hsv <em>hsva hsva</em> hue <b>...</b> rgb2h rgb2hsl rgb2hsv rgb2sl rgb2sv <em>rgba</em> saturation svg value white yellow <b>...</b><br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc" sig="AMw"> <div data-ved="0CE4QkgowCA"> <div data-ved="0CE8QkQowCA"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://pygame.readthedocs.org/en/latest/ref/color.html" class="l" onmousedown="return rwt(this,'','','','9','AFQjCNFAhMqlvpl3nu5ke-K5EGXFjdObuA','','0CFAQFjAI',null,event)">pygame.Color — Pygame v1.9.2 documentation</a></h3><div class="s"><div class="f kv"><cite>pygame.readthedocs.org/en/latest/ref/color.html</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:y8n_BdkVwnMJ:pygame.readthedocs.org/en/latest/ref/color.html+&cd=9&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','9','AFQjCNFUzsehzHw76YNkL-4oahK-n-JvIA','','0CFEQIDAI',null,event)">Cached</a></span></div><div class="esc slp" id="poS8" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">Color.<em>hsva</em>, —, Gets or sets the <em>HSVA</em> representation of the Color. <b>...</b> The Color class represents <em>RGBA</em> color values using a value range of 0-255. 
<b>...</b> The <em>HSLA</em> components are in the ranges H = [0, 360], S = [0, 100], V = [0, 100], A = [0, 100].<br></span></div></div><!--n--></li><!--m--><li class="g"><div class="vsc"> <div data-ved="0CFMQkgowCQ"> <div data-ved="0CFQQkQowCQ"> </div> </div><div class="vspib" aria-label="Result details" role="button" tabindex="0"><div class="vspii"><div class="vspiic"></div></div></div> <h3 class="r"><a href="http://pypixel.googlecode.com/svn-history/r5/trunk/pypixel.py" class="l" onmousedown="return rwt(this,'','','','10','AFQjCNFn-7_0BAq_upcNihBWBYg3GediLg','','0CFUQFjAJ',null,event)">#!/usr/bin/python # # TODO # Thread pypixel so that the end user <b>...</b></a></h3><div class="s"><div class="f kv"><cite>pypixel.googlecode.com/svn-history/r5/trunk/pypixel.py</cite><span class="vshid"><a href="http://webcache.googleusercontent.com/search?q=cache:-prP6iP75CAJ:pypixel.googlecode.com/svn-history/r5/trunk/pypixel.py+&cd=10&hl=en&ct=clnk&gl=au" onmousedown="return rwt(this,'','','','10','AFQjCNE0O_xZcdco81OFkgu3JTMawmr5LQ','','0CFYQIDAJ',null,event)">Cached</a></span></div><div class="esc slp" id="poS9" style="display:none">You +1'd this publicly. <a href="#" class="fl">Undo</a></div><span class="st">TODO # Write color wrapper for <em>HSVA</em>, <em>HSLA</em>, <em>RGBA</em> # import random as randy import pygame from pygame.locals import * # Screen size WIDTH = 640 HEIGHT <b>...</b><br></span></div></div><!--n--></li></ol>