我有一个包含多个 JavaScript 脚本标签的 HTML 页面。我想从其中某个特定的脚本标签中提取数据:
<head>
...
</head>
<body>
...
<script type="text/javascript">
// Document-ready handler for the site report page: shows a one-time dialog
// gated by the "ios" cookie, then constructs the page widgets for "911.com"
// and records a KISSMetrics event.
$j(document).ready(function() {
// Open the dialog only when the "ios" cookie is absent, then set the cookie
// so the dialog is skipped on later visits.
if (!($j.cookie("ios"))) {
new $c.free.widgets.FreeAdvDialog().open();
$j.cookie("ios", "seen", { path: '/', expires: 10000});
};
// NOTE: assigned without var/let/const, so in non-strict code ajax_keys
// becomes an implicit global. The widgets below each read one entry by index:
// [0] compete numbers, [1] chart view, [2] similar sites, [3] zoom info.
ajax_keys = ["d24349f205e3deb7f1015f42d3a14da7205b62e4", "0ae78c4797d47745ebd44e2754367da10c6f56a4", "567b2bfb6fd1aee784115da54e5e116a280ee225", "fc5cd251be46ff101c471553d52c07bf08c9aa65"];
var is_dm = false;
/* async chart loader */
var chart = new $c.free.widgets.Chart({
target: $j('#graph'),
width: 990,
height: 275,
site: "911.com",
source_panel: 'us'
});
// The view wraps the chart above and is given the ids of the export/save/embed
// buttons plus the second ajax key.
var chart_view = new $c.free.widgets.ChartView({
chart: chart,
csv_button: 'csv-export',
save_button: 'graph-image',
embed_button: 'embed-graph',
key: ajax_keys[1]
});
chart_view.render();
/* zoom info initialization */
var zoom_info = new $c.free.widgets.ZoomInfo({
site: "911.com",
el: '#zoominfo',
key: ajax_keys[3]
});
zoom_info.load();
/* compete numbers initialization */
var compete_numbers = new $c.free.widgets.CompeteNumbers({
site: "911.com",
key: ajax_keys[0],
el: '#compete_numbers'
});
compete_numbers.load();
/* DM Marketing widget init */
// The instance is not stored; presumably it is constructed only for its side
// effects -- confirm against the widget implementation.
new $c.free.widgets.DMSignupMessage({
is_dm: is_dm,
compete_numbers: compete_numbers
});
/* personalization initialization */
var logged_in_as = null;
var d = {
site_name: "911.com",
logged_in_as: logged_in_as,
current_source_panel: {"display_abbreviation": "us", "panel_name": "us", "image_url": "http://media.compete.com/site_media/images/icons/flag_us.gif", "id": 1, "display_name": "United States"}
};
// One login model is shared by the three account-related views and by the
// similar-sites collection (passed as `auth`).
var auth_model = new $c.free.widgets.FreeLoginModel(d);
var links_opts = { model: auth_model };
var links_view = new $c.free.widgets.FreeAccountLinksView(links_opts);
var sites_view = new $c.free.widgets.FollowSiteButtonView(links_opts);
var manage_view = new $c.free.widgets.ManageSitesListButtonView(links_opts);
// The collection is created with an empty initial list and uses the third ajax key.
var sites = new $c.free.widgets.SimilarSitesCollection([], {
site: "911.com",
source_panel: 'us',
key: ajax_keys[2],
auth: auth_model
});
var graph = new $c.free.widgets.BarGraph({
el: $j('#similar-sites'),
collection: sites
});
// tell KISSMetrics where we are
// also identify user so KM console can refer to them by email
// logged_in_as is set to null above and never reassigned in this handler, so
// the identify call is skipped in this rendering of the page.
if(logged_in_as != null) {
_kmq.push(['identify', logged_in_as]);
}
_kmq.push(['record', 'Viewed Free Site Analytics Report (M)']);
});
...
如何从页面的特定标签获取 ajax_keys (即“d24349f205e3deb7f1015f42d3a14da7205b62e4”)?
P.S. 我尝试在 Python 脚本中使用正则表达式,但无法从该标签中提取出所需的元素。
感谢您的帮助。
答案 0 :(得分:2)
如果您使用像BeautifulSoup这样的库,您可以获取特定的脚本标记,然后对标记的内容而不是整个文档使用正则表达式。
话虽如此,假设页面中只有一个 ajax_keys,下面这样的正则表达式应该可以奏效:
import json
import re

# Find the "ajax_keys = [...]" assignment on any line of the page source.
# `source` must already hold the page (or script tag) text as a str.
# \s* (not \s+) lets the pattern match whether or not the line is indented;
# the capture group takes everything up to, but not including, the ';'.
ajaxre = re.compile(r"^\s*ajax_keys\s*=\s*([^;]+)", re.MULTILINE)

# re.match() only tries position 0 of the string, even with re.MULTILINE,
# so search() is required to find the assignment in the middle of a page.
found = ajaxre.search(source)
if found is None:
    raise ValueError("ajax_keys assignment not found in source")
ajax_string = found.group(1)

# to get it as a python list: the JS array of double-quoted strings is valid JSON
ajax_keys = json.loads(ajax_string)
编辑:感谢 @Karl Knechtel 提出使用 json.loads 的建议。