Python正则表达式查找div标签内容

时间:2018-09-19 20:01:53

标签: python regex

应该编写什么样的正则表达式模式,才能查找 div 标签之间的内容?

inputData = '<div class="media-story">content and content</div>'
pattern = '<div ???>(.*?)</div>'

pattern_matcher = re.compile(pattern)

2 个答案:

答案 0 :(得分:0)

以下应该可以解决问题:

import re

data = '<div class="media-story">content and content</div>'
match = re.search(r'<div.*>(.*)<\/div>', data)
if match:
    print match.group(1)

如果您想自己动手试验:https://regex101.com/r/wTosNH/1

答案 1 :(得分:0)

使用非捕获组匹配 > 和 <,并在匹配结果中忽略它们,只捕获它们之间的任何内容。

#!/usr/bin/env node
'use strict'
var express = require("express");
var app = express();
var axios = require("axios");
var bodyParser = require("body-parser");
var multer = require("multer");
var device = require('express-device');
app.use(device.capture());
//---------------------------------------------------------------------
app.set("trust proxy", true);
app.use(function (req, res, next) {
    res.header("Access-Control-Allow-Origin", "*");
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, PATCH, DELETE');
    res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept");
    next();
});
app.use(bodyParser.json());
// parse application/x-www-form-urlencoded
app.use(bodyParser.urlencoded({ extended: true }));
// parse multipart/form-data
app.use(multer());
//-----------------------------------------------------------------------
const port = process.env.PORT || 3000
app.listen(port, () => console.log(`Server is listening on port ${port}.`) )
app.get('/', (req, res) => res.send('Hello world!'))
app.get('/:parametroUrl', async function (req, res) {
    var parametro = req.params.parametroUrl
    var dataObjectModel = { "body": { "request": { "data": { "id": parametro } , "objectDTO": null }, "response": null } };
    return new Promise(resolve => {
        axios
            //url to make it fail
            .post( 'http://172.16.14.38:7001/x/api/url/redirecturl', dataObjectModel )
            .then(response => {
                rastreoUrl = response.data.body.response.data
                res.writeHead(302, { Location: '' + rastreoUrl + '' });
                res.end();
            })
            //should enter here and redirect to google
            .catch(reason => {
                console.log("-----------------GOOGLE-----------------------------")
                res.writeHead(302, { Location: '' + 'https://www.google.com' + '' });
                res.end();
            });
    });
});

demo at regex101

或者使用环视(lookaround)断言:

regex = '(?<=>).*(?=<)'