我有这个html结构:
// DashCtrl: dashboard controller that wires the view to the Ionic User and
// Ionic Push services. It exposes on $scope:
//   - token:          device token from the last '$cordovaPush:tokenReceived' event
//   - identified:     set to true once $ionicUser.identify() resolves
//   - identifyUser(): identifies the current user with the Ionic User service
//   - pushRegister(): registers the device with the Ionic Push service
//
// Dependencies are declared with inline array annotation so that injection
// still works after the file has been minified.
angular.module('starter.controllers', [])
.controller('DashCtrl', ['$scope', '$rootScope', '$ionicUser', '$ionicPush',
  function($scope, $rootScope, $ionicUser, $ionicPush) {
    // Handles incoming device tokens.
    // $rootScope outlives this controller, so keep the deregistration function
    // returned by $on and call it when the controller's scope is destroyed.
    // Otherwise every re-instantiation of DashCtrl would add one more listener.
    var deregisterTokenListener = $rootScope.$on('$cordovaPush:tokenReceived', function(event, data) {
      alert("Successfully registered token " + data.token);
      console.log('Ionic Push: Got token ', data.token, data.platform);
      $scope.token = data.token;
    });
    $scope.$on('$destroy', deregisterTokenListener);

    // Identifies a user with the Ionic User service.
    $scope.identifyUser = function() {
      console.log('Ionic User: Identifying with Ionic User service');
      var user = $ionicUser.get();
      if (!user.user_id) {
        // Set your user_id here, or generate a random one.
        user.user_id = $ionicUser.generateGUID();
      }
      // Add some metadata to your user object.
      angular.extend(user, {
        name: 'Ionitron',
        bio: 'I come from planet Ion'
      });
      // Identify your user with the Ionic User Service.
      $ionicUser.identify(user).then(function() {
        $scope.identified = true;
        alert('Identified user ' + user.name + '\n ID ' + user.user_id);
      }, function(error) {
        // Surface the failure instead of leaving the rejection unhandled.
        console.error('Ionic User: Identification failed', error);
      });
    };

    // Registers the device with the Ionic Push service.
    $scope.pushRegister = function() {
      console.log('Ionic Push: Registering user');
      alert('bla');
      // Register with the Ionic Push service. All parameters are optional.
      // NOTE(review): the original author marked this call as the place where
      // an error fires; the cause is not visible in this file. Confirm that
      // $ionicPush is actually provided by a loaded module.
      $ionicPush.register({
        canShowAlert: true, // Can pushes show an alert on your screen?
        canSetBadge: true, // Can pushes update app icon badges?
        canPlaySound: true, // Can notifications play a sound?
        canRunActionsOnWake: true, // Can run actions outside the app
        onNotification: function(notification) {
          // Handle new push notifications here
          return true;
        }
      });
      alert('bla2');
    };
  }])
在我的python脚本中,我写道:
<div class="foo">
<h3>Title</h3>
<br>Some text I want to retrieve. <br><br> This text too.
<br> (number and position of "br" tags undetermined) And this one too.
<div class="subfoo">Some other text I don't want.</div>
</div>
正如所料,我得到了全文:
# Parse the fetched response body with the "html.parser" backend.
exampleSoup = bs4.BeautifulSoup(res.text, features="html.parser")
# CSS-select every element that carries the "foo" class.
elems = exampleSoup.select(".foo")
# Print all text under the first match, nested tags included.
first_foo = elems[0]
print(first_foo.get_text())
如何只获得div中没有标记的字符串,即:“我想要检索的一些文本。这个文本也是。这个也是。”? 谢谢你的帮助。
答案 0 :(得分:1)
您可以使用 `.next_sibling` 获取树中同一层级的下一个节点(可能是标签,也可能是文本)。
示例
>>> # Parse the markup held in `html` (its definition is not part of this excerpt).
>>> soup = BeautifulSoup(html)
>>> # Pretty-print the parsed tree (Python 2 print statement).
>>> print soup.prettify()
<html>
<body>
<div class="foo">
<h3>
Title
</h3>
Some text I want to retrieve.
<div class="subfoo">
Some other text I don't want.
</div>
</div>
</body>
</html>
>>> # Find div.foo, take the node right after its <h3>, and strip the
>>> # surrounding whitespace. Only that single sibling is returned.
>>> print soup.find('div', { 'class' : 'foo' } ).h3.next_sibling.strip()
Some text I want to retrieve.