我正在尝试在 Python 中使用 BeautifulSoup 进行网页抓取,以提取一些信息。这是页面中的一节:
<home>
<div class="search-box"></div>
<form action="/file-upload" class="dropzone" id="my-awesome-dropzone"></form>
<script>
// Register a handler for this tag's 'mount' event. When it fires, a Dropzone
// instance is constructed for the ".dropzone" selector, which matches the
// upload form declared above (it carries class="dropzone").
this.on('mount', function(){
new Dropzone(".dropzone");
})
</script>
</home>
我想从中获取值 1.1。
我正在使用的部分代码是:
<!-- Result cell: the first <span> holds the numeric value (1.1); the second <span>, with class "result-value-unit", holds its unit (MB). -->
<div class="result-value" data-reactid=".0.0.3.0.0.3.$0.1.1">
<span data-reactid=".0.0.3.0.0.3.$0.1.1.0">1.1</span>
<span class="result-value-unit" data-reactid=".0.0.3.0.0.3.$0.1.1.1">MB</span>
</div>
得到的结果是:
# Extract the value shown inside the ".result-value" element.
# `soup` is expected to be an already-parsed BeautifulSoup document.
try:
    # select() returns a list of every matching tag; in the sample markup both
    # the value <span> and the unit <span> match this selector.
    Area = soup.select(".result-value span")
    # Print the text of the first match (the value) instead of the raw tag
    # list; fall back to printing the empty list when nothing matched.
    print(Area[0].text if Area else Area)
except Exception as e:
    # `Exception` replaces the Python 2-only `StandardError`, so the handler
    # works on both Python 2 and Python 3.
    converted_date = "Error was {0}".format(e)
    print(converted_date)
有什么不对?
答案 0 :(得分:0)
假设您知道'use strict';
angular.module('App')

/**
 * AuthService: holds the current auth token, persists it in localStorage and
 * installs it as the default `Authorization` header on `$http`.
 *
 * Public API:
 *   login(user)       -> promise resolved with the server message on success
 *   register(user)    -> promise resolved with the server message on success
 *   logout()          -> clears the token from memory, `$http` defaults and storage
 *   isAuthenticated() -> boolean
 *
 * Dependencies use inline array annotation so injection survives minification.
 */
.service('AuthService', ['$q', '$http', 'API_ENDPOINT', function ($q, $http, API_ENDPOINT) {
  // localStorage key under which the token is stored.
  var LOCAL_TOKEN_KEY = 'My big secret here';
  var isAuthenticated = false;
  var authToken;

  // Restore a previously stored token, if any.
  function loadUserCredentials() {
    var token = window.localStorage.getItem(LOCAL_TOKEN_KEY);
    if (token) {
      useCredentials(token);
    }
  }

  // Persist the token and start using it.
  function storeUserCredentials(token) {
    window.localStorage.setItem(LOCAL_TOKEN_KEY, token);
    useCredentials(token);
  }

  // Mark the session as authenticated and send the token with every request.
  function useCredentials(token) {
    isAuthenticated = true;
    authToken = token;
    // Set the token as header for your requests!
    $http.defaults.headers.common.Authorization = authToken;
  }

  // Forget the token everywhere it was recorded.
  function destroyUserCredentials() {
    authToken = undefined;
    isAuthenticated = false;
    $http.defaults.headers.common.Authorization = undefined;
    window.localStorage.removeItem(LOCAL_TOKEN_KEY);
  }

  /**
   * POST `payload` to `API_ENDPOINT.url + path` and map the response onto a
   * promise of the server message.
   *
   * Returning the `$http` promise chain (instead of wrapping it in a new
   * `$q(...)` with no error handler) means HTTP/network failures reject the
   * returned promise rather than leaving it pending forever.
   *
   * @param {string} path - endpoint path appended to API_ENDPOINT.url
   * @param {Object} payload - request body
   * @param {function(Object)=} onSuccess - optional hook called with
   *   `result.data` before the promise resolves
   * @returns {Promise<*>} resolves with `result.data.msg` when
   *   `result.data.success` is truthy; rejects with `result.data.msg`
   *   otherwise, or with the `$http` error response on request failure
   */
  function postForMessage(path, payload, onSuccess) {
    return $http.post(API_ENDPOINT.url + path, payload).then(function (result) {
      if (!result.data.success) {
        return $q.reject(result.data.msg);
      }
      if (onSuccess) {
        onSuccess(result.data);
      }
      return result.data.msg;
    });
  }

  var register = function (user) {
    return postForMessage('/signup', user);
  };

  var login = function (user) {
    return postForMessage('/authenticate', user, function (data) {
      storeUserCredentials(data.token);
    });
  };

  var logout = function () {
    destroyUserCredentials();
  };

  loadUserCredentials();

  return {
    login: login,
    register: register,
    logout: logout,
    isAuthenticated: function () { return isAuthenticated; }
  };
}])

/**
 * AuthInterceptor: on a failed response, broadcasts the auth event mapped to
 * the response status (currently only 401) on `$rootScope`, then re-rejects
 * so the original caller still sees the failure.
 */
.factory('AuthInterceptor', ['$rootScope', '$q', 'AUTH_EVENTS', function ($rootScope, $q, AUTH_EVENTS) {
  return {
    responseError: function (response) {
      var eventName = {
        401: AUTH_EVENTS.notAuthenticated
      }[response.status];
      // Only broadcast for mapped statuses; other errors have no event name
      // and must not trigger a broadcast of `undefined`.
      if (eventName) {
        $rootScope.$broadcast(eventName, response);
      }
      return $q.reject(response);
    }
  };
}])

// Register the interceptor so it sees every `$http` response error.
.config(['$httpProvider', function ($httpProvider) {
  $httpProvider.interceptors.push('AuthInterceptor');
}]);
的值,您可以获得正确的元素:
data-reactid
答案 1 :(得分:0)
同样,如果soup.find('span', {'data-reactid': '.0.0.3.0.0.3.$0.1.1.0'}).text
能正常执行,代码就不会返回任何错误消息。您得到了一条结果消息,这至少说明您的try...except...
部分在正常工作。我猜问题出在您的htmlfile
上,它想必是bytes
而不是str
。我建议您按如下方式稍微修改代码:
from urllib.request import urlopen

# Download the page and decode the raw bytes to text before parsing, so that
# BeautifulSoup receives a str. The context manager closes the HTTP response
# even if read() or decode() raises.
with urlopen(url) as response:
    # If errors occur here, try: response.read().decode('utf-8', errors='ignore')
    htmlfile = response.read().decode('utf-8')
soup = BeautifulSoup(htmlfile, 'lxml')
然后继续执行其余的代码。