Selenium 网页抓取求助

时间:2016-03-14 16:21:01

标签: python python-3.x selenium web web-scraping

我正试图抓取音乐家(Musician)所获荣誉勋章的详细信息。这里(here)是音乐家列表的链接。点击每一位获奖者的"View"后,会出现一个整洁的小表格,包含 RANK、COMPANY、DATE OF ISSUE 等字段。我想从该表格中抓取数据。

# Asker's attempt: open the search-results page filtered to rank "Musician"
# and print the text of the detail elements found on it.
from selenium import webdriver

# Uses a chromedriver binary at a hard-coded local path.
driver = webdriver.Chrome("/Users/ashkij/Desktop/chromedriver")
driver.get("http://www.cmohs.org/search-results.php?q=&x=40&y=9&rank=Musician&organization=&division=&company=&conflict=")
# NOTE(review): both lookups below pass a compound CSS selector ("div.a.b")
# to the class-name locator, which expects a single bare class name, so they
# most likely match nothing — confirm against the Selenium locator docs
# (the CSS-selector locator is the one that accepts this syntax).
person = driver.find_elements_by_class_name("div.floatElement.recipientView")
details = driver.find_elements_by_class_name("div.detailDatacol1") 
# `person` is not used anywhere below; only `details` is iterated.
# NOTE(review): this runs against the search-results page itself; no "View"
# link is followed before looking for the detail table — verify that is intended.
for i in details:
    print(i.text)

1 个答案:

答案 0 :(得分:0)

首先收集所有视图(View)的链接,然后访问每个视图并收集目标数据:

from selenium import webdriver

driver = webdriver.Chrome()
driver.get('http://www.cmohs.org/search-results.php?q=&x=40&y=9&rank=Musician&organization=&division=&company=&conflict=')

# get hrefs
view_links = driver.find_elements_by_class_name('recipientView')
for index, view in enumerate(view_links):
    html = view.get_attribute('innerHTML')
    href = html.split('"')[1]
    view_links[index] = href

# visit each href and get data
for href in view_links:
    driver.get(href)
    detail_data = driver.find_elements_by_class_name('dataBar')
    for detail in detail_data:
        print(detail.text)

% Initialization code: builds a struct describing this GUI and hands it,
% together with the caller's arguments, to gui_mainfcn.
% NOTE(review): nargin/nargout/varargin/varargout are used below, but the
% enclosing `function varargout = ...(varargin)` header is not visible in this
% excerpt — presumably it precedes this code; confirm before running.
gui_Singleton = 1;
gui_State = struct('gui_Name',       mfilename, ...
                   'gui_Singleton',  gui_Singleton, ...
                   'gui_OpeningFcn', @GUI_Personality_Impressions_OpeningFcn, ...
                   'gui_OutputFcn',  @GUI_Personality_Impressions_OutputFcn, ...
                   'gui_LayoutFcn',  [] , ...
                   'gui_Callback',   []);
% When the first argument is a character array, it is converted to a function
% handle and stored as the callback to run.
if nargin && ischar(varargin{1})
    gui_State.gui_Callback = str2func(varargin{1});
end

% Forward gui_mainfcn's outputs only when the caller requested some.
if nargout
    [varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
    gui_mainfcn(gui_State, varargin{:});
end
% End initialization code - DO NOT EDIT


% --- Runs right before the GUI_Personality_Impressions figure is shown.
function GUI_Personality_Impressions_OpeningFcn(hObject, eventdata, handles, varargin)
% Opening hook: records the figure handle as the GUI's default output and
% stores the updated handles struct on the figure. Returns nothing.
%
%   hObject    handle to the figure
%   eventdata  reserved; not used here
%   handles    struct of handles and user data (see GUIDATA)
%   varargin   command-line arguments passed to GUI_Personality_Impressions;
%              not used here

% The figure handle is the default command-line output.
handles.output = hObject;

% Persist the modified struct so later callbacks can read it.
guidata(hObject, handles);

% To make the GUI block until the user responds (see UIRESUME), enable:
% uiwait(handles.figure1);


% --- Outputs from this function are returned to the command line.
function varargout = GUI_Personality_Impressions_OutputFcn(hObject, eventdata, handles)
% Positions the two image axes side by side, shows the same image in each,
% adds an "A" and a "B" radio button below them, stores the button handles
% in the figure's guidata, and returns the default output.
%
% varargout  cell array for returning output args (see VARARGOUT);
%            varargout{1} is set to handles.output
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Size of each image axes in pixels ([left bottom width height] below).
fig_hr = 326;   % width
fig_vr = 493;   % height

set(handles.axes1, 'Units', 'pixels');
set(handles.axes2, 'Units', 'pixels');

% Offsets are derived from the screen size. Every assignment ends with a
% semicolon so nothing is echoed to the command window.
scrz = get(0, 'ScreenSize');
pos1 = round((scrz(3) - fig_hr) / 4);   % left edge of the first axes
pos2 = round((scrz(4) - fig_vr) / 2);   % bottom edge of both axes
posa = pos1 + 1.5 * fig_hr;             % left edge of the second axes

pos3 = [pos1 pos2 fig_hr fig_vr];
set(handles.axes1, 'Position', pos3);
axes(handles.axes1);
imshow('Chinese_eyes+2.tif');

pos4 = [posa pos2 fig_hr fig_vr];
set(handles.axes2, 'Position', pos4);
axes(handles.axes2);
imshow('Chinese_eyes+2.tif');

% Get default command line output from handles structure
varargout{1} = handles.output;

% A bare @myRadio handle would be invoked with only (source, eventdata), but
% myRadio also reads a handles struct. Wrap it so the current guidata is
% looked up and passed at click time.
radioCallback = @(src, evt) myRadio(src, evt, guidata(src));

% Buttons are parented explicitly to this figure so they cannot end up in
% whatever figure happens to be current, and so guidata(src) above resolves
% to this GUI's handles. Each button sits at the horizontal midpoint of its
% axes (left edge + fig_hr/2), 70 px below the axes' bottom edge.
handles.radio1 = uicontrol('Parent',   hObject, ...
                           'Style',    'radiobutton', ...
                           'Callback', radioCallback, ...
                           'Units',    'pixels', ...
                           'Tag',      'A1', ...
                           'Position', [pos1 + fig_hr/2, pos2 - 70, 70, 50], ...
                           'String',   'A', ...
                           'Value',    1);
handles.radio2 = uicontrol('Parent',   hObject, ...
                           'Style',    'radiobutton', ...
                           'Callback', radioCallback, ...
                           'Units',    'pixels', ...
                           'Position', [posa + fig_hr/2, pos2 - 70, 70, 50], ...
                           'String',   'B', ...
                           'Tag',      'B1', ...
                           'Value',    0);

% Disabled "Next" push button, kept for reference:
%  handles.Next= uicontrol('Style', 'pushbutton', ...
%                            'Callback', @pushbutton1, ...
%                            'Units',    'pixels', ...
%                            'Position', [(((pos1+326+pos1)/2)+(posa+326+posa)/2)/2, pos2- 140,70 ,50 ], ...
%                            'String',   'Next', ...
%                            'Value',    0);
% set(handles.Next,'Enable','off')

% Store the struct (now including radio1/radio2) for later callbacks.
guidata(hObject, handles);



function myRadio(hObject,eventdata, handles)
% Callback for the "A"/"B" radio buttons: records the choice in the global
% variable `data` (1 for tag 'A1', 2 for tag 'B1') and clears the other button.
%
% hObject    handle of the radio button that was clicked
% eventdata  not used
% handles    (optional) struct holding radio1 and radio2; when omitted or
%            empty it is read from the figure with guidata(hObject)
global data

% A callback registered as a plain function handle (@myRadio) is invoked with
% only (hObject, eventdata), so handles may be missing; fetch it from the
% figure instead of failing on handles.radioN.
if nargin < 3 || isempty(handles)
    handles = guidata(hObject);
end

switch get(hObject,'Tag') % Get Tag of selected object.
    case 'A1'
        data = 1;
        % Clicking an already-selected radio button toggles it off; force it
        % back on so the visible state always matches `data`.
        set(hObject, 'Value', 1);
        set(handles.radio2, 'Value', 0);
    case 'B1'
        data = 2;
        set(hObject, 'Value', 1);
        set(handles.radio1, 'Value', 0);
end