使用Python Selenium Webdriver重新格式化抓取的数据。

时间:2018-10-20 14:04:11

标签: python-3.x pandas selenium-webdriver

我想提取网页上所有品牌的名称,然后将其粘贴到excel(xlsx)文件中。我使用的代码是:

# Load the bluesign references page and pause briefly before reading it.
browser.get("https://www.bluesign.com/industry/manufacturers/references.html")
time.sleep(2)

# The text of the 'defaultMain' div is read as ONE string, so the list
# below ends up with a single element holding all of that text.
link = browser.find_element_by_xpath("//div[@class='defaultMain']")
item_list_new = [link.text]
print(link.text)

# Build a one-column ("shows") DataFrame from the list and write it to Excel.
links_list_dict = dict(shows=item_list_new)
df_links_list = pd.DataFrame(links_list_dict)
df_links_list.to_excel("bluesign.xlsx")

问题是输出(品牌名称)似乎位于单行单列中。我希望它出现在不同的行中。请帮忙。

1 个答案:

答案 0 :(得分:0)

只需使用:

// Fills WaveOutDevComboBox with one entry per wave-out device.
//
// Step 1: collect the 'DeviceDesc' values found two levels below
//         HKLM\SYSTEM\CurrentControlSet\Enum\HDAUDIO into audioDeviceNames.
// Step 2: for every device reported by waveOutGetNumDevs, query its caps.
//         If a MediaCategories key exists for the device's NameGuid, its
//         'Name' value is used. Otherwise the name from szPname is split into
//         "prefix (device)" and the device part is replaced by the first
//         collected description that starts with it (case-insensitive).
//
// Any exception raised during step 2 disables the combo box instead of
// propagating out of the handler.
procedure TForm_Config.FormCreate(Sender: TObject);
type
  // ANSI layout of the extended wave-out caps record (szPname is AnsiChar),
  // so it must be filled by the ANSI variant of the API call below.
  tagWAVEOUTCAPS2A = packed record
    wMid: WORD;
    wPid: WORD;
    vDriverVersion: MMVERSION;
    szPname: array[0..MAXPNAMELEN-1] of AnsiChar;
    dwFormats: DWORD;
    wChannels: WORD;
    wReserved1: WORD;
    dwSupport: DWORD;
    ManufacturerGuid: System.TGUID;
    ProductGuid: System.TGUID;
    NameGuid: System.TGUID;
  end;
var
  i,outdevs: Integer;
  woCaps: tagWAVEOUTCAPS2A;
  RegistryService: TRegistry;
  iClasses, iSubClasses, iNames: Integer;
  audioDeviceClasses, audioDeviceSubClasses, audioDeviceNames: TStringList;
  initialDeviceName, partialDeviceName, fullDeviceName: string;
begin
  audioDeviceClasses := TStringList.Create;
  audioDeviceSubClasses := TStringList.Create;
  audioDeviceNames := TStringList.Create;
  try
    // --- Step 1: gather device descriptions from the registry -------------
    RegistryService := TRegistry.Create;
    try
      RegistryService.RootKey := HKEY_LOCAL_MACHINE;
      if RegistryService.OpenKeyReadOnly('\SYSTEM\CurrentControlSet\Enum\HDAUDIO\') then begin
        RegistryService.GetKeyNames(audioDeviceClasses);
        RegistryService.CloseKey();
        for iClasses := 0 to audioDeviceClasses.Count - 1 do begin
          if RegistryService.OpenKeyReadOnly('\SYSTEM\CurrentControlSet\Enum\HDAUDIO\'+audioDeviceClasses[iClasses]) then begin
            RegistryService.GetKeyNames(audioDeviceSubClasses);
            RegistryService.CloseKey();
            for iSubClasses := 0 to audioDeviceSubClasses.Count - 1 do begin
              if RegistryService.OpenKeyReadOnly('\SYSTEM\CurrentControlSet\Enum\HDAUDIO\'+audioDeviceClasses[iClasses]+'\'+audioDeviceSubClasses[iSubClasses]) then begin
                if RegistryService.ValueExists('DeviceDesc') then begin
                  fullDeviceName := Trim(RegistryService.ReadString('DeviceDesc'));
                  // When the value contains ';', keep only the text after the
                  // first ';'.
                  if AnsiPos(';',fullDeviceName) > 0 then begin
                    fullDeviceName := Trim(AnsiMidStr(fullDeviceName, AnsiPos(';',fullDeviceName)+1, Length(fullDeviceName)));
                  end;
                  audioDeviceNames.Add(fullDeviceName);
                end;
                RegistryService.CloseKey();
              end;
            end;
          end;
        end;
      end;
    finally
      FreeAndNil(RegistryService);
    end;

    // WaveOutDevComboBox is a selection box (combo) placed in the form and will receive the list of output audio devices
    WaveOutDevComboBox.Clear;

    // --- Step 2: enumerate wave-out devices and resolve their names -------
    try
      outdevs := waveOutGetNumDevs;
      for i := 0 to outdevs - 1 do begin
        ZeroMemory(@woCaps, sizeof(woCaps));
        // Call the ANSI entry point explicitly: woCaps uses the ANSI record
        // layout, and the generic name may resolve to the wide-character
        // variant, which would fill the record with a mismatched layout.
        if waveOutGetDevCapsA(i, @woCaps, sizeof(woCaps)) = MMSYSERR_NOERROR then begin
          RegistryService := TRegistry.Create;
          try
            RegistryService.RootKey := HKEY_LOCAL_MACHINE;
            if RegistryService.OpenKeyReadOnly('\System\CurrentControlSet\Control\MediaCategories\' + GUIDToString(woCaps.NameGuid)) then begin
              WaveOutDevComboBox.Items.Add(RegistryService.ReadString('Name'));
              RegistryService.CloseKey();
            end
            else begin
              // Split "prefix (device)" into initialDeviceName = prefix and
              // partialDeviceName = device; without '(' the whole name is the
              // device part and the prefix stays empty.
              initialDeviceName := '';
              partialDeviceName := Trim(woCaps.szPname);
              if AnsiPos('(',partialDeviceName) > 0 then begin
                initialDeviceName := Trim(AnsiLeftStr(partialDeviceName,AnsiPos('(',partialDeviceName)-1));
                partialDeviceName := Trim(AnsiMidStr(partialDeviceName,AnsiPos('(',partialDeviceName)+1,Length(partialDeviceName)));
                if AnsiPos(')',partialDeviceName) > 0 then begin
                  partialDeviceName := Trim(AnsiLeftStr(partialDeviceName,AnsiPos(')',partialDeviceName)-1));
                end;
              end;
              // Start from the name reported by the driver so that it is
              // still used when no registry description was collected, then
              // prefer the first description that begins with it.
              fullDeviceName := partialDeviceName;
              for iNames := 0 to audioDeviceNames.Count - 1 do begin
                if AnsiStartsText(partialDeviceName,audioDeviceNames[iNames]) then begin
                  fullDeviceName := audioDeviceNames[iNames];
                  break;
                end;
              end;
              // Re-assemble as "prefix (device)"; the parentheses are added
              // only when a prefix was found.
              WaveOutDevComboBox.Items.Add(initialDeviceName + IfThen(initialDeviceName<>EmptyStr,' (','') + fullDeviceName + IfThen(initialDeviceName<>EmptyStr,')',''));
            end;
          finally
            FreeAndNil(RegistryService);
          end;
        end;
      end;
    except
      // Best effort: if enumeration fails, leave the combo box disabled.
      WaveOutDevComboBox.Enabled := False;
    end;
  finally
    FreeAndNil(audioDeviceClasses);
    FreeAndNil(audioDeviceSubClasses);
    FreeAndNil(audioDeviceNames);
  end;
end;

例如:

>>> df_links_list = pd.DataFrame(links_list_dict).T #Transpose

如果您不需要标题和索引,可以使用:

>>> a = {"N":["a","b","c","d"]}
>>> c = pd.DataFrame(a).T
>>> c
Out[10]: 
       0  1  2  3
   N   a  b  c  d