一种在python中使用selenium webdriver匹配来自不同页面的某些字段的方法

时间:2015-09-03 10:39:22

标签: python selenium xpath selenium-webdriver webdriver

我正在尝试访问不同页面,在某些字段中插入名称/电子邮件,然后按一个按钮提交这些字段。

现在,我发现了一种使用webdriver在所有页面上匹配电子邮件/名称的方法,即使它们的html结构不同。我使用以下代码:

import logging
from selenium.common.exceptions import ErrorInResponseException, \
    WebDriverException

from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display
from selenium import webdriver
import lxml.html
import urlparse
import time
import re


# Alphabets fed to XPath 1.0 translate() so attribute values can be compared
# case-insensitively (XPath 1.0 has no lower-case() function).
_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
_LOWER = 'abcdefghijklmnopqrstuvwxyz'
_LOWERED = "translate(., '%s', '%s')" % (_UPPER, _LOWER)

# Any element whose @type is "submit" and that comes after an element whose
# @name is "email" or "name" (all compared case-insensitively).
_SUBMIT_XPATH = (
    "//*[@type[%(lc)s = 'submit']]"
    "[preceding::*[@name[%(lc)s ='email' or %(lc)s ='name']]]"
) % {'lc': _LOWERED}

# Last resort: a link nested inside a submit element that follows the fields.
_SUBMIT_LINK_XPATH = ("//*[@name='email' or @name='name']//following::*["
                      "@type='submit']/a")

# Only absolute http(s)/www links are worth opening in the browser.
_URL_RE = re.compile(
    r'https?://(?:www\.|(?!www))[^\s\.]+\.[^\s]{2,}|www\.[^\s]+\.[^\s]{2,}')

# How many times one link may be re-queued after the browser had to be
# restarted; keeps a page that always kills Chrome from looping forever.
_MAX_CRASH_RETRIES = 3


def _attr_mentions(keyword):
    """Return an XPath predicate: some attribute value contains *keyword*."""
    return "[@*[contains(%s, '%s')]]" % (_LOWERED, keyword)


def _try_fill(context, xpath, text):
    """Click, clear and type *text* into the first match of *xpath*.

    *context* is a driver or an element.  Returns True on success and False
    when the element is missing or cannot be interacted with.
    """
    try:
        field = context.find_element_by_xpath(xpath)
        field.click()
        field.clear()
        field.send_keys(text)
    except WebDriverException:
        return False
    return True


def _fill_from_text_inputs(driver, name, email, need_name, need_email):
    """Try every text input in turn for the fields that are still missing."""
    for text_input in driver.find_elements_by_xpath("//input[@type='text']"):
        # 'self::*[...]' is required here: the abbreviated step '.' cannot
        # carry a predicate in XPath 1.0, so '.[...]' is rejected.
        if need_name and _try_fill(
                text_input, 'self::*' + _attr_mentions('name'), name):
            need_name = False
        elif need_email and _try_fill(
                text_input, 'self::*' + _attr_mentions('email'), email):
            need_email = False
        if not (need_name or need_email):
            break


def _press_submit(driver):
    """Activate the form's submit control; return True once one responds."""
    for button in driver.find_elements_by_xpath(_SUBMIT_XPATH):
        try:
            button.click()
            return True
        except WebDriverException:
            pass
        try:
            # Some controls refuse a click but still react to the keyboard.
            button.send_keys(Keys.ENTER)
            return True
        except WebDriverException:
            continue
    try:
        driver.find_element_by_xpath(_SUBMIT_LINK_XPATH).click()
    except WebDriverException:
        return False
    return True


def _quit_quietly(driver):
    """Shut a browser down, tolerating one that has already died."""
    try:
        driver.quit()
    except Exception:
        logging.debug('Browser was already gone', exc_info=True)


def subscribe(email, name):
    """Fill in and submit the sign-up form behind every muncheye.com listing.

    Each link in the right-hand column of the front page is opened with lxml
    to read the product's own URL; that URL is then loaded in Chrome, the
    name and e-mail inputs are filled and the submit control is activated.

    :param email: text typed into the e-mail field.
    :param name: text typed into the name field.
    :return: list of URLs that could not be processed.
    """
    # NOTE(review): the virtual display is created but never started, so
    # Chrome still opens on the real screen.  Call display.start() (and
    # display.stop() afterwards) if headless operation is intended.
    display = Display(visible=0, size=(800, 600))

    dom = lxml.html.parse('http://muncheye.com')
    url = dom.docinfo.URL
    driver = webdriver.Chrome()

    failed_urls = []
    fallback_count = 0
    crash_counts = {}

    pending = list(dom.xpath('//div[@id="right-column"]//a/@href'))
    print(len(pending))

    try:
        # Index-based walk: links re-queued after a crash are appended to
        # 'pending' and picked up later without mutating a list mid-'for'.
        position = 0
        while position < len(pending):
            link = pending[position]
            position += 1

            page_url = urlparse.urljoin(url, link)
            try:
                product_page = lxml.html.parse(page_url)
            except IOError:
                # A dead listing must not abort the whole run.
                logging.exception("Fail here:{0}".format(page_url))
                failed_urls.append(page_url)
                continue

            hrefs = product_page.xpath(
                '//div[@class="product_info"]//table//tr[7]//td[2]//a/@href')
            if not hrefs:
                continue
            submit_url = hrefs[0]
            if not _URL_RE.match(submit_url):
                continue

            time.sleep(10)
            button_required = True
            try:
                driver.get(submit_url)
                name_done = _try_fill(
                    driver, '//input' + _attr_mentions('name'), name)
                email_done = _try_fill(
                    driver, '//input' + _attr_mentions('email'), email)

                if not (name_done and email_done):
                    fallback_count += 1
                    print("here" + " = " + str(fallback_count)
                          + " link = " + str(submit_url))
                    _fill_from_text_inputs(
                        driver, name, email, not name_done, not email_done)

                button_required = not _press_submit(driver)
            except WebDriverException:
                logging.exception('Chrome crashed')
                _quit_quietly(driver)
                driver = webdriver.Chrome()
                crash_counts[link] = crash_counts.get(link, 0) + 1
                if crash_counts[link] <= _MAX_CRASH_RETRIES:
                    pending.append(link)
                else:
                    failed_urls.append(submit_url)
            except Exception:
                logging.exception("Fail here:{0}".format(submit_url))
                failed_urls.append(submit_url)

            time.sleep(5)
            print(button_required)
    finally:
        _quit_quietly(driver)

    return failed_urls


# Run one pass using the same throwaway address for both the e-mail and the
# name, then show the URLs that could not be processed.
print(subscribe('hfbfsdfsdf@freeletter.me', 'hfbfsdfsdf@freeletter.me'))

现在,我不知道问题出在源代码还是 webdriver/xpath 上,但我认为脚本在尝试提交这些字段时并没有在页面上找到提交按钮,因为在 100 个可用链接中,我只收到了 5 到 6 封电子邮件。

现在的问题是:有人能给我一个更好的 XPath 表达式吗?即使各个页面的结构彼此不同,它也要能填写名称/电子邮件字段并按下提交按钮。

1 个答案:

答案 0 :(得分:-1)

更合适的方法是针对每个页面,用适合该页面的特定方式去查找每一对元素,不断尝试直到真正找到这些元素,然后再对它们进行操作。我不知道这些页面的 HTML 是什么样的,所以无法给出实际的代码。