从内部功能中断时如何访问外部功能?

时间:2020-05-08 14:36:59

标签: python python-3.x selenium

我开发了以下代码。通常情况下,它工作正常。但是,有时在下载记录(record())时,由于服务器无法响应或Internet断开而失败。为了避免破坏代码,我使用了“ try-exceptions”,但随后,我无法同时脱离record()和外部函数。因此,寻找解决方案。第197行左右的代码无助于从外部while循环继续执行代码。代码在这里:

import glob
import datetime
import cv2
import base64
from PIL import Image
from io import BytesIO
import time
import selenium
import self as self
from pytesseract import pytesseract
from selenium.webdriver.common.keys import Keys
import os
from selenium.webdriver.support import expected_conditions as EC, expected_conditions
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException, \
    WebDriverException, ElementNotInteractableException, UnexpectedAlertPresentException

main_Directory = r'/home/sangharshmanuski/Documents/e_courts/mha/downloads4'
log_Directory = r'/home/sangharshmanuski/Documents/e_courts/mha/log'
driver = selenium.webdriver.Firefox()
url = r'https://districts.ecourts.gov.in/'
driver.get(url)
# create wait time variable for regular, short and mid
wait = WebDriverWait(driver, 180)
waitShort = WebDriverWait(driver, 3)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#sateist > option:nth-child(22)")))
select = Select(driver.find_element_by_css_selector('#sateist'))
options = select.options
select.select_by_visible_text('Maharashtra')
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.region')))

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
    districtListDropdown = Select(driver.find_element_by_css_selector("#sateist"))
    distOptions = districtListDropdown.options

    # iterate over each district
    i = 1
    while i < len(distOptions):
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
            newDistDropDown = Select(driver.find_element_by_css_selector("#sateist"))
        except:
            continue
        newDistOptions = newDistDropDown.options
        distName = newDistOptions[i].text
        print(distName)
        newDistDropDown.select_by_index(i)
        # for creating directory as per each district.
        district_directory = os.path.join(
            main_Directory, distName)  # create new
        if not os.path.exists(district_directory):  # if not directory exists, create one
            os.mkdir(district_directory)
        district_log_directory = os.path.join(log_Directory, distName)
        if not os.path.exists(district_log_directory):  # if not directory exists, create one
            os.mkdir(district_log_directory)
        headingDist = driver.find_element_by_css_selector('.heading')
        if headingDist.text.lower() == distName.lower():
            wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.accordion2:nth-child(2)'))).click()
            current = driver.window_handles[0]
            wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 'div.panel:nth-child(3) > ul:nth-child(1) > li:nth-child(6) > a:nth-child(1)'))).click()
            # wait until new tab opens.
            wait.until(EC.number_of_windows_to_be(2))
            # define new tab by differentiating from current tab.
            newWindow = [window for window in driver.window_handles if window != current][0]
            # switch to the new tab. ref: https://stackoverflow.com/questions/41571217/python-3-5-selenium-how-to-handle-a-new-window-and-wait-until-it-is-fully-lo
            driver.switch_to.window(newWindow)
            # wait till court complex list appears.
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#court_complex_code')))
            # create list of all court complex.
            # 2 approaches - 1 select 2 click.
            time.sleep(3)


            def complex_and_act():
                this = driver.current_window_handle

                def imgtotxt():
                    elem = driver.find_element_by_id("captcha_image")
                    loc = elem.location
                    size = elem.size
                    left = loc['x']
                    top = loc['y']
                    width = size['width']
                    height = size['height']
                    box = (int(left), int(top), int(left + width), int(top + height))
                    screenshot = driver.get_screenshot_as_base64()
                    img = Image.open(BytesIO(base64.b64decode(screenshot)))
                    area = img.crop(box)
                    area.save('/home/sangharshmanuski/Documents/e_courts/captcha/file_trial.png', 'PNG')
                    fullPath = r'/home/sangharshmanuski/Documents/e_courts/captcha'
                    f = os.listdir(fullPath)
                    desPath = r"/home/sangharshmanuski/Documents/e_courts/editC"
                    img = cv2.imread(os.path.join(fullPath, 'file_trial.png'), 0)
                    ret, thresh1 = cv2.threshold(img, 111, 255, cv2.THRESH_BINARY)
                    cv2.imwrite('/home/sangharshmanuski/Documents/e_courts/editC/oneDisNoLoop.png', thresh1)
                    # know the text with pytesseract
                    captchaText = pytesseract.image_to_string(
                        Image.open('/home/sangharshmanuski/Documents/e_courts/editC/oneDisNoLoop.png'))
                    captcha = driver.find_element_by_id('captcha')
                    captcha.send_keys(captchaText)
                    driver.find_element_by_css_selector('input.button:nth-child(1)').click()
                    time.sleep(1)

                def proceed():
                    while True:
                        try:
                            waitShort.until(EC.alert_is_present())
                            driver.switch_to.alert.accept()
                            driver.switch_to.window(this)
                            driver.find_element_by_css_selector(
                                '#captcha_container_2 > div:nth-child('
                                '1) > div:nth-child(1) > span:nth-child(3) > a:nth-child(7) > img:nth-child(1)').click()
                            log_file = open(os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                            log_file.write('alert was present' + '\n')
                            print('alert was present')
                            imgtotxt()
                        except:
                            # if the waitmsg is on, wait for 5 sec
                            log_file = open(os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                            log_file.write('no alert' + '\n')
                            print('no alert')
                            waitmsg = 0
                            while driver.find_element_by_css_selector('#waitmsg').is_displayed():
                                if waitmsg < 7:
                                    log_file = open(
                                        os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                    log_file.write('wait' + '\n')
                                    print('waitmsg')
                                    time.sleep(1)
                                    waitmsg += 1
                                else:
                                    log_file = open(os.path.join(
                                        log_Directory, nameCourtComp + '.txt'), 'a')
                                    log_file.write('waiting finished' + '\n')
                                    print('waiting finished')
                                    break
                            invalidCaptcha = "Invalid Captcha"
                            norecord = "Record Not Found"
                            try:
                                waitShort.until(
                                    EC.presence_of_element_located((By.CSS_SELECTOR, '#errSpan > p:nth-child(1)')))
                                incorrect = driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').text
                                if incorrect == invalidCaptcha:
                                    log_file = open(
                                        os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                    log_file.write('Invalid Captcha' + '\n')
                                    print('invalid captcha')
                                    imgtotxt()
                                    continue
                                else:
                                    if incorrect == norecord:
                                        log_file = open(
                                            os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                        log_file.write('Record not Found' + '\n')
                                        return print('record not found')

                            except:
                                pass

                            def record():
                                log_file = open(
                                    os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                log_file.write('Record Found' + '\n')
                                print('record fun started')
                                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.someclass')))
                                listAllView = driver.find_elements_by_css_selector(
                                    'a.someclass')
                                # make new dirctory by name of Court Complex
                                distDir2 = os.path.join(
                                    main_Directory, distName, nameCourtComp)
                                if not os.path.exists(distDir2):
                                    os.makedirs(distDir2)
                                x = 0
                                for view in listAllView:
                                    try:
                                        view.click()
                                        wait.until(EC.presence_of_element_located((By.ID, 'back_top')))
                                        openFile = open(
                                            os.path.join(distDir2, "file_" + str(x) + ".html"), "w")
                                        openFile.write(driver.page_source)
                                        openFile.close()
                                        back = driver.find_element_by_id('back_top')
                                        back.click()
                                        x += 1
                                    except (TimeoutException, ElementNotInteractableException):

                                        driver.refresh()
                                        log_file = open(
                                            os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                        log_file.write(
                                            'While Downloading record for '
                                            + nameCourtComp + ' error occured, retrying now...' + '\n')
                                        nonlocal courtComp
                                        courtComp -= 1
                                        return print(
                                            'While Downloading record for '
                                            + nameCourtComp + ' error occured, retrying now...')
                                log_file = open(
                                    os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                                log_file.write('record completed, ' + str(x) + ' records found' + '\n')
                                print('record completed, ' + str(x) + ' records found')
                                return

                            record()
                            return
                courtComp = 1
                courtComplexDownload = Select(
                    driver.find_element_by_css_selector('#court_complex_code'))
                courtComplexDownloadList = courtComplexDownload.options
                courtComplexLen = len(courtComplexDownloadList)
                while courtComp < courtComplexLen:
                    nameCourtComp = courtComplexDownloadList[courtComp].text
                    log_file = open(os.path.join(log_Directory, nameCourtComp + '.txt'), 'w')
                    log_file.write(nameCourtComp + '\n' + '\n')
                    print(nameCourtComp)
                    courtComplexDownload.select_by_index(courtComp)
                    acts = Select(driver.find_element_by_css_selector('#actcode'))
                    actsOpt = acts.options
                    act = 0
                    while len(actsOpt) < 2:
                        if act < 10:

                            time.sleep(1)
                            act += 1
                        else:
                            #if there is no list to populate break out of this loop & go to next complex
                            raise Exception()
                    try:
                        acts.select_by_value('33')
                    except NoSuchElementException:
                        print('PoA not applicable')
                        log_file = open(
                            os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
                        log_file.write('No PoA' + '\n')
                        courtComp += 1
                        continue

                    imgtotxt()
                    proceed()
                    courtComp += 1


            complex_and_act()
            driver.close()
            print("all court complexes in " + distName + " completed")
            driver.switch_to.window(current)
            driver.back()

        else:
            time.sleep(5)
            continue
        i += 1

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#sateist > option:nth-child(22)")))
    select = Select(driver.find_element_by_css_selector('#sateist'))
    options = select.options
    select.select_by_visible_text('Maharashtra')
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.region')))
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))

0 个答案:

没有答案