I need to get the contents of a selected folder as an array of strings, so that I can run a script on each file in a for loop. The way I currently do it, the contents come back as a 2-D char array.
Is there a way to get the contents directly as an array of strings, rather than looping over each row and converting it to a string?
directory = uigetdir;
list = ls(strcat(directory, '\*.extension'))
for i = 1: ??
Answer (score: 1)
You want to use the dir function:
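Here is a minimal sketch of that approach (the '*.extension' pattern is a placeholder taken from your own code, and the variable names are only for illustration). dir returns a struct array with a name field for every matching file, which you can collect into a cell array in one step:

directory = uigetdir;
listing = dir(fullfile(directory, '*.extension'));   % struct array, one element per matching file
names = {listing.name};                              % cell array of file name char vectors
for k = 1:numel(names)
    filepath = fullfile(directory, names{k});
    % run your script on filepath here
end

If your MATLAB release supports string arrays, string({listing.name}) should give you the contents as an actual string array rather than a cell array of char vectors.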
import gc
import os
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select

script_path = os.path.dirname(os.path.realpath(__file__))
driver = webdriver.PhantomJS(executable_path="/usr/local/bin/bin/phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
case_list = []
# this function launches the headless browser and gets us to the first page of results, which we'll scrape using main
def search():
    driver.get('https://www.courts.mo.gov/casenet/cases/nameSearch.do')
    if 'Service Unavailable' in driver.page_source:
        # log() is assumed to be defined elsewhere in the original script
        log('Casenet website seems to be down. Receiving "service unavailable"')
        driver.quit()
        gc.collect()
        return False
    time.sleep(2)
    # fill in the search form and submit it
    court = Select(driver.find_element_by_id('courtId'))
    court.select_by_visible_text('All Participating Courts')
    case_enter = driver.find_element_by_id('inputVO.lastName')
    case_enter.send_keys('Wakefield & Associates')
    year_enter = driver.find_element_by_id('inputVO.yearFiled')
    year_enter.send_keys('2018')
    driver.find_element_by_id('findButton').click()
    time.sleep(3)
# scrapes the results table and stores what we need in a list of lists
def main():
    parties = []
    dates = []
    case_nums = []
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.findAll('table', {'class': 'outerTable'})
    for row in table:
        col = row.find_all('td', attrs={'class': 'td1'})
        col2 = row.find_all('td', attrs={'class': 'td2'})
        all_links = soup.findAll('a')
        # party names appear in both the td1 and td2 columns
        for cols in col:
            if 'V' in cols.text:
                cols = cols.string
                cols = re.sub(u"\xa0", u"", cols).strip()
                parties.append(cols)
        for cols in col2:
            if 'V' in cols.text:
                cols = cols.string
                cols = re.sub(u"\xa0", u"", cols).strip()
                parties.append(cols)
        # case numbers are embedded in the goToThisCase(...) javascript links
        for link in all_links:
            raw_html = str(link)
            if 'goToThisCase' in raw_html:
                start = raw_html.find("('") + 2
                end = raw_html.find("',")
                case = raw_html[start:end].strip()
                case_nums.append(case)
        # filing dates appear in both columns as well
        for i in col:
            if '/2018' in i.text:
                i = i.string
                i = re.sub(u"\xa0", u"", i).strip()
                dates.append(i)
        for j in col2:
            if '/2018' in j.text:
                j = j.string
                j = re.sub(u"\xa0", u"", j).strip()
                dates.append(j)
    case_list.append(parties)
    case_list.append(case_nums)
    case_list.append(dates)
    return case_list
def page_looper():
    # scrape the first page of results, which search() already loaded
    main()
    count = '1'
    print "page %s fully scraped" % count
    count = str(int(count) + 1)
    print len(case_list), " cases so far"
    print case_list
    # pages 2 through 8 are reachable through numbered links
    for count in range(2, 9):
        link = driver.find_element_by_link_text(str(count))
        link.click()
        time.sleep(2)
        main()
        print "page %s fully scraped" % count
        count = str(int(count) + 1)
        print len(case_list), " cases so far"
        print case_list
    # move on to the next block of result pages and recurse;
    # when no 'Next' link is left we are done
    try:
        next_page_link = driver.find_element_by_partial_link_text('Next')
        print "Next 10 pages found"
        next_page_link.click()
        time.sleep(2)
        page_looper()
    except Exception:
        print "no more cases"
#pprint.pprint(case_list)
#data = zip(case_list[0],case_list[1],case_list[2])
#pprint.pprint(data)
# with open(script_path + "/cases.csv", "w") as f:
# writer = csv.writer(f)
# for d in data:
# writer.writerow(d)
search()
page_looper()