I have a list of URLs in urls.csv:
http://www.indianngos.org/ngo_detail.aspx?nprof=292241149
http://www.indianngos.org/ngo_detail.aspx?nprof=9986241242
http://www.indianngos.org/ngo_detail.aspx?nprof=319824125
My code is as follows:
import requests
from bs4 import BeautifulSoup
import csv

with open('urls.csv', 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    for line in csv_reader:
        r = requests.get(line[0]).text
        soup = BeautifulSoup(r, 'lxml')
        csv_file = open('output.csv', 'w')
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Ngoname', 'CEO', 'City', 'Address', 'Phone', 'Mobile', 'E-mail'])
        # print(soup.prettify())
        ngoname = soup.find('h1')
        print('NGO Name :', ngoname.text)
        ceo = soup.find('h2', class_='').text
        ceo_name = ceo.split(':')
        print('CeoName:', ceo_name[1])
        city = soup.find_all('span')
        print('City :', city[5].text)
        address = soup.find_all('span')
        print('Address :', address[6].text)
        phone = soup.find_all('span')
        print('Phone :', phone[7].text)
        mobile = soup.find_all('span')
        print('Mobile :', mobile[8].text)
        email = soup.find_all('span')
        print('Email_id :', email[9].text)
        csv_writer.writerow([ngoname.text, ceo_name[1], city[5].text, address[6].text, phone[7].text, mobile[8].text, email[9].text])
        csv_file.close()
With this scraper I only get the data from the last URL. How can I get the data for every URL written to the output csv?

Answer 0 (score: 0):
You need to keep the output file open while you process all three URLs. At the moment you reopen it in write mode on every iteration of the loop, which truncates the file and overwrites whatever the previous iteration wrote:
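To see why only the last row survives, here is a minimal sketch of the same pattern (the file name demo.csv and the sample values are made up for illustration):

import csv

# Opening in 'w' mode truncates the file, so each iteration throws away
# whatever the previous iteration wrote.
for value in ['row1', 'row2', 'row3']:
    f = open('demo.csv', 'w')          # truncates demo.csv on every pass
    csv.writer(f).writerow([value])
    f.close()
# demo.csv now contains only 'row3' - the same effect as in the scraper above.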
The following approach opens the output file once, keeps it open for the whole run, and gives you an output file with one row per URL:
import requests
from bs4 import BeautifulSoup
import csv

with open('urls.csv', newline='') as f_urls, open('output.csv', 'w', newline='') as f_output:
    csv_urls = csv.reader(f_urls)
    csv_output = csv.writer(f_output)
    csv_output.writerow(['Ngoname', 'CEO', 'City', 'Address', 'Phone', 'Mobile', 'E-mail'])

    for line in csv_urls:
        r = requests.get(line[0]).text
        soup = BeautifulSoup(r, 'lxml')

        ngoname = soup.find('h1')
        print('NGO Name :', ngoname.text)
        ceo = soup.find('h2', class_='').text
        ceo_name = ceo.split(':')
        print('CeoName:', ceo_name[1])
        city = soup.find_all('span')
        print('City :', city[5].text)
        address = soup.find_all('span')
        print('Address :', address[6].text)
        phone = soup.find_all('span')
        print('Phone :', phone[7].text)
        mobile = soup.find_all('span')
        print('Mobile :', mobile[8].text)
        email = soup.find_all('span')
        print('Email_id :', email[9].text)

        csv_output.writerow([ngoname.text, ceo_name[1], city[5].text, address[6].text, phone[7].text, mobile[8].text, email[9].text])
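As a further refinement that is not part of the original answer, you could wrap each request in a try/except with a timeout, so one unreachable URL or unexpected page layout does not abort the whole run. This is only a sketch; the helper name scrape_one is hypothetical:

import requests
from bs4 import BeautifulSoup
import csv

# Hypothetical helper: fetch one profile page and return the row to write.
def scrape_one(url):
    r = requests.get(url, timeout=10)   # timeout so a dead host cannot hang the run
    r.raise_for_status()                 # treat HTTP errors as failures
    soup = BeautifulSoup(r.text, 'lxml')
    spans = soup.find_all('span')
    ceo_name = soup.find('h2', class_='').text.split(':')
    return [soup.find('h1').text, ceo_name[1], spans[5].text, spans[6].text,
            spans[7].text, spans[8].text, spans[9].text]

with open('urls.csv', newline='') as f_urls, open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.writer(f_output)
    csv_output.writerow(['Ngoname', 'CEO', 'City', 'Address', 'Phone', 'Mobile', 'E-mail'])
    for line in csv.reader(f_urls):
        try:
            csv_output.writerow(scrape_one(line[0]))
        except (requests.RequestException, AttributeError, IndexError) as exc:
            print('Skipping', line[0], '-', exc)   # log the failure and move on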