用美丽的汤刮取csv中的网址列表

时间:2018-01-22 14:31:36

标签: python csv web-scraping beautifulsoup export-to-csv

我在urls.csv中有一个网址列表

http://www.indianngos.org/ngo_detail.aspx?nprof=292241149
http://www.indianngos.org/ngo_detail.aspx?nprof=9986241242
http://www.indianngos.org/ngo_detail.aspx?nprof=319824125

我的代码如下:

import requests
from bs4 import BeautifulSoup
import csv

with open('urls.csv' , 'r') as csv_file:
csv_reader = csv.reader(csv_file)

for line in csv_reader:
    r = requests.get(line[0]).text

    soup = BeautifulSoup(r,'lxml')

    csv_file = open('output.csv', 'w')

    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Ngoname', 'CEO', 'City', 'Address', 'Phone', 'Mobile', 'E-mail'])
    # print(soup.prettify())

    ngoname = soup.find('h1')
    print('NGO Name :', ngoname.text)

    ceo = soup.find('h2', class_='').text
    ceo_name = ceo.split(':')
    print('CeoName:', ceo_name[1])

    city = soup.find_all('span')
    print('City :', city[5].text)

    address = soup.find_all('span')
    print('Address :', address[6].text)

    phone = soup.find_all('span')
    print('Phone :', phone[7].text)

    mobile = soup.find_all('span')
    print('Mobile :', mobile[8].text)

    email = soup.find_all('span')
    print('Email_id :', email[9].text)

 csv_writer.writerow([ngoname.text, ceo_name[1], city[5].text, address[6].text, phone[7].text, mobile[8].text, email[9].text])

csv_file.close()

我只是从这个刮刀中获取最后一个网址的数据。 如何从输出csv

中的每个URL下方获取数据

1 个答案:

答案 0 :(得分:0)

您需要为所有三个CSV文件保持输出文件处于打开状态。目前,您每次都会覆盖:

class WoActivity : BaseActivity()

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        val binding = DataBindingUtil.setContentView(this, R.layout.activity_wo) as ActivityWoBinding

        binding.wo = WorkOrder()

        setSupportActionBar(binding.toolbar)
        supportActionBar!!.setDisplayHomeAsUpEnabled(true)

        binding.setHandler(this);
        binding.setManager(getSupportFragmentManager());

    }

    companion object {
        @BindingAdapter("handler") @JvmStatic
        fun bindViewPagerAdapter(view: ViewPager, activity: WoActivity) {
            val adapter = WOPagerAdapter(activity.supportFragmentManager)
            adapter.addFragment(WoTabWoFragment.newInstance(), view.context.getString(R.string.work_order))
            adapter.addFragment(WoTabScheFragment.newInstance(), view.context.getString(R.string.scheduling))
            view.adapter = adapter
        }

        @BindingAdapter("pager") @JvmStatic
        fun bindViewPagerTabs(view: TabLayout, pagerView: ViewPager) {
            view.setupWithViewPager(pagerView, true)
        }
    }
}

这种方法会为您提供一个包含以下内容的输出文件:

import requests
from bs4 import BeautifulSoup
import csv

with open('urls.csv', newline='') as f_urls, open('output.csv', 'w', newline='') as f_output:
    csv_urls = csv.reader(f_urls)
    csv_output = csv.writer(f_output)
    csv_output.writerow(['Ngoname', 'CEO', 'City', 'Address', 'Phone', 'Mobile', 'E-mail'])

    for line in csv_urls:
        r = requests.get(line[0]).text
        soup = BeautifulSoup(r, 'lxml')

        ngoname = soup.find('h1')
        print('NGO Name :', ngoname.text)

        ceo = soup.find('h2', class_='').text
        ceo_name = ceo.split(':')
        print('CeoName:', ceo_name[1])

        city = soup.find_all('span')
        print('City :', city[5].text)

        address = soup.find_all('span')
        print('Address :', address[6].text)

        phone = soup.find_all('span')
        print('Phone :', phone[7].text)

        mobile = soup.find_all('span')
        print('Mobile :', mobile[8].text)

        email = soup.find_all('span')
        print('Email_id :', email[9].text)

        csv_output.writerow([ngoname.text, ceo_name[1], city[5].text, address[6].text, phone[7].text, mobile[8].text, email[9].text])