如何使用BS4检测页面上没有表数据

时间:2016-03-18 21:08:20

标签: selenium pandas beautifulsoup html-table bs4

我无法使用BS4解析此HTML表格。有时页面没有付款数据,并会说“没有待处理的清单付款”。其他时候,该页面将列出所有待处理的到期付款。我想把这个数据输出到一个数组中。

package com.example.android.sunshine.app;

import android.os.Bundle;
import android.support.v4.app.Fragment;
import android.support.v7.app.ActionBarActivity;
import android.util.Log;
import android.view.LayoutInflater;
import android.view.Menu;
import android.view.MenuItem;
import android.view.View;
import android.view.ViewGroup;
import android.widget.ArrayAdapter;
import android.widget.ListView;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


public class MainActivity extends ActionBarActivity {

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);
    if (savedInstanceState == null) {
        getSupportFragmentManager().beginTransaction()
                .add(R.id.container, new PlaceholderFragment())
                .commit();
    }
}

@Override
public boolean onCreateOptionsMenu(Menu menu) {
    // Inflate the menu; this adds items to the action bar if it is present.
    getMenuInflater().inflate(R.menu.main, menu);
    return true;
}

@Override
public boolean onOptionsItemSelected(MenuItem item) {
    // Handle action bar item clicks here. The action bar will
    // automatically handle clicks on the Home/Up button, so long
    // as you specify a parent activity in AndroidManifest.xml.
    int id = item.getItemId();

    //noinspection SimplifiableIfStatement
    if (id == R.id.action_settings) {
        return true;
    }

    return super.onOptionsItemSelected(item);
}

/**
 * A placeholder fragment containing a simple view.
 */
public static class PlaceholderFragment extends Fragment {

    private ArrayAdapter<String> mForecastAdapter;

    public PlaceholderFragment() {
    }

    @Override
    public View onCreateView(LayoutInflater inflater, ViewGroup container,
                             Bundle savedInstanceState) {
        View rootView = inflater.inflate(R.layout.fragment_main, container, false);

        String[] forecastArray = {
                "Today - Sunny - 66/73",
                "Tomorrow - Sunny - 65/71",
                "Sunday - Rainy - 59/66",
                "Monday - Cloudy - 59/65",
                "Tuesday - Cloudy - 60/66",
                "Wednesday - Sunny - 61/68",
                "Thursday - Sunny - 62/70"
        };

        List<String> weekForecast = new ArrayList<>(Arrays.asList(forecastArray));

        mForecastAdapter = new ArrayAdapter<>(
                // The current context (this fragment's parent)
                getActivity(),
                // ID of list item layout
                R.layout.list_item_forecast,
                // ID of the textView to populate
                R.id.list_item_forecast_textview,
                // Forecast data
                weekForecast);

        ListView listView = (ListView) rootView.findViewById(
                R.id.listview_forecast);
        listView.setAdapter(mForecastAdapter);

        return rootView;

        // These two need to be declared outside the try/catch
        // so that they can be closed in the finally block.
        HttpURLConnection urlConnection = null;
        BufferedReader reader = null;

        // Will contain the raw JSON response as a string.
        String forecastJsonStr = null;

        try {
            // Construct the URL for the OpenWeatherMap query
            // Possible parameters are avaiable at OWM's forecast API page, at
            // http://openweathermap.org/API#forecast
            URL url = new URL("http://api.openweathermap.org/data/2.5/forecast/daily?q=Uniontown&mode=xml&units=metric&cnt=7&appid=");

            // Create the request to OpenWeatherMap, and open the connection
            urlConnection = (HttpURLConnection) url.openConnection();
            urlConnection.setRequestMethod("GET");
            urlConnection.connect();

            // Read the input stream into a String
            InputStream inputStream = urlConnection.getInputStream();
            StringBuffer buffer = new StringBuffer();
            if (inputStream == null) {
                // Nothing to do.
                return null;
            }
            reader = new BufferedReader(new InputStreamReader(inputStream));

            String line;
            while ((line = reader.readLine()) != null) {
                // Since it's JSON, adding a newline isn't necessary (it won't affect parsing)
                // But it does make debugging a *lot* easier if you print out the completed
                // buffer for debugging.
                buffer.append(line + "\n");
            }

            if (buffer.length() == 0) {
                // Stream was empty.  No point in parsing.
                return null;
            }
            forecastJsonStr = buffer.toString();
        } catch (IOException e) {
            Log.e("PlaceholderFragment", "Error ", e);
            // If the code didn't successfully get the weather data, there's no point in attemping
            // to parse it.
            return null;
        } finally{
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (final IOException e) {
                    Log.e("PlaceholderFragment", "Error closing stream", e);
                }
            }
        }

        return rootView;
    }
  }
}

enter image description here

enter image description here

问题基本上解决了。我是这样做的:

def find_payment(html):
    """Extract the rows of the pending-payment table from a page.

    Looks up the first ``<table>`` with ``cellspacing="0"``,
    ``cellpadding="2"`` and ``border="0"`` and collects the stripped text of
    every non-empty ``<td>`` cell, row by row.

    Args:
        html: Markup of the page, in any form ``BeautifulSoup`` accepts.

    Returns:
        A list with one entry per ``<tr>``; each entry is the list of
        non-empty cell texts of that row. An empty list is returned when the
        page has no matching table (e.g. when there is no payment data).
    """
    soup = BeautifulSoup(html)
    table = soup.find('table', cellspacing="0", cellpadding="2", border="0")
    if table is None:
        # No payment table on the page: report "no data" instead of crashing.
        return []

    # Not every parser/page provides an explicit <tbody>; fall back to the
    # table itself so the rows are still found.
    table_body = table.find('tbody')
    if table_body is None:
        table_body = table

    payment_data = []
    for row in table_body.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        # Drop cells that are empty after stripping whitespace.
        payment_data.append([text for text in cells if text])
    return payment_data

2 个答案:

答案 0 :(得分:1)

为什么不直接按 class 为“success”或“body10”来查找“td”呢?

 def find_payments(html):
        """Return the pending payments found in ``html``.

        Returns the string "There is no pending manifest payment" when the
        page contains a ``<td class="success">`` cell; otherwise returns a
        list with the text of every ``<td class="body10">`` cell.
        """
        soup = BeautifulSoup(html)
        if soup.find("td", {"class": "success"}):
            payments = "There is no pending manifest payment"
        else:
            payments = [pmnt.text for pmnt in soup.find_all("td", {"class": "body10"})]
        # The result was previously computed but never handed back to the caller.
        return payments

答案 1 :(得分:0)

一种做法是稍微防御一些(有点 LBYL 风格),事先搜索“没有待处理的清单付款”(There is no pending manifest payment)这个元素:

# Look before you leap: check for the "no payment" notice before parsing rows.
no_payment_notice = soup.find(text="There is no pending manifest payment")
if no_payment_notice is not None:
    print("No payment data")