Pyspark使用.filter()过滤掉空列表

时间:2017-02-24 11:54:02

标签: python-2.7 filter pyspark pyspark-sql

我有一个pyspark数据框,其中一列的值是列表:要么包含若干条目,要么只是空列表。我想高效地过滤掉该列为空列表的所有行。

package edu.arnab.simpledialogmenu;

import android.app.AlertDialog;
import android.content.DialogInterface;
import android.content.DialogInterface.OnClickListener;
import android.graphics.Color;
import android.os.Bundle;
import android.support.v7.app.ActionBarActivity;
import android.view.Menu;
import android.view.MenuItem;
import android.view.View;
import android.widget.EditText;
import android.widget.RelativeLayout;
import android.widget.TextView;
import android.widget.Toast;


/**
 * Demo activity that exercises three menu mechanisms: context menus on the
 * root layout and on the title view, an options menu, and a text-entry
 * {@link AlertDialog} whose buttons are handled by this activity.
 *
 * <p>The dialog's text fields are read when the positive button is pressed,
 * so the values shown on screen are what the user actually typed.
 */
public class MainActivity extends ActionBarActivity implements OnClickListener {

    // Context-menu group ids: one group per view that owns a context menu.
    private static final int CTX_GROUP_LAYOUT = 1;
    private static final int CTX_GROUP_TITLE = 2;

    // Context-menu item ids.
    private static final int CTX_BACKGROUND_YELLOW = 1;
    private static final int CTX_BACKGROUND_CYAN = 2;
    private static final int CTX_TITLE_COD = 3;
    private static final int CTX_TITLE_NFS = 4;

    // Options-menu group, order and item ids.
    private static final int OPT_GROUP = 1;
    private static final int OPT_ORDER = 1;
    private static final int OPT_GAME_DIALOG = 1;
    private static final int OPT_CHANGE_BACKGROUND = 2;
    private static final int OPT_EXIT = 3;

    RelativeLayout layout;
    TextView tvTitle, tvCaption, tvStudio;
    // Dialog input fields; assigned each time the game-entry dialog is built.
    EditText gTitle, gCaption;
    // Last values confirmed through the game-entry dialog.
    String title, caption;

    /**
     * Inflates the main layout, looks up the views used by the menus and
     * registers the root layout and the title view for context menus.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        layout = (RelativeLayout) findViewById(R.id.relative1);

        tvTitle = (TextView) findViewById(R.id.textView1);
        tvCaption = (TextView) findViewById(R.id.textView2);
        tvStudio = (TextView) findViewById(R.id.textView3);

        registerForContextMenu(layout);
        registerForContextMenu(tvTitle);
    }

    /**
     * Populates the context menu depending on which registered view it is
     * being built for: background colours for the layout, title presets for
     * the title view.
     */
    @Override
    public void onCreateContextMenu(android.view.ContextMenu menu, android.view.View v,
            android.view.ContextMenu.ContextMenuInfo menuInfo) {
        if (v == layout) {
            menu.add(CTX_GROUP_LAYOUT, CTX_BACKGROUND_YELLOW, 0, "Make Background Yellow");
            menu.add(CTX_GROUP_LAYOUT, CTX_BACKGROUND_CYAN, 0, "Make Background Cyan");
        } else if (v == tvTitle) {
            // Drop any layout-group entries already present in this menu.
            menu.removeGroup(CTX_GROUP_LAYOUT);
            menu.add(CTX_GROUP_TITLE, CTX_TITLE_COD, 0, "Make Title COD");
            menu.add(CTX_GROUP_TITLE, CTX_TITLE_NFS, 0, "Make Title NFS");
        }

        super.onCreateContextMenu(menu, v, menuInfo);
    }

    /**
     * Applies the selected context-menu action.
     *
     * @return {@code true} when the item was handled here, otherwise the
     *         result of the superclass implementation
     */
    @Override
    public boolean onContextItemSelected(MenuItem item) {
        switch (item.getItemId()) {
            case CTX_BACKGROUND_YELLOW:
                layout.setBackgroundColor(Color.YELLOW);
                return true;
            case CTX_BACKGROUND_CYAN:
                layout.setBackgroundColor(Color.CYAN);
                return true;
            case CTX_TITLE_COD:
                tvTitle.setText("Title: Call of Duty");
                return true;
            case CTX_TITLE_NFS:
                tvTitle.setText("Title: Need for Speed");
                return true;
            default:
                return super.onContextItemSelected(item);
        }
    }

    /**
     * Builds the options menu with its three entries.
     *
     * @return {@code true} so the menu is displayed
     */
    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        menu.add(OPT_GROUP, OPT_GAME_DIALOG, OPT_ORDER, "Game Entry Dialog");
        menu.add(OPT_GROUP, OPT_CHANGE_BACKGROUND, OPT_ORDER, "Change Background");
        menu.add(OPT_GROUP, OPT_EXIT, OPT_ORDER, "Exit App");
        return true;
    }

    /**
     * Runs the selected options-menu action and confirms it with a toast.
     * Items this activity did not add are delegated to the superclass
     * without showing a toast.
     *
     * @return {@code true} when the item was handled here, otherwise the
     *         result of the superclass implementation
     */
    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        switch (item.getItemId()) {
            case OPT_GAME_DIALOG:
                showGameEntryDialog();
                break;
            case OPT_CHANGE_BACKGROUND:
                layout.setBackgroundColor(Color.GREEN);
                tvTitle.setBackgroundColor(Color.WHITE);
                tvCaption.setBackgroundColor(Color.LTGRAY);
                break;
            case OPT_EXIT:
                finish();
                break;
            default:
                // Not one of our items: no toast, let the framework decide.
                return super.onOptionsItemSelected(item);
        }

        String text = item.getTitle().toString();
        // Toast durations must be LENGTH_SHORT or LENGTH_LONG, not milliseconds.
        Toast.makeText(this, "You have selected menu item " + text, Toast.LENGTH_LONG).show();
        return true;
    }

    /**
     * Builds and shows the game-entry dialog. The input fields are only
     * looked up here; their contents are read in
     * {@link #onClick(DialogInterface, int)} once the user confirms.
     */
    private void showGameEntryDialog() {
        AlertDialog.Builder ab = new AlertDialog.Builder(this);
        ab.setTitle("New Game Entry Dialog");

        View view = getLayoutInflater().inflate(R.layout.dlg_layout, null);
        ab.setView(view);

        gTitle = (EditText) view.findViewById(R.id.editTitle);
        gCaption = (EditText) view.findViewById(R.id.editCaption);

        ab.setPositiveButton("OKAY IT", this);
        ab.setNegativeButton("CANCEL IT", this);

        AlertDialog ad = ab.create();
        ad.show();
    }

    /**
     * Handles the game-entry dialog buttons.
     *
     * @param arg0 the dialog that received the click
     * @param arg1 the button that was clicked, e.g.
     *             {@link DialogInterface#BUTTON_POSITIVE}
     */
    @Override
    public void onClick(DialogInterface arg0, int arg1) {
        switch (arg1) {
            case DialogInterface.BUTTON_POSITIVE:
                // Read the fields now: reading them when the dialog is built
                // would only capture their initial, empty contents.
                title = gTitle.getText().toString();
                caption = gCaption.getText().toString();
                tvTitle.setText(title);
                tvCaption.setText(caption);
                break;
            case DialogInterface.BUTTON_NEGATIVE:
                Toast.makeText(this, "You cancelled dialog entry", Toast.LENGTH_SHORT).show();
                break;
        }
    }
}

我尝试了下面的代码,但它返回了错误:

# Attempt from the question: keep rows whose list column is not equal to an
# empty Python list. The question reports that this call fails with a
# Py4JJavaError raised from notEqual ("Unsupported literal type class").
import pyspark.sql.functions as sf
df.filter(sf.col('column_with_lists') != []) 

也许我可以检查列表的长度,并要求它 > 0(见here)。但是,如果我使用pyspark-sql,我不确定这种语法是如何工作的,也不确定filter中是否允许使用lambda。上面的代码报出的错误是:Py4JJavaError: An error occurred while calling o303.notEqual. : java.lang.RuntimeException: Unsupported literal type class

需要说明的是,我的数据框有多个列,但我只想在其中一列上应用上述过滤器,并删除所有符合条件的条目(整行)。链接的SO示例是在单个列上进行过滤的。

提前致谢!

2 个答案:

答案 0 :(得分:5)

所以看起来只需使用sql.functions中的size函数就可以了:

# Keep only the rows whose list column has at least one element:
# sf.size gives the number of elements in the column's list, so the
# condition "> 0" excludes rows holding an empty list.
import pyspark.sql.functions as sf
df.filter(sf.size('column_with_lists') > 0)

答案 1 :(得分:0)

filter函数中绝对允许使用lambda函数,在PySpark中甚至也可以使用普通函数!使用lambda函数从数据中过滤掉空列表的一种方法是:

# Keeps the rows for which the lambda returns True, i.e. rows whose
# 'column_with_lists' value has a size greater than zero.
# NOTE(review): a lambda predicate looks like the RDD.filter form; DataFrame.filter
# is documented as taking a Column or SQL string — confirm this runs on a DataFrame.
# NOTE(review): `size` is not imported in this snippet — presumably
# pyspark.sql.functions.size or len was intended; verify.
df_filtered = df.filter(lambda x: size(x['column_with_lists']) > 0.0)

这将收集“column_with_lists”列中列表大小大于0的所有行。

您链接的示例执行相同的任务,但是在整个数据框而不是特定列上。