我有一个pyspark数据框,其中一列填充了列表,包含条目或只是空列表。我想有效地过滤掉包含空列表的所有行。
package edu.arnab.simpledialogmenu;
import android.app.AlertDialog;
import android.content.DialogInterface;
import android.content.DialogInterface.OnClickListener;
import android.graphics.Color;
import android.os.Bundle;
import android.support.v7.app.ActionBarActivity;
import android.view.Menu;
import android.view.MenuItem;
import android.view.View;
import android.widget.EditText;
import android.widget.RelativeLayout;
import android.widget.TextView;
import android.widget.Toast;
public class MainActivity extends ActionBarActivity implements OnClickListener {
RelativeLayout layout;
TextView tvTitle, tvCaption, tvStudio;
EditText gTitle, gCaption;
String title, caption;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
layout = (RelativeLayout) findViewById(R.id.relative1);
tvTitle = (TextView) findViewById(R.id.textView1);
tvCaption = (TextView) findViewById(R.id.textView2);
tvStudio = (TextView) findViewById(R.id.textView3);
registerForContextMenu(layout);
registerForContextMenu(tvTitle);
}
@Override
public void onCreateContextMenu(android.view.ContextMenu menu, android.view.View v, android.view.ContextMenu.ContextMenuInfo menuInfo)
{
if(v == layout)
{
menu.add(1, 1, 0, "Make Background Yellow");
menu.add(1, 2, 0, "Make Background Cyan");
}
else if(v == tvTitle)
{
menu.removeGroup(1);
menu.add(2, 3, 0, "Make Title COD");
menu.add(2, 4, 0, "Make Title NFS");
}
super.onCreateContextMenu(menu, v, menuInfo);
};
@Override
public boolean onContextItemSelected(MenuItem item) {
// TODO Auto-generated method stub
switch(item.getItemId())
{
case 1:
layout.setBackgroundColor(Color.YELLOW);
break;
case 2:
layout.setBackgroundColor(Color.CYAN);
break;
case 3:
tvTitle.setText("Title: Call of Duty");
break;
case 4:
tvTitle.setText("Title: Need for Speed");
break;
}
return super.onContextItemSelected(item);
}
public boolean onCreateOptionsMenu(Menu menu) {
menu.add(1, 1, 1, "Game Entry Dialog");
menu.add(1, 2, 1, "Change Background");
menu.add(1, 3, 1, "Exit App");
return true;
}
@Override
public boolean onOptionsItemSelected(MenuItem item) {
String text = null;
switch(item.getItemId())
{
case 1:
text = item.getTitle().toString();
// Show Game Dialog
AlertDialog.Builder ab = new AlertDialog.Builder(this);
ab.setTitle("New Game Entry Dialog");
View view = getLayoutInflater().inflate(R.layout.dlg_layout, null);
ab.setView(view);
gTitle = (EditText) view.findViewById(R.id.editTitle);
gCaption = (EditText) view.findViewById(R.id.editCaption);
title = gTitle.getText().toString();
caption = gCaption.getText().toString();
ab.setPositiveButton("OKAY IT", this);
ab.setNegativeButton("CANCEL IT", this);
AlertDialog ad = ab.create();
ad.show();
break;
case 2:
text = item.getTitle().toString();
layout.setBackgroundColor(Color.GREEN);
tvTitle.setBackgroundColor(Color.WHITE);
tvCaption.setBackgroundColor(Color.LTGRAY);
break;
case 3:
text = item.getTitle().toString();
finish();
break;
}
Toast.makeText(this, "You have selected menu item " + text, 3000).show();
return super.onOptionsItemSelected(item);
}
@Override
public void onClick(DialogInterface arg0, int arg1) {
// TODO Auto-generated method stub
switch(arg1)
{
case DialogInterface.BUTTON_POSITIVE:
//take text from dialog fields and show all info on MainActivity
tvTitle.setText(title);
tvCaption.setText(caption);
break;
case DialogInterface.BUTTON_NEGATIVE:
Toast.makeText(this, "You cancelled dialog entry", 300).show();
break;
}
}
}
import pyspark.sql.functions as sf
df.filter(sf.col('column_with_lists') != [])
返回以下错误:
Py4JJavaError: An error occurred while calling o303.notEqual.
: java.lang.RuntimeException: Unsupported literal type class
也许我可以检查列表的长度并要求它 > 0(见here)。但是,如果我使用pyspark-sql,我不确定这种语法是如何工作的,也不确定filter中是否允许使用lambda。
也许要说清楚,我有多个列,但想要在一个列上应用上面的过滤器,删除所有条目。链接的SO示例在单个列上进行过滤。
提前致谢!
答案 0 :(得分:5)
看起来只需使用 sql.functions 中的 size 函数即可:
import pyspark.sql.functions as sf
df.filter(sf.size('column_with_lists') > 0)
答案 1 :(得分:0)
在过滤的函数中绝对允许使用Lambda函数,甚至可以在PySpark中考虑普通函数!使用lambda函数,从数据中筛选出空列表的一种方法是:
df_filtered = df.filter(lambda x: size(x['column_with_lists']) > 0.0)
这将收集“column_with_lists”列中列表大小大于0的所有行。
您链接的示例执行相同的任务,但是在整个数据框而不是特定列上。