How do I create a Spark UDF in Java that accepts an array of strings?

Asked: 2019-11-25 06:39:09

Tags: java apache-spark

This question has been asked here for Scala, but it doesn't help me since I'm using the Java API. I've been throwing everything and the kitchen sink at this problem.

Here is my UDF declaration:

List<String> sourceClasses = new ArrayList<String>();
//Add elements
List<String> targetClasses = new ArrayList<String>();
//Add elements

dataset = dataset.withColumn("Transformer", callUDF(
    "Transformer",
    lit((String[])sourceClasses.toArray())
        .cast(DataTypes.createArrayType(DataTypes.StringType)),
    lit((String[])targetClasses.toArray())
        .cast(DataTypes.createArrayType(DataTypes.StringType))
));

When I run the code, execution never gets past the UDF call, which is to be expected since I can't get the types to match. Please help me out here.

EDIT

I tried the solution suggested by @Oli. My UDF now looks like this:

public class Transformer implements UDF2<Seq<String>, Seq<String>, String> {

    // @SuppressWarnings("deprecation")
    public String call(Seq<String> sourceClasses, Seq<String> targetClasses)
            throws Exception {

However, I get the exception below; the `Caused by` line at the bottom seems to pinpoint the problem:

org.apache.spark.SparkException: Failed to execute user defined function($anonfun$261: (array<string>, array<string>) => string)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassCastException: scala.collection.mutable.WrappedArray$ofRef cannot be cast to scala.collection.immutable.Seq
at com.esrx.dqm.uuid.UUIDTransformerEngine$1.call(UUIDTransformerEngine.java:1)
at org.apache.spark.sql.UDFRegistration$$anonfun$261.apply(UDFRegistration.scala:774)
... 22 more

1 Answer:

Answer 0 (score: 3)

From what I understand of UDF types, you are trying to create a UDF that takes two arrays as inputs and returns a string. Incidentally, that is exactly what the ClassCastException in your edit is telling you: WrappedArray belongs to Scala's mutable collection hierarchy, so it cannot be cast to scala.collection.immutable.Seq.

In Java this is a bit painful, but manageable.

Let's say you want to join the two arrays and link them with the word AND. You could define the UDF as follows:

UDF2<WrappedArray<String>, WrappedArray<String>, String> my_udf2 =
        new UDF2<WrappedArray<String>, WrappedArray<String>, String>() {
    @Override
    public String call(WrappedArray<String> a1, WrappedArray<String> a2) throws Exception {
        // Convert the Scala WrappedArrays into Java collections so they
        // can be manipulated with the usual Java APIs.
        ArrayList<String> l1 = new ArrayList<>(JavaConverters
            .asJavaCollectionConverter(a1)
            .asJavaCollection());
        ArrayList<String> l2 = new ArrayList<>(JavaConverters
            .asJavaCollectionConverter(a2)
            .asJavaCollection());
        // Comma-join each array, then link the two results with " AND ".
        return l1.stream().collect(Collectors.joining(",")) +
             " AND " +
             l2.stream().collect(Collectors.joining(","));
    }
};

Note that you need to use the Scala WrappedArray type in the method's signature and convert the arguments in the method's body with JavaConverters, so that you can manipulate them from Java. Here are the required imports, just in case:

import java.util.ArrayList;
import java.util.stream.Collectors;

import org.apache.spark.sql.api.java.UDF2;

import scala.collection.JavaConverters;
import scala.collection.mutable.WrappedArray;

Then you can register the UDF and use it with Spark. To demonstrate it, I created a sample dataframe and two dummy arrays from the "id" column. Note that this also works with the lit function, which you were trying to use in your question:

spark.udf().register("my_udf2", my_udf2, DataTypes.StringType);

String[] data = {"abcd", "efgh", "ijkl"};

spark.range(3)
    .withColumn("id", col("id").cast("string"))
    .withColumn("array", functions.array(col("id"), col("id")))
    .withColumn("string_of_arrays",
          functions.callUDF("my_udf2", col("array"), lit(data)))
    .show(false);

which produces:

+---+------+----------------------+
|id |array |string_of_arrays      |
+---+------+----------------------+
|0  |[0, 0]|0,0 AND abcd,efgh,ijkl|
|1  |[1, 1]|1,1 AND abcd,efgh,ijkl|
|2  |[2, 2]|2,2 AND abcd,efgh,ijkl|
+---+------+----------------------+
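
Mapped back onto the two lists from your question, the call would presumably look like the sketch below (an untested sketch, reusing the "my_udf2" registration from above). Note the use of toArray(new String[0]): the no-argument toArray() returns an Object[], so the (String[]) cast in your snippet fails at runtime.

// Sketch: passing the question's sourceClasses / targetClasses lists
// to the UDF registered above as array literals.
String[] sources = sourceClasses.toArray(new String[0]);
String[] targets = targetClasses.toArray(new String[0]);

dataset = dataset.withColumn("Transformer",
    functions.callUDF("my_udf2", lit(sources), lit(targets)));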

In Spark >= 2.3, you could also proceed as follows:

UserDefinedFunction my_udf2 = udf(
    (WrappedArray<String> s1, WrappedArray<String> s2) -> "some_string",
    DataTypes.StringType
);

df.select(my_udf2.apply(col("a1"), col("a2"))).show(false);
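
For reference, here is a minimal self-contained sketch of that column-based style, reusing the example dataframe and the data array from above; the explicit cast to UDF2 is an assumption on my part, to disambiguate the overloads of udf when a Java lambda is passed:

import static org.apache.spark.sql.functions.*;

import org.apache.spark.sql.api.java.UDF2;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.types.DataTypes;
import scala.collection.JavaConverters;
import scala.collection.mutable.WrappedArray;

// Same "join with AND" logic as the UDF2 version, written as a lambda.
UserDefinedFunction joinArrays = udf(
    (UDF2<WrappedArray<String>, WrappedArray<String>, String>) (a1, a2) ->
        String.join(",", JavaConverters.asJavaCollectionConverter(a1).asJavaCollection())
            + " AND "
            + String.join(",", JavaConverters.asJavaCollectionConverter(a2).asJavaCollection()),
    DataTypes.StringType
);

spark.range(3)
    .withColumn("id", col("id").cast("string"))
    .withColumn("array", array(col("id"), col("id")))
    .select(joinArrays.apply(col("array"), lit(data)).alias("joined"))
    .show(false);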