Question

我想编写一个无需复制代码即可接受RDD和Seq的方法。

// Sketch from the question: turn every InputClass element of `input` into an
// OutputClass while returning the same container type F that was passed in.
// NOTE(review): F[_] is unconstrained in this signature, so nothing here
// guarantees that `input` actually has a `map` member — confirm how F is
// meant to be bounded before relying on this.
def myMethod[F[_]](input: F[InputClass]): F[OutputClass] = {
    // Placeholder body: apply the per-element transformation, for example:
    input.map{ i => 
       // build the OutputClass value derived from `i`
    }
}

F可以是Seq或RDD，因为它们都实现了方法map。

对于更特殊的方法（例如count或cache），我能否让Seq的cache不执行任何操作，并用length来实现count？

Answer 1

您需要的是类型类（Type Class）。如果只需要map和flatMap方法，建议您使用Monad（例如Cats提供的Monad），并为RDD提供相应的实例实现。

现在，如果需要更多方法，可以实现自己的类型类。

import scala.language.higherKinds
import scala.reflect.ClassTag

/** Type class describing the minimal set of operations shared by the
  * collection-like containers this code wants to treat uniformly.
  *
  * @tparam F the container type constructor (instances are provided below
  *           for `RDD` and `Seq`)
  */
trait DataCollection[F[_]] {

  /** Applies `f` to every element of `col`.
    *
    * A `ClassTag` for the result element type is required because the RDD
    * instance delegates to `RDD.map`, which demands one; instances that do
    * not need it simply ignore it.
    */
  def map[A, B: ClassTag](col: F[A])(f: A => B): F[B]

  /** Returns `col` marked as cached, for containers where that is meaningful. */
  def cache[A](col: F[A]): F[A]

  /** Returns the number of elements in `col`. */
  def count[A](col: F[A]): Long
}

/** Instances and extension syntax for [[DataCollection]].
  *
  * NOTE(review): `RDD` is presumably `org.apache.spark.rdd.RDD`; its import is
  * not visible in this snippet and must be present for this object to compile.
  */
object DataCollection {

  /** Instance that delegates every operation to the corresponding RDD method. */
  implicit val RddDataCollection: DataCollection[RDD] = new DataCollection[RDD] {
    // The ClassTag context bound is what makes `rdd.map(f)` resolvable.
    override def map[A, B: ClassTag](rdd: RDD[A])(f: A => B): RDD[B] = rdd.map(f)
    override def cache[A](rdd: RDD[A]): RDD[A] = rdd.cache()
    override def count[A](rdd: RDD[A]): Long = rdd.count()
  }

  /** Instance for in-memory sequences. */
  implicit val SeqDataCollection: DataCollection[Seq] = new DataCollection[Seq] {
    // The ClassTag is unused here; it is only part of the shared signature.
    override def map[A, B: ClassTag](seq: Seq[A])(f: A => B): Seq[B] = seq.map(f)
    // Nothing to cache for a local sequence, so it is returned unchanged.
    override def cache[A](seq: Seq[A]): Seq[A] = seq
    // `length` is an Int; widen explicitly to the Long the type class promises.
    override def count[A](seq: Seq[A]): Long = seq.length.toLong
  }

  /** Method-call syntax (`col.map(...)`, `col.cache()`, `col.count()`) for any
    * container that has a [[DataCollection]] instance in implicit scope.
    */
  implicit class Ops[F[_], A](val col: F[A]) extends AnyVal {
    @inline
    def map[B](f: A => B)(implicit DC: DataCollection[F], tag: ClassTag[B]): F[B] =
      DC.map(col)(f)

    @inline
    def cache()(implicit DC: DataCollection[F]): F[A] = DC.cache(col)

    @inline
    def count()(implicit DC: DataCollection[F]): Long = DC.count(col)
  }
}

/** Example of a method written once against [[DataCollection]]: maps each
  * element to itself, caches the result and returns the element count.
  *
  * @param col any container with a `DataCollection` instance
  * @return the number of elements in `col`
  */
def myGenericMethod[F[_]: DataCollection, T: ClassTag](col: F[T]): Long = {
  import DataCollection.Ops
  col.map(x => x).cache().count()
}

适用于RDD和Seq

1 个答案: