我想计算视频的观看次数。我有以下结构。
package com.test
import java.util
import java.util.concurrent.ExecutorService
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put, Table}
import org.apache.hadoop.hbase.security.User
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.sql.ForeachWriter
import scala.collection.mutable
/**
 * Base [[ForeachWriter]] that writes batches of `Put`s to a single HBase table.
 *
 * `open` creates a connection and looks up the table named by `tableName`,
 * `process` writes whatever `bulkPut` returns, and `close` releases the table
 * and the connection.
 *
 * @tparam RECORD type of the records handed to `process`
 */
trait HBaseBulkForeachWriter[RECORD] extends ForeachWriter[RECORD] {

  /** Name of the target HBase table; resolved with `TableName.valueOf`. */
  val tableName: String

  /**
   * Resources added to the HBase configuration before the connection is created.
   *
   * NOTE(review): each entry is passed to `Configuration.addResource` as a `String`.
   * Hadoop presumably resolves that overload against the classpath rather than as a
   * file-system path - confirm the placeholders are replaced with the right kind of value.
   */
  val hbaseConfResources: mutable.Seq[String] =
    mutable.Seq("location for core-site.xml", "location for hbase-site.xml")

  /** Optional executor passed to `ConnectionFactory.createConnection`; `None` passes `null`. */
  def pool: Option[ExecutorService] = None

  /** Optional HBase user passed to `ConnectionFactory.createConnection`; `None` passes `null`. */
  def user: Option[User] = None

  // Both stay null until `open` succeeds and are reset to null by `close`.
  private var hTable: Table = _
  private var connection: Connection = _

  /**
   * Creates the connection and the table handle used by `process`.
   *
   * If the table lookup fails, the freshly created connection is closed before the
   * error is rethrown, so it is not leaked. (NOTE(review): this assumes the caller
   * does not invoke `close` when `open` throws - confirm against the Spark
   * `ForeachWriter` contract.)
   *
   * @param partitionId partition identifier supplied by the caller (unused here)
   * @param version     version supplied by the caller (unused here)
   * @return always `true`
   */
  override def open(partitionId: Long, version: Long): Boolean = {
    val conn = createConnection()
    val table =
      try getHTable(conn)
      catch {
        case scala.util.control.NonFatal(e) =>
          // Release the connection, but keep the original failure as the primary error.
          try conn.close()
          catch { case scala.util.control.NonFatal(closeError) => e.addSuppressed(closeError) }
          throw e
      }
    connection = conn
    hTable = table
    true
  }

  /**
   * Builds an HBase configuration from `hbaseConfResources` and opens a connection
   * with the optional `pool` and `user`.
   *
   * @return a new connection; the caller is responsible for closing it
   */
  def createConnection(): Connection = {
    val hbaseConfig = HBaseConfiguration.create()
    hbaseConfResources.foreach(resource => hbaseConfig.addResource(resource))
    ConnectionFactory.createConnection(hbaseConfig, pool.orNull, user.orNull)
  }

  /**
   * Looks up the table named by `tableName` on the given connection.
   *
   * @param connection connection to obtain the table from
   * @return the table handle
   */
  def getHTable(connection: Connection): Table =
    connection.getTable(TableName.valueOf(tableName))

  /**
   * Writes the list returned by `bulkPut` to the table.
   *
   * NOTE(review): `record` is not passed to `bulkPut`, so implementations cannot
   * derive the puts from the record through this interface - confirm this is intended.
   *
   * @param record the record being processed (not used by this implementation)
   */
  override def process(record: RECORD): Unit = {
    val puts = bulkPut
    hTable.put(puts)
  }

  /**
   * Closes the table and then the connection.
   *
   * Either handle may still be null (for example when `open` never completed), so
   * both are null-guarded. The connection is closed even if closing the table throws.
   *
   * @param errorOrNull error passed in by the caller, or `null`; not used here
   */
  override def close(errorOrNull: Throwable): Unit = {
    try Option(hTable).foreach(_.close())
    finally {
      try Option(connection).foreach(_.close())
      finally {
        // Drop the references so a repeated close is a no-op.
        hTable = null
        connection = null
      }
    }
  }

  /** The puts written by each `process` call; supplied by the implementation. */
  def bulkPut: util.ArrayList[Put]
}
我只需要计算两个数组({
_id: ObjectId("some_id")
views:[
{ userId:1, groupId:[1,2,4] },
{ userId:2, groupId:[2,4] },
{ userId:1, groupId:[3,5] }
]
}
和输入数组)是否具有公共元素;如果有,则将计数加一。
如果输入数组为 [1,3,5],则观看次数为 2。(第一个对象含有公共元素 1,第三个对象含有公共元素 3、5)
我编写了以下代码,但给出了错误的答案。
[1,3,5]
我花了更多时间自己解决问题,但今天没有运气。预先感谢
答案 0 :(得分:1)
使用 $setIntersection 和 $size 的思路是正确的,但由于您要把数组归约为单个值,最好使用 $reduce。另外,使用 $min 可以确保每次迭代累加的值只能是 0 或 1(最多为 1):
// Group ids supplied as input; a view counts when its groupId array shares any of them.
var inputGroupIds = [ 1, 3, 5 ];

// Size of the intersection capped at 1, so each view contributes either 0 or 1.
var viewContribution = {
  $min: [ 1, { $size: { $setIntersection: [ "$$this.groupId", inputGroupIds ] } } ]
};

// Fold the views array into a single number by summing the per-view contributions.
db.collection.aggregate([
  {
    $project: {
      views: {
        $reduce: {
          input: "$views",
          initialValue: 0,
          in: { $add: [ "$$value", viewContribution ] }
        }
      }
    }
  }
])
答案 1 :(得分:1)
您需要为 $filter 运算符定义有效的条件。在您的情况下,如果在 cond 参数中返回 Number 类型的值,它会按照 MongoDB 的约定被转换为 Boolean(0 为 false,非零为 true):
// Group ids supplied as input; a view is kept when its groupId array shares any of them.
var wantedGroupIds = [ 1, 3, 5 ];

// Keep only the views whose intersection with the input is non-empty.
// The numeric $size result is used directly as the filter condition.
var matchingViews = {
  $filter: {
    input: "$views",
    as: "view",
    cond: { $size: { $setIntersection: [ "$$view.groupId", wantedGroupIds ] } }
  }
};

// Count the matching views; $ifNull substitutes an empty array when the filter yields null.
db.collection.aggregate([
  {
    $project: {
      views: { $size: { $ifNull: [ matchingViews, [] ] } }
    }
  }
])