我有三个 df:
#include<iostream>
#include<map>
#include <cmath>
#include <cstdio>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
int main() {
int n = 2;
vector <vector<int>*> vec1;
for (int i = 0; i < n; i++)
{
vector<int> *vec = new vector<int>;
vec->clear();
cout << "Enter number of var : ";
int var;
cin >> var;
for (int i = 0; i < var; i++)
{
cout << "Enter number : ";
int num ;
cin >> num;
vec->push_back(num);
}
vec1.push_back(vec);
cout << "Container ends \n";
}
cout << "Enter i : ";
int i;
cin >> i;
cout << "Enter j : ";
int j;
cin >> j;
auto newvec = vec1[i];
cout << newvec->at(j) << endl;;
system("pause");
return 0;
}
三个 df 分别是:第一个是基础 df(Base),第二个是行为 df(Behavior),第三个是规则 df(Rule)。
我想为每个人计算奖励积分,并把结果作为名为“奖励”的新列添加到 Base df 中。计算时需要检查 Behavior df 中的条件:如果 a AND b 为 true,则分配 100 分;如果 a OR b 为 true,则分配 200 分;否则分配 0 分。其中条件 a 和 b 的定义在 Rule 表中。
Base df:
+---+----+------+
| ID|Name|Salary|
+---+----+------+
| 1| A| 100|
| 2| B| 200|
| 3| C| 300|
| 4| D| 1000|
| 5| E| 500|
+---+----+------+
Behavior DF:
+----+---------+------+
|S.NO|Operation|Points|
+----+---------+------+
| 1| a AND b| 100|
| 2| a OR b| 200|
| 3|otherwise| 0|
+----+---------+------+
Rule DF:
+----+-----+------+------------+-----+
|RULE|Table| col| operation|value|
+----+-----+------+------------+-----+
| a| Base|Salary| equal| 1000|
| b| Base|Salary|Greater Than| 500|
+----+-----+------+------------+-----+
答案 0 :(得分:0)
您可以采用这种方法-
我必须对 Rule 和 Behavior 数据帧做一些更改:将操作存储为逻辑运算符(例如 "==")而不是文字描述(例如 "equal")。
# Base: one row per person.
Base = spark.createDataFrame(
    [
        (1, 'A', 100),
        (2, 'B', 200),
        (3, 'C', 300),
        (4, 'D', 1000),
        (5, 'E', 500),
    ],
    schema=['ID', 'Name', 'Salary'],
)

# Behavior: each Operation is the text of a Python expression over the
# rule_a / rule_b columns of a DataFrame named `df`; 'otherwise' is the fallback row.
Behavior = spark.createDataFrame(
    [
        (1, 'df.rule_a & df.rule_b', 100),
        (2, 'df.rule_a | df.rule_b', 200),
        (3, 'otherwise', 0),
    ],
    schema=['SNo', 'Operation', 'Points'],
)

# Rule: the comparison is stored as an operator symbol and RULE is a number.
Rule = spark.createDataFrame(
    [
        (1, 'Base', 'Salary', '==', 1000),
        (2, 'Base', 'Salary', '>', 500),
    ],
    schema=['RULE', 'Table', 'col', 'operation', 'value'],
)

Base.show()
#+---+----+------+
#| ID|Name|Salary|
#+---+----+------+
#| 1| A| 100|
#| 2| B| 200|
#| 3| C| 300|
#| 4| D| 1000|
#| 5| E| 500|
#+---+----+------+
Behavior.show()
#+---+---------------------+------+
#|SNo| Operation |Points|
#+---+---------------------+------+
#| 1|df.rule_a & df.rule_b| 100|
#| 2|df.rule_a | df.rule_b| 200|
#| 3| otherwise | 0|
#+---+---------------------+------+
Rule.show()
#+----+-----+------+---------+-----+
#|RULE|Table| col|operation|value|
#+----+-----+------+---------+-----+
#| 1| Base|Salary| ==| 1000|
#| 2| Base|Salary| >| 500|
#+----+-----+------+---------+-----+
为存储在 Rule 数据帧中的规则准备逻辑
要动态地准备规则,可以对 Rule 数据帧运行 for 循环,并把迭代编号传给 filter 转换以及规则变量名(rule_a、rule_b……)。
from pyspark.sql.functions import col,concat,lit


def _rule_expression(rule_id):
    """Return the text of the condition stored in `Rule` for `rule_id`.

    The text is assembled as ``<Table>['<col>'] <operation><value>``, for
    example ``Base['Salary'] ==1000``.

    Column names are written exactly as declared when `Rule` was created
    (``operation``, ``value``), so the lookup does not depend on Spark
    resolving column names case-insensitively.

    Raises IndexError if no row of `Rule` has this RULE value.
    """
    rule_text = concat(
        col("Table"), lit("['"), col("col"), lit("']"),
        lit(" "), col("operation"), col("value"),
    )
    return Rule.filter(col("RULE") == rule_id).select(rule_text).collect()[0][0]


rule_a = _rule_expression(1)
rule_b = _rule_expression(2)
将规则执行的布尔结果添加到数据框
# Evaluate each rule's text into a boolean Column and append it to Base under
# the rule's name.
# NOTE(review): eval() runs the rule text as Python code; only use this when
# the contents of the Rule table are trusted.
df = Base.select(
    "*",
    eval(rule_a).alias("rule_a"),
    eval(rule_b).alias("rule_b"),
)
df.show()
#+---+----+------+------+------+
#| ID|Name|Salary|rule_a|rule_b|
#+---+----+------+------+------+
#| 1| A| 100| false| false|
#| 2| B| 200| false| false|
#| 3| C| 300| false| false|
#| 4| D| 1000| true| true|
#| 5| E| 500| false| false|
#+---+----+------+------+------+
将 Behavior 数据帧中的行为及其对应的分数存入变量
要动态地准备这些变量,可以对 Behavior 数据帧运行 for 循环,并把迭代编号作为变量传给 filter 转换和列名。
# Pull the three-row Behavior table to the driver once and index it by SNo,
# instead of running a separate filter/select/collect job for every value.
# A missing SNo raises KeyError on the lookups below.
_behavior_by_sno = {
    row["SNo"]: row
    for row in Behavior.select("SNo", "Operation", "Points").collect()
}

# Operation is the expression text, Points is the score awarded for it.
behavior1 = _behavior_by_sno[1]["Operation"]
behavior1_points = _behavior_by_sno[1]["Points"]
behavior2 = _behavior_by_sno[2]["Operation"]
behavior2_points = _behavior_by_sno[2]["Points"]
behavior3 = _behavior_by_sno[3]["Operation"]
behavior3_points = _behavior_by_sno[3]["Points"]
最终解决方案
from pyspark.sql.functions import lit,when,col,greatest

# Award each row the largest score among the behaviors it satisfies, with
# behavior3_points as the fallback, then drop the helper rule columns.
#
# The behavior conditions are boolean Columns, so they are passed to when()
# directly rather than compared with the string 'true'.
# NOTE(review): eval() runs the Behavior text as Python code and expects a
# DataFrame named `df` with rule_a / rule_b columns; only use trusted input.
# NOTE(review): greatest() means a row matching both behaviors gets the higher
# score (200 for ID 4), not the first matching one. Confirm this is intended.
(
    df
    .select(
        '*',
        greatest(
            when(eval(behavior1), lit(behavior1_points)).otherwise(0),
            when(eval(behavior2), lit(behavior2_points)).otherwise(0),
            lit(behavior3_points),
        ).alias('point'),
    )
    .drop('rule_a', 'rule_b')
    .show()
)
#+---+----+------+-----+
#| ID|Name|Salary|point|
#+---+----+------+-----+
#| 1| A| 100| 0|
#| 2| B| 200| 0|
#| 3| C| 300| 0|
#| 4| D| 1000| 200|
#| 5| E| 500| 0|
#+---+----+------+-----+