我的代码很简单:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/skbuff.h>
#include <linux/inet.h>
/*
 * For each probe you need to allocate a kprobe structure.
 * This one attaches to ip_rcv(), the IPv4 receive entry point, so the
 * pre-handler below runs for every incoming IPv4 packet.
 */
static struct kprobe kp = {
.symbol_name = "ip_rcv",
};
/*
 * kprobe pre_handler: called just before the probed instruction
 * (the first instruction of ip_rcv) is executed.
 *
 * On x86_64 the first function argument — the skb — is passed in %rdi,
 * so regs->di holds the struct sk_buff pointer. Returns 0 so execution
 * of ip_rcv continues normally.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	struct sk_buff *skb = (struct sk_buff *)(regs->di);
	u16 dst_port;

	/* Be defensive: never dereference a NULL skb. */
	if (!skb || !skb->head)
		return 0;

	/*
	 * Destination port is the second 16-bit field of the TCP header.
	 * NOTE(review): this assumes skb->transport_header is already valid
	 * at ip_rcv() entry and that the payload is TCP — confirm on the
	 * target kernel; for non-TCP packets this reads an unrelated field.
	 */
	dst_port = ntohs(*((u16 *)(skb->head + skb->transport_header + 2)));
	if (dst_port == 50000) { /* 50000 is the TCP port to drop */
		/*
		 * Fixed: this is the PRE handler (the message said "post"),
		 * and regs->di is an unsigned long, so 0x%d was an invalid
		 * format specifier — use %lx.
		 */
		printk(KERN_INFO "pre handler addr 0x%p skb is 0x%lx\n",
		       p->addr, regs->di);
		/*
		 * Corrupt one byte of the IPv4 header (offset 7 = low byte
		 * of the fragment-offset field) so the IP header checksum
		 * no longer verifies and the stack drops the packet.
		 */
		*((u8 *)(skb->head + skb->network_header + 7)) = 0xab;
	}
	return 0;
}
/*
 * kprobe post_handler: called after the probed instruction is executed.
 * Intentionally empty — all filtering happens in handler_pre(); the
 * commented-out printk below is kept only as a debugging aid.
 */
static void handler_post(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
//printk(KERN_INFO "post handler addr 0x%p skb is 0x%d\n",p->addr, regs->di);
}
/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* Fixed: the format string ended in "%dn" — the newline escape was
	 * missing its backslash, so a literal 'n' was printed instead. */
	printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%d\n",
	       p->addr, trapnr);
	/* Return 0 because we don't handle the fault. */
	return 0;
}
/*
 * Module init: wire the three handlers into the kprobe and register it
 * on ip_rcv. Returns 0 on success or the negative error code from
 * register_kprobe() on failure.
 */
static int __init kprobe_init(void)
{
	int ret;

	kp.pre_handler = handler_pre;
	kp.post_handler = handler_post;
	/*
	 * NOTE(review): the .fault_handler member was removed from struct
	 * kprobe in v5.14 — this assignment only builds on older kernels.
	 */
	kp.fault_handler = handler_fault;

	ret = register_kprobe(&kp);
	if (ret < 0) {
		/* Fixed: a registration failure is an error, not info. */
		printk(KERN_ERR "register_kprobe failed, returned %d\n", ret);
		return ret;
	}
	printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
	return 0;
}
/*
 * Module exit: detach the probe from ip_rcv before the module text is
 * freed, then report the address that was probed.
 */
static void __exit kprobe_exit(void)
{
	void *probed_addr = kp.addr;

	unregister_kprobe(&kp);
	printk(KERN_INFO "kprobe at %p unregistered\n", probed_addr);
}
/* Standard module entry/exit hooks. GPL license declaration — the kprobe
 * API symbols are GPL-exported, so this is required for the module to load. */
module_init(kprobe_init)
module_exit(kprobe_exit)
MODULE_LICENSE("GPL");
我认为 test("Dataset as method") {
val spark = SparkSession.builder().master("local").appName("Dataset as method").getOrCreate()
import spark.implicits._
// xyz is an alias of the Dataset ds1 (set via Dataset.as) — NOT a temp view
val ds1 = Seq("1", "2").toDS().as("xyz")
// the alias can qualify columns in Dataset/DataFrame API calls like select
ds1.select($"xyz.value").show(truncate = false)
// ERROR here (intentional, this is the question): no table or view named xyz
spark.sql("select * from xyz").show(truncate = false)
}
xyz 看起来就像一个表名,但是 SQL 查询 `select * from xyz` 会引发错误,抱怨表或视图 xyz 不存在。
所以,我想问一下,`as` 方法到底意味着什么?以及在我这种情况下应该如何使用 xyz 这样的别名?
答案 0 :(得分:2)
`.as()` 与 dataset 一起使用(如您的情况)是为 dataset 创建 alias(别名)的功能,正如您在 API 文档中所见:
/**
* Returns a new Dataset with an alias set.
*
* The alias only names this Dataset within the DataFrame/Dataset API
* (e.g. to qualify columns in select/join); it does NOT register a
* table or view visible to spark.sql.
*
* @group typedrel
* @since 1.6.0
*/
def as(alias: String): Dataset[T] = withTypedPlan {
SubqueryAlias(alias, logicalPlan)
}
这个别名只能在函数式 API 中使用,例如 select、join、filter 等,但别名不能用于 SQL 查询。
更明显如果您创建两列数据集并使用别名
val ds1 = Seq(("1", "2"),("3", "4")).toDS().as("xyz")
现在,您可以使用select
仅使用别名选择一列作为
ds1.select($"xyz._1").show(truncate = false)
应该给你
+---+
|_1 |
+---+
|1 |
|3 |
+---+
当使用 join 连接两个具有相同列名的数据集时,`as` 别名的作用更为明显:您可以使用别名来编写 join 条件。
但要在 SQL 查询中使用这个名字,你必须先把 dataset 注册为临时表:
ds1.registerTempTable("xyz")
spark.sql("select * from xyz").show(truncate = false)
应该能给你正确的结果
+---+---+
|_1 |_2 |
+---+---+
|1 |2 |
|3 |4 |
+---+---+
或者甚至以更好的方式做到这一点
ds1.createOrReplaceTempView("xyz")