// Scatter-plot setup: an SVG layer (axes and labels) and a canvas layer (points) are
// appended to #graph, with d3.zoom-driven pan/zoom and reset / zoom-in / zoom-out buttons.
// NOTE(review): this listing does not parse as written. Several function and callback
// bodies are never closed, and a number of chained `.attr(...)` / `.call(...)` lines
// have no receiver expression. It looks like lines were dropped from the original
// script; restore them before running.
// NOTE(review): `outerWidth`, `outerHeight`, `width`, `height`, `margin`, `data`,
// `pointColor` and `pointStroke` are used below but not defined in this section;
// presumably they are declared elsewhere. Confirm.
const container = d3.select('#graph');
// Init SVG
const svgChart = container.append('svg:svg')
.attr("id", "svg-id2")
.attr('width', outerWidth)
.attr('height', outerHeight)
.attr('class', 'svg-plot')
.attr('transform', `translate(${margin.left}, ${margin.top})`);
// Init Canvas
// The canvas is sized to the inner plot area and offset by the margins via CSS.
const canvasChart = container.append('canvas')
.attr("id", "canvass-id2")
.attr('width', width)
.attr('height', height)
.style('margin-left', margin.left + 'px')
.style('margin-top', margin.top + 'px')
.attr('class', 'canvas-plot');
// Prepare buttons
const toolsList = container.select('.tools')
.style('margin-top', margin.top + 'px')
.style('visibility', 'visible');
// Reset button: builds the identity transform (no translation, scale 1).
toolsList.select('#reset').on('click', () => {
const t = d3.zoomIdentity.translate(0, 0).scale(1);
// NOTE(review): the previous statement ends with `;`, so this `.call(...)` has no
// receiver. Presumably a selection (e.g. the canvas) preceded it; confirm. The
// arrow function opened above is also never closed in this listing.
.call(zoom_function.transform, t)
// Current zoom factor used by the zoom-in / zoom-out buttons further down.
var scale = 1.5;
const context = canvasChart.node().getContext('2d');
// NOTE(review): named `min_x` but holds d3.max of the x values; it is not referenced
// again in this section.
let min_x = d3.max(data, (d) => d['x'])
// Init Scales
// Linear scales from the data extent to pixel space; the y range is inverted
// ([height, 0]) so larger values are drawn higher up.
const x = d3.scaleLinear().domain([d3.min(data, (d) => d['x']), d3.max(data, (d) => d['x'])]).range([0, width]).nice();
const y = d3.scaleLinear().domain([d3.min(data, (d) => d['y']), d3.max(data, (d) => d['y'])]).range([height, 0]).nice();
// Init Axis
const xAxis = d3.axisBottom(x);
const yAxis = d3.axisLeft(y);
// Add Axis
// NOTE(review): neither axis group is passed to `.call(xAxis)` / `.call(yAxis)` in the
// visible lines; presumably that happened in lines that were lost.
const gxAxis = svgChart.append('g')
.attr('transform', `translate(0, ${height})`)
const gyAxis = svgChart.append('g')
// Add labels
// NOTE(review): as written, the chain below continues `svgChart.append('g')`, so the
// label attributes and text land on the y-axis group. A `svgChart.append('text')`
// was presumably dropped here; confirm.
.attr('x', `-${height / 2}`)
.attr('dy', '-3.5em')
.attr('transform', 'rotate(-90)')
.text('Axis Y');
// NOTE(review): this chain has no receiver (the previous statement ended with `;`).
.attr('x', `${width / 2}`)
.attr('y', `${height + 40}`)
.text('Axis X');
// Draw plot on canvas
// Rescales both axes with the given zoom transform, clears the canvas, and draws
// every data point through drawPoint, passing the zoom factor `transform.k`.
// NOTE(review): the forEach callback and this function are not closed in the listing.
function draw(transform) {
const scaleX = transform.rescaleX(x);
const scaleY = transform.rescaleY(y);
context.clearRect(0, 0, width, height);
data.forEach(point => {
drawPoint(scaleX, scaleY, point, transform.k);
// A second zoom behaviour, separate from `zoom_function` below; its handler writes
// the event transform to the canvas element's `transform` attribute.
// NOTE(review): `d3.event` is the D3 v5-and-earlier API; in v6+ listeners receive the
// event as an argument. Confirm which D3 version this page loads.
var zoom = d3.zoom()
.on('zoom', function () {
canvasChart.attr('transform', d3.event.transform);
// Initial draw made with no zoom
// Draws one data point: sets the fill/stroke styles, maps the point's x/y through
// the supplied scales, and adds a full-circle arc of radius 2 at that position.
// NOTE(review): `k` is not used in the visible lines, and no beginPath()/fill()/
// stroke() call is visible either; presumably lost. Confirm.
function drawPoint(scaleX, scaleY, point, k) {
context.fillStyle = pointColor;
context.strokeStyle = pointStroke;
context.lineWidth = 1;
const px = scaleX(point['x']);
const py = scaleY(point['y']);
var r = 2
context.arc(px, py, r, 0, 2 * Math.PI, true);
// Zoom/Drag handler
// Zoom behaviour limited to scale factors between 1 and 1000.
// NOTE(review): the handler body is cut off after reading the transform; presumably
// it re-rendered the axes and called draw(transform). Confirm.
const zoom_function = d3.zoom().scaleExtent([1, 1000])
.on('zoom', () => {
const transform = d3.event.transform;
// Returns the base-10 logarithm of `val`; no caller is visible in this section.
function changeRadius(val) {
return Math.log10(val);
// Builds the identity transform and hands it to `zoom_function.transform`.
// NOTE(review): the `.call(...)` below has no receiver, as in the click handler above.
function reset() {
const t = d3.zoomIdentity.translate(0, 0).scale(1);
.call(zoom_function.transform, t)
// Zoom-in button: raises `scale` by 0.1 and builds an untranslated transform at that scale.
d3.selectAll('#zoomIn').on('click', function () {
scale += 0.1
const t = d3.zoomIdentity.translate(0, 0).scale(scale);
.call(zoom_function.transform, t)
// Zoom-out button: lowers `scale` by 0.1 and builds an untranslated transform at that scale.
// NOTE(review): nothing in the visible lines stops `scale` from dropping below 1.
d3.selectAll('#zoomOut').on('click', function () {
scale -= 0.1
// NOTE(review): this creates a new zoom behaviour and discards it, so it has no
// effect on `zoom_function`.
d3.zoom().scaleExtent([1, 1000]);
const t = d3.zoomIdentity.translate(0, 0).scale(scale);
.call(zoom_function.transform, t)
// Binds `reset` as the click handler for #reset; a click handler for the same id is
// also attached through `toolsList` near the top of this script.
d3.selectAll('#reset').on('click', reset);
|name| timestamp|value|
| A|1604219844| 7|
| A|1604219845| 1|
| A|1604219846| 1|
| A|1604219847| 1|
| A|1604219848| 2|
| A|1604219849| 7|
| A|1604219850| 1|
| A|1604219851| 1|
| A|1604219852| 2|
| A|1604219853| 7|
| A|1604219854| 1|
| A|1604219855| 1|
我想在上述数据框中增加一个附加列,用于对数据进行聚合。
import findspark
# Make the Spark installation importable before pyspark itself is imported.
findspark.init()
import pyspark  # only run after findspark.init()
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
import pyspark.sql.functions as psf

spark = SparkSession.builder.getOrCreate()
# Read the sample CSV, taking column names from its header row
# (no schema inference is requested).
data = spark.read.option("header", "true").csv("sample_data.csv")

# A single window over the whole frame ordered by timestamp; each lag below
# looks exactly one row back within it.
w = Window.orderBy("timestamp")
# `lag` lives in pyspark.sql.functions, which is imported as `psf`; the bare
# name `lag` is not in scope and would raise NameError.
value_lag = psf.lag('value').over(w)
timestamp_lag = psf.lag('timestamp').over(w)

# prev_timestamp / prev_value: the previous row's timestamp and value (null on the first row).
# changed: 1 when `value` differs from the previous row's value, otherwise 0.
# diff: `value` minus the previous row's value.
df_final = data.withColumn('prev_timestamp', timestamp_lag).withColumn('prev_value', value_lag)\
    .withColumn("changed", (data.value != value_lag).cast('int'))\
    .withColumn("diff", data.value - value_lag)
|name| timestamp|value|prev_timestamp|prev_value|changed|diff|
| A|1604219844| 7| null| null| null|null|
| A|1604219845| 1| 1604219844| 7| 1|-6.0|
| A|1604219846| 1| 1604219845| 1| 0| 0.0|
| A|1604219847| 1| 1604219846| 1| 0| 0.0|
| A|1604219848| 2| 1604219847| 1| 1| 1.0|
| A|1604219849| 7| 1604219848| 2| 1| 5.0|
| A|1604219850| 1| 1604219849| 7| 1|-6.0|
| A|1604219851| 1| 1604219850| 1| 0| 0.0|
| A|1604219852| 2| 1604219851| 1| 1| 1.0|
| A|1604219853| 7| 1604219852| 2| 1| 5.0|
| A|1604219854| 1| 1604219853| 7| 1|-6.0|
| A|1604219855| 1| 1604219854| 1| 0| 0.0|
该列的想法是:它可以帮助根据键序列 1 -> 2 -> 7 -> 1 对数据进行聚合。
|name| timestamp|value|prev_timestamp|prev_value|changed|diff| keyword|
| A|1604219844| 7| null| null| null|null| null|
| A|1604219845| 1| 1604219844| 7| 1| -6|Insert1-Update1|
| A|1604219846| 1| 1604219845| 1| 0| 0| null|
| A|1604219847| 1| 1604219846| 1| 0| 0| null|
| A|1604219848| 2| 1604219847| 1| 1| 1| Insert2|
| A|1604219849| 7| 1604219848| 2| 1| 5|Insert2-Update1|
| A|1604219850| 1| 1604219849| 7| 1| -6|Insert2-Update2|
| A|1604219851| 1| 1604219850| 1| 0| 0| null|
| A|1604219852| 2| 1604219851| 1| 1| 1| Insert3|
| A|1604219853| 7| 1604219852| 2| 1| 5|Insert3-Update1|
| A|1604219854| 1| 1604219853| 7| 1| -6|Insert3-Update2|
| A|1604219855| 1| 1604219854| 1| 0| 0| null|