我是 Scala 新手,在用 Spark Scala 编写一段简单的代码时遇到了问题。读取 Parquet 文件后,我得到了这个 DataFrame(DF):
' Click handler for ChartBtn: redraws Testpoint_Chart from the values typed
' into the form's text boxes.
'
' Two line series are plotted against the fixed pressure steps 1000..10000:
'   "Test Data"  - T1K..T10K, with the 5K point replaced by the average of
'                  the three 5K repeatability readings (T5K1..T5K3).
'   "Max Torque" - a flat red line at tst_MaxOutput.
'
' All text is converted explicitly (CInt / CDbl) and up front, so the code
' no longer depends on Option Strict Off and a malformed entry raises before
' the chart has been partially rebuilt. As before, a non-numeric entry
' raises InvalidCastException.
Private Sub ChartBtn_Click(sender As Object, e As EventArgs) Handles ChartBtn.Click
    'Average Repeatability at 5K Test Point
    Dim reading5K1 As Integer = CInt(T5K1.Text)
    Dim reading5K2 As Integer = CInt(T5K2.Text)
    Dim reading5K3 As Integer = CInt(T5K3.Text)
    ' "/" is floating-point division; CInt rounds the mean to the nearest
    ' integer (ties to even), which is what storing it in an Integer array does.
    Dim average As Integer = CInt((reading5K1 + reading5K2 + reading5K3) / 3)

    'Plotted values
    Dim maxOutput As Integer = CInt(tst_MaxOutput.Text)
    Dim pressures() As Integer = {1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000}
    Dim testPoints() As Integer = {
        CInt(T1K.Text), CInt(T2K.Text), CInt(T3K.Text), CInt(T4K.Text), average,
        CInt(T6K.Text), CInt(T7K.Text), CInt(T8K.Text), CInt(T9K.Text), CInt(T10K.Text)
    }

    ' One max-torque value per pressure step gives a horizontal line.
    Dim maxTorquePoints(pressures.Length - 1) As Integer
    For i As Integer = 0 To maxTorquePoints.Length - 1
        maxTorquePoints(i) = maxOutput
    Next

    ' Candidate Y-axis limits: each value rounded up to the next multiple of
    ' 1000. The axis math works on the Double value of the text.
    Dim maxOutputCeiling As Double = Math.Ceiling(CDbl(tst_MaxOutput.Text) / 1000) * 1000
    Dim lastPointCeiling As Double = Math.Ceiling(CDbl(T10K.Text) / 1000) * 1000

    'Chart Setup
    With Testpoint_Chart.ChartAreas(0)
        .AxisX.Title = "Pressure (Psi)"
        .AxisX.Minimum = 1000
        .AxisX.Maximum = 10000
        .AxisY.Interval = 1000
        .AxisY.Title = "Test Points (ft.lb)"
        ' The axis must cover both the 10K test point and the max-torque
        ' line, so take the larger of the two rounded-up limits.
        .AxisY.Maximum = Math.Max(lastPointCeiling, maxOutputCeiling)
    End With

    Testpoint_Chart.Series.Clear()
    Testpoint_Chart.Series.Add("Test Data")
    Testpoint_Chart.Series.Add("Max Torque")

    'Max Torque
    With Testpoint_Chart.Series("Max Torque")
        .IsVisibleInLegend = True
        .ChartType = SeriesChartType.Line
        .IsValueShownAsLabel = False
        .Color = Color.Red
        .Points.DataBindXY(pressures, maxTorquePoints)
    End With

    'Test Points
    With Testpoint_Chart.Series("Test Data")
        .IsVisibleInLegend = True
        .ChartType = SeriesChartType.Line
        .IsValueShownAsLabel = True
        .Points.DataBindXY(pressures, testPoints)
    End With
End Sub
我想基于第一个 DF 生成一个结果 DF(例如,如果 ID = 2,则时间戳应乘以 2)。为此,我创建了一个新的样例类(case class):
ID Timestamp
1 0
1 10
1 11
2 20
3 15
这是我的代码:
// Row shape of the result DataFrame. `bigint` is a Spark SQL column type,
// not a Scala type; its Scala counterpart is Long.
case class OutputData(id: Long, timestamp: Long)
能帮我吗?
答案 0 :(得分:0)
为了简化实现,您可以借助 case class 把 df 转换为 Dataset(即 df.as[OutputData])。这样就可以用对象的字段(例如 record.ID)来访问值,而不必在每次需要某个元素时都从 Row 中取值。此外,由于您的输入和输出格式相同,可以复用同一个 case class,而不必定义两个。
代码如下:
// Typed view of one row; used both to read the input and to emit the result.
case class OutputData(ID: Integer, Timestamp: Long)

// Sample input data
val df = Seq((1, 0L), (1, 10L), (1, 11L), (2, 20L), (3, 15L)).toDF("ID", "Timestamp")
df.show()

// Double the timestamp of rows whose ID is 2; every other row passes through unchanged.
val newDF = df.as[OutputData].map { row =>
  if (row.ID == 2) row.copy(Timestamp = row.Timestamp * 2)
  else row
}
newDF.show()
以上代码的输出:
// original
+---+---------+
| ID|Timestamp|
+---+---------+
| 1| 0|
| 1| 10|
| 1| 11|
| 2| 20|
| 3| 15|
+---+---------+
// new one
+---+---------+
| ID|Timestamp|
+---+---------+
| 1| 0|
| 1| 10|
| 1| 11|
| 2| 40|
| 3| 15|
+---+---------+