读取 Parquet 文件后创建一个简单的 DF

时间:2019-11-21 21:46:33

标签: scala apache-spark

我是 Scala 新手,在用 Spark(Scala)编写一段简单的代码时遇到了一些问题。读取 Parquet 文件后,我得到了这个 DF:

' Redraws Testpoint_Chart from the values currently entered on the form.
' Two line series are produced over the same ten pressure steps (1000..10000):
'   "Test Data"  - one torque reading per step (the 5K step uses the average
'                  of the three repeatability readings T5K1..T5K3)
'   "Max Torque" - a flat reference line at tst_MaxOutput
' Every text box is converted up front with an explicit CInt/CDbl, so a
' non-numeric entry raises its conversion exception before the chart has been
' modified, and the code no longer relies on Option Strict Off.
Private Sub ChartBtn_Click(sender As Object, e As EventArgs) Handles ChartBtn.Click

    ' X positions shared by both series
    Dim pressures() As Integer = {1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000}

    'Average Repeatability at 5K Test Point
    Dim repeatAverage As Double = (CInt(T5K1.Text) + CInt(T5K2.Text) + CInt(T5K3.Text)) / 3

    ' Y values of the measured curve; CInt rounds the average the same way the
    ' implicit Double -> Integer conversion did
    Dim testPoints() As Integer = {
        CInt(T1K.Text), CInt(T2K.Text), CInt(T3K.Text), CInt(T4K.Text),
        CInt(repeatAverage),
        CInt(T6K.Text), CInt(T7K.Text), CInt(T8K.Text), CInt(T9K.Text), CInt(T10K.Text)}

    ' Flat reference line: the max output repeated once per pressure step
    Dim maxOutput As Integer = CInt(tst_MaxOutput.Text)
    Dim maxTorqueLine(pressures.Length - 1) As Integer
    For i As Integer = 0 To maxTorqueLine.Length - 1
        maxTorqueLine(i) = maxOutput
    Next

    ' Axis ceiling candidates, each rounded up to the next multiple of 1000
    Dim lastPoint As Double = CDbl(T10K.Text)
    Dim maxOutputCeiling As Double = Math.Ceiling(CDbl(tst_MaxOutput.Text) / 1000) * 1000
    Dim lastPointCeiling As Double = Math.Ceiling(lastPoint / 1000) * 1000

    'Chart Setup
    With Testpoint_Chart.ChartAreas(0)
        .AxisX.Title = "Pressure (Psi)"
        .AxisX.Minimum = 1000
        .AxisX.Maximum = 10000

        .AxisY.Interval = 1000
        .AxisY.Title = "Test Points (ft.lb)"

        ' Grow the Y axis only when the 10K reading exceeds the max-output ceiling
        .AxisY.Maximum = If(lastPoint > maxOutputCeiling, lastPointCeiling, maxOutputCeiling)
    End With

    ' Rebuild both series from scratch (same insertion order as before)
    Testpoint_Chart.Series.Clear()
    Testpoint_Chart.Series.Add("Test Data")
    Testpoint_Chart.Series.Add("Max Torque")

    'Max Torque
    With Testpoint_Chart.Series("Max Torque")
        .IsVisibleInLegend = True
        .ChartType = SeriesChartType.Line
        .IsValueShownAsLabel = False
        .Color = Color.Red
        .Points.DataBindXY(pressures, maxTorqueLine)
    End With

    'Test Points
    With Testpoint_Chart.Series("Test Data")
        .IsVisibleInLegend = True
        .ChartType = SeriesChartType.Line
        .IsValueShownAsLabel = True
        .Points.DataBindXY(pressures, testPoints)
    End With
End Sub

我想从第一个DF创建一个DF结果(例如,如果ID = 2,则时间戳应乘以2)。因此,我创建了一个新类:

ID   Timestamp
1    0
1    10
1    11    
2    20
3    15

这是我的代码:

 /** One row of the result: a record id and its timestamp.
   *
   * `bigint` is a Spark SQL type name, not a Scala type; the Scala type for a
   * `bigint` column is `Long`.
   *
   * @param id        record identifier
   * @param timestamp timestamp value for the record
   */
 case class OutputData(id: Long, timestamp: Long)

能帮我吗?

1 个答案:

答案 0 :(得分:0)

为了简化实现,您可以借助 case class 把 DataFrame 转换为 Dataset,这样就能用对象属性的方式访问字段,而不必每次都从 Row 中取出某个元素的值。此外,由于输入和输出的格式相同,您可以复用同一个 case class,而不必定义两个。

代码如下:

// Helper case class: gives every row typed, named fields once the
// DataFrame is viewed as a Dataset[OutputData].
case class OutputData(ID: Integer, Timestamp: Long)

// Sample input data
val df = Seq(
  (1, 0L),
  (1, 10L),
  (1, 11L),
  (2, 20L),
  (3, 15L)
).toDF("ID", "Timestamp")
df.show()

// Rows with ID == 2 get their timestamp doubled; every other row passes
// through unchanged. Adapt the condition to apply the rule to other ids.
val newDF = df.as[OutputData].map { row =>
  if (row.ID == 2) row.copy(Timestamp = row.Timestamp * 2) else row
}

newDF.show()

以上代码的输出:

// original
+---+---------+
| ID|Timestamp|
+---+---------+
|  1|        0|
|  1|       10|
|  1|       11|
|  2|       20|
|  3|       15|
+---+---------+
// new one
+---+---------+
| ID|Timestamp|
+---+---------+
|  1|        0|
|  1|       10|
|  1|       11|
|  2|       40|
|  3|       15|
+---+---------+