流式传输:VM冻结大文件

时间:2019-04-13 10:25:16

标签: stream erlang elixir streaming beam

我正在构建一个管道,用来处理、聚合和转换csv文件中的数据,然后再写入另一个csv文件……我从一个19列的csv文件中加载行,经过一些数学运算(map-reduce风格)后,写入到另一个包含30列的csv文件中。

一切正常,直到我尝试向应用程序上传一个25MB、250000行的文件,于是我决定把所有操作改为流式处理,而不是急切(eager)求值……但是现在,当我逐个函数地把代码改用流时,遇到了一个我无法理解的问题:在只生成了5个字段之后,当我尝试写入文件时,程序就冻结了,写了几千行之后便停止写入。(图:Frozen writing process after thousands of lines been written)每个函数我都使用了流,所以据我所知它不应该存在任何锁,而且前几千行的写入是正常的,所以我想知道到底发生了什么。在Erlang观察器(observer)中,我只能看到资源使用率降到了0,并且文件不再被写入。

这是我的流处理函数(在调用它之前,我先从文件中加载数据),接下来是我的写入函数:

@doc """
Builds the per-column enumerables that make up the output rows.

`stream` is an enumerable of rows; every row is a list whose head is the
timestamp. Rows whose timestamp is `"-"` are dropped, rows are de-duplicated
by timestamp, and each remaining row is passed through
`Transform.apply_row_tranformations/1`.

Returns a five-element list, in this order: the first tuple elements and the
second tuple elements produced by `RelativeCoordinates.relative_coordinates/3`,
the result of `Motion.calories_per_timestep/2`, the third-column values and
the fourth-column values of the cleaned rows.

The `Stream.*` steps below are lazy. Whether the `Statistics`, `Motion` and
`RelativeCoordinates` helpers are lazy as well is not visible from here.

NOTE(review): `team_settings` is not used by this function.
"""
def process(stream, field_longs_lats, team_settings) do
  rows =
    stream
    # Drop rows that carry no timestamp.
    |> Stream.reject(fn [timestamp | _rest] -> timestamp == "-" end)
    # Keep only the first row seen for each timestamp.
    |> Stream.uniq_by(fn [timestamp | _rest] -> timestamp end)
    |> Stream.map(fn row -> Transform.apply_row_tranformations(row) end)

  millis = Stream.map(rows, fn [_timestamp, milli | _rest] -> milli end)
  cumulative_milli = Statistics.cumulative_sum(millis)

  # NOTE(review): this stream is built but never consumed. It may have been
  # meant as the second argument of `Motion.calories_per_timestep/2` below,
  # which currently receives `cumulative_milli` twice -- confirm.
  _speeds =
    Stream.map(rows, fn [_timestamp, _milli, _lat, _long, pace | _rest] ->
      Statistics.get_speed(pace)
    end)

  cals = Motion.calories_per_timestep(cumulative_milli, cumulative_milli)

  # Third and fourth columns, named after the way the rows are destructured.
  # NOTE(review): the third argument's name (`field_longs_lats`) suggests a
  # longitude-first ordering, while the third column is passed first here --
  # confirm the column order against `RelativeCoordinates.relative_coordinates/3`.
  lats = Stream.map(rows, fn [_timestamp, _milli, lat | _rest] -> lat end)
  longs = Stream.map(rows, fn [_timestamp, _milli, _lat, long | _rest] -> long end)

  coordinates = RelativeCoordinates.relative_coordinates(lats, longs, field_longs_lats)

  xs = Stream.map(coordinates, fn {x, _y} -> x end)
  ys = Stream.map(coordinates, fn {_x, y} -> y end)

  [xs, ys, cals, lats, longs]
end

写:

@doc """
Writes the header line and then one line per zipped row to `file_name`.

`keyword_list` is a list of enumerables, one per output column. They are
zipped with `Stream.zip/1`, so row `n` of the file is built from the `n`-th
element of every enumerable, and writing stops as soon as any one of them
runs out of elements.

The file is opened for writing with UTF-8 encoding. The function form of
`File.open!/3` is used so the file is closed both when writing finishes and
when zipping or writing raises.

Returns `:ok`. Raises `File.Error` if the file cannot be opened.
"""
def write_to_file(keyword_list, file_name) do
  File.open!(file_name, [:write, :utf8], fn file ->
    IO.write(file, V4.empty_v4_headers() <> "\n")

    # `Enum.each/2` drives the lazy zip one tuple at a time and returns `:ok`,
    # which becomes the return value of `File.open!/3`.
    keyword_list
    |> Stream.zip()
    |> Enum.each(&write_tuple_row(&1, file))
  end)
end

@doc """
Writes one row to `file`.

Every element of `tuple` is converted with `Transformations.to_string/1`,
the results are joined with `","`, and the line is written followed by a
newline.

`file` is any IO device accepted by `IO.write/2`. Returns `:ok`.
"""
@spec write_tuple_row(tuple(), IO.device()) :: :ok
def write_tuple_row(tuple, file) do
  row_content =
    tuple
    |> Tuple.to_list()
    |> Enum.map_join(",", &Transformations.to_string/1)

  # Deliberately no per-row console tracing: this runs once per output row.
  # The row and its newline are handed over as iodata, so no extra binary is
  # built just to append the line terminator.
  IO.write(file, [row_content, "\n"])
end

0 个答案:

没有答案