需要帮助修复elixir和sweet_xml库的内存泄漏

时间:2018-09-11 19:54:17

标签: memory-leaks elixir openstreetmap

我是 Elixir 的新手。

我有以下lib/osm.ex文件

defmodule Osm do
  @moduledoc """
  Command-line tool that counts the nodes, ways and relations contained in an
  OpenStreetMap XML file by streaming it through `SweetXml.stream_tags/3`.
  """

  import SweetXml

  # A progress line is written once per this many processed elements.
  @progress_interval 1000

  @doc """
  Returns `:world`.
  """
  def hello do
    :world
  end

  @doc """
  Escript entry point: parses the command-line `args` and hands the resulting
  options to `process/1`.
  """
  def main(args) do
    args |> parse_args() |> process()
  end

  # Returns the keyword list of parsed switches; positional arguments and
  # invalid switches are ignored.
  defp parse_args(args) do
    {options, _rest, _invalid} =
      OptionParser.parse(args, switches: [osm_file: :string, help: :boolean])

    options
  end

  @doc """
  Prints the usage text to standard output.
  """
  def output_help() do
    IO.puts("Usage: osm [OPTION]")
    IO.puts("")
    IO.puts("  --osm-file  an osm-file to import")
    IO.puts("  --help      outputs this help-page")
  end

  @doc """
  Dispatches on the parsed `options`.

    * an empty list prints "No arguments given"
    * a truthy `:help` prints the usage text
    * an `:osm_file` entry is passed to `process_osm_file/1`
    * anything else prints the usage text
  """
  def process([]) do
    IO.puts("No arguments given")
  end

  def process(options) do
    if options[:help] do
      output_help()
    else
      # Look the key up instead of matching the exact list `[osm_file: _]`,
      # so additional or differently ordered switches do not raise.
      case Keyword.fetch(options, :osm_file) do
        {:ok, file} -> process_osm_file(file)
        :error -> output_help()
      end
    end
  end

  @doc """
  Processes `file` according to its extension.

  Only `.osm` files are actually parsed: their node, way and relation elements
  are counted, a progress line is written every #{@progress_interval} elements
  and the final totals are written once at the end. `.pbf` and `.osm.bz2` files
  only print a message, and any other extension is reported as invalid.
  """
  def process_osm_file(file) do
    cond do
      String.ends_with?(file, ".pbf") ->
        IO.puts("parse osm-pbf.")

      String.ends_with?(file, ".osm.bz2") ->
        IO.puts("extract and parse osm-xml.")

      String.ends_with?(file, ".osm") ->
        IO.puts("parse osm-xml.")
        totals = count_elements(file)
        # Always show the exact final totals, then terminate the "\r" line.
        IO.write(progress_line(totals))
        IO.puts("")

      true ->
        IO.puts("invalid osm-file extension.")
    end
  end

  # Streams `file` and folds every node/way/relation element into a map of
  # running totals. The explicit initial accumulator makes a file without any
  # matching element yield all-zero totals instead of raising.
  defp count_elements(file) do
    zero = %{nodes: 0, ways: 0, relations: 0}

    file
    |> File.stream!()
    |> stream_tags([:node, :way, :relation], discard: [:node, :way, :relation])
    |> Enum.reduce(zero, fn {_tag, element}, totals ->
      updated = merge_counts(totals, process_element(element, zero))
      maybe_report(updated)
      updated
    end)
  end

  # Adds the per-element counts to the running totals, key by key.
  defp merge_counts(totals, increment) do
    Map.merge(totals, increment, fn _key, left, right -> left + right end)
  end

  # Writes a progress line whenever the number of processed elements reaches a
  # multiple of @progress_interval.
  defp maybe_report(totals) do
    processed = totals.nodes + totals.ways + totals.relations

    if rem(processed, @progress_interval) == 0 do
      IO.write(progress_line(totals))
    end
  end

  # Builds the carriage-return-prefixed status line, including the VM's total
  # memory as reported by `:erlang.memory/1`.
  defp progress_line(totals) do
    "\rnodes: #{totals.nodes}; ways: #{totals.ways}; relations: #{totals.relations}" <>
      "; mem: #{:erlang.memory(:total)}"
  end

  # Maps one streamed XML element to its fields and returns `counts` with the
  # counter for that element type incremented by one.
  defp process_element(doc, counts) do
    case doc |> xmlElement(:name) do
      :node ->
        doc
        |> xmap(
          id: ~x"./@id"i,
          lat: ~x"./@lat"f,
          lon: ~x"./@lon"f,
          tags: [
            ~x"./tag"l,
            key: ~x"./@k"s,
            value: ~x"./@v"s
          ]
        )
        |> process_node(counts)

      :way ->
        doc
        |> xmap(
          id: ~x"./@id"i,
          nd: [
            ~x"./nd"l,
            ref: ~x"./@ref"i
          ],
          tags: [
            ~x"./tag"l,
            key: ~x"./@k"s,
            value: ~x"./@v"s
          ]
        )
        |> process_way(counts)

      :relation ->
        doc
        |> xmap(
          id: ~x"./@id"i,
          member: [
            ~x"./member"l,
            type: ~x"./@type"s,
            ref: ~x"./@ref"s,
            role: ~x"./@role"s
          ],
          tags: [
            ~x"./tag"l,
            key: ~x"./@k"s,
            value: ~x"./@v"s
          ]
        )
        |> process_relation(counts)

      _ ->
        IO.puts("unhandled element")
        # Return the counts unchanged so the caller can still merge the result.
        counts
    end
  end

  # The parsed node itself is not used yet; only the node counter is bumped.
  defp process_node(_node, counts) do
    Map.update!(counts, :nodes, &(&1 + 1))
  end

  # The parsed way itself is not used yet; only the way counter is bumped.
  defp process_way(_way, counts) do
    Map.update!(counts, :ways, &(&1 + 1))
  end

  # The parsed relation itself is not used yet; only the relation counter is bumped.
  defp process_relation(_relation, counts) do
    Map.update!(counts, :relations, &(&1 + 1))
  end
end

以及以下mix.exs文件

defmodule Osm.MixProject do
  @moduledoc """
  Mix project definition for the `osm` escript.
  """

  use Mix.Project

  @doc """
  Returns the project configuration, including the escript entry module.
  """
  def project do
    [
      app: :osm,
      version: "0.1.0",
      elixir: "~> 1.7",
      start_permanent: Mix.env() == :prod,
      # `mix escript.build` uses `Osm.main/1` as the executable's entry point.
      escript: [main_module: Osm],
      deps: deps()
    ]
  end

  @doc """
  Returns the OTP application configuration.
  """
  def application do
    [
      extra_applications: [:logger]
    ]
  end

  # Project dependencies. The GitHub repository is given as a string, the form
  # Mix documents for the `:github` option.
  defp deps do
    [
      {:sweet_xml, github: "kbrw/sweet_xml", app: false}
    ]
  end
end

我用mix escript.build

进行编译

我已经下载了berlin-latest.osm.bz2 file,并提取了berlin-latest.osm文件。

如果我打./osm --osm-file=berlin-latest.osm

该脚本能解析 XML 数据,并正确统计节点(node)、路径(way)和关系(relation)的数量,但内存消耗一直增长,直到运行结束。

SweetXml库中是否存在内存泄漏,或者我做错了什么?

1 个答案:

答案 0 :(得分:3)

我没有看到会在您的代码中造成内存泄漏的东西。

我进行了以下测试:我逐步删除了所有使用 SweetXml 的代码;当我删除最先使用 SweetXml 的那一部分(即stream_tags([:node, :way, :relation], discard: [:node, :way, :relation]))时,内存泄漏消失了。这清楚地表明内存消耗来自 SweetXml。

阅读SweetXml.stream_tags/3函数的源代码可能会给您带来一些答案。我还不知道泄漏的来源。

编辑:在仔细检查了源代码之后,我仍然没有找到泄漏的根源。我开始觉得这是更深层次的事情,也许与erlang VM的工作方式有关。