我有以下代码,用来读取一个文件并删除其中的所有注释行。
(* [s_read_all line] filters a single line of input.
   - Raises [Pbm_format_error] when [line] is empty. The exception is not
     declared in this snippet, so it must already be defined before this
     point.
   - Returns [""] when the first character of [line] is '#', so the line
     contributes nothing to the result.
   - Otherwise returns [line] with a newline appended. *)
let s_read_all line =
if line = "" then
raise Pbm_format_error
else if line.[0] = '#' then
""
else
(* Append a newline so kept lines stay separated once concatenated. *)
line ^ "\n"
;;
(* [read_all flec] reads lines from the input channel [flec] until
   [End_of_file], passes each one through [s_read_all], and returns the
   concatenation of the results as a single string.
   Any exception raised by [s_read_all] is not caught here and propagates
   to the caller. *)
let read_all flec =
let rec loop accum_ref =
let line = input_line flec in
(* NOTE: [^] allocates a new string on every call, so the whole text
   accumulated so far is copied once per line read. This repeated
   concatenation is what makes the function slow on large inputs. *)
accum_ref := (!accum_ref) ^ (s_read_all line);
loop accum_ref
in
let accum_ref = ref "" in
try
loop accum_ref
with
(* [loop] never returns normally; End_of_file from [input_line] is its
   only exit, at which point the accumulated string is the result. *)
End_of_file -> !accum_ref
;;
对于一个 18 万行的文件,我的代码非常慢(大约需要 2 分钟)。我是在解释器模式下执行的,是不是这个原因让我的代码变得如此缓慢?
答案 0 :(得分:5)
问题在于字符串连接很慢。更准确地说,是重复的字符串连接很慢。您应该使用 Buffer 而不是 string 来累积各行:
(** [read_all flec] reads every remaining line from the input channel
    [flec], passes each one through [s_read_all], and returns the
    concatenation of the results as a single string.

    A [Buffer] is used as the accumulator so that appending a line does not
    copy everything read so far. Any exception raised by [s_read_all] is not
    caught here and propagates to the caller. *)
let read_all flec =
  let rec loop buffer =
    let line = input_line flec in
    Buffer.add_string buffer (s_read_all line);
    loop buffer
  in
  (* 180 is only the initial capacity; the buffer grows on demand. *)
  let buffer = Buffer.create 180 in
  try
    loop buffer
  with
  (* [loop] never returns normally: End_of_file from [input_line] is its
     only exit. The stdlib accessor is [Buffer.contents] (plural). *)
  | End_of_file -> Buffer.contents buffer
;;
答案 1 :(得分:1)
您正在使用 `line ^ "\n"` 和 `(!accum_ref) ^ (s_read_all line)`。
与 Java 类似,`^` 是直接连接,每次都会创建一个新的字符串。所以我想这就是处理 18 万行时速度慢的原因。
你应该使用Buffer,就像Java中的StringBuilder一样。
另外,如果你给 `Buffer.create` 一个合适的初始长度,它会稍快一点。
(** Raised by [s_read_all] when it is given an empty line. *)
exception Pbm_format_error

(** [s_read_all line] filters a single line of input.

    Returns [""] when [line] starts with ['#'], and [line] unchanged
    otherwise (no newline is appended here).

    @raise Pbm_format_error if [line] is the empty string. *)
let s_read_all line =
  match line with
  | "" -> raise Pbm_format_error
  (* The guard is safe: the empty string was handled by the case above. *)
  | text when text.[0] = '#' -> ""
  | text -> text
(** [read_all flec] reads every remaining line from the input channel
    [flec], filters each one through [s_read_all], and returns the kept
    lines joined into one string, each followed by a newline.

    Lines for which [s_read_all] returns [""] add nothing to the result.
    Any exception raised by [s_read_all] is not caught here and propagates
    to the caller. *)
let read_all flec =
  (* Initial capacity of 180 * 1000 * 128 bytes (about 23 MB), requested up
     front. Presumably sized for ~180k lines of up to 128 characters;
     confirm against the expected input. *)
  let out = Buffer.create (180 * 1000 * 128) in
  let rec consume () =
    match input_line flec with
    | exception End_of_file -> Buffer.contents out
    | raw ->
      let kept = s_read_all raw in
      Buffer.add_string out kept;
      (* Only terminate lines that actually contributed text. *)
      if kept <> "" then Buffer.add_char out '\n';
      consume ()
  in
  consume ()