用于计算文件ssqHitNum.txt中18个最高频率数的“good-red”函数。
(defun good-red ()
(let ((tab (make-hash-table)) (res '()) (nums) (sort-res))
(dotimes (i 33) (setf (gethash (+ i 1) tab) 0))
(with-open-file (stream "ssqHitNum.txt")
(loop :for line = (read-line stream nil)
:until (null line)
:do
(setq nums (butlast (str2lst (subseq line 6))))
(dolist (n nums) (incf (gethash n tab)))
))
(maphash #'(lambda (k v) (push (cons k v) res)) tab)
(setq sort-res (sort res #'> :key #'cdr))
;(print sort-res)
(subseq (mapcar #'car sort-res) 0 18)))
$ head ssqHitNum.txt
10000 7 12 18 19 22 28 4
10000 16 17 23 26 31 32 11
10000 3 4 18 22 24 29 11
10000 4 9 10 18 29 32 8
10000 5 7 10 14 17 25 11
数字在1到33之间。我是否使用hashtab
并按照Common Lisp代码在ocaml中逐行扫描文件?或者使用ocaml有更优雅的方式?
任何建议都表示赞赏!
答案 0 :(得分:13)
FWIW,在F#3.0中你可以这样写:
System.IO.File.ReadLines @"ssqHitNum.txt"
|> Seq.collect (fun s -> s.Split ' ' |> Seq.skip 1)
|> Seq.countBy id
|> Seq.sortBy (fun (_, p) -> -p)
|> Seq.take 18
在OCaml中,我首先从头开始编写这些有用的库函数:
let readAllLines file =
let lines = ref [] in
let input = open_in file in
begin
try
while true do
lines := input_line input :: !lines
done
with
| End_of_file ->
close_in input
end;
List.rev !lines
let collect f xs =
List.concat (List.map f xs)
let countBy f xs =
let counts = Hashtbl.create 100 in
let find key = try Hashtbl.find counts (f key) with Not_found -> 0 in
let add key = Hashtbl.replace counts (f key) (1 + find key) in
List.iter add xs;
Hashtbl.fold (fun k n kns -> (k, n)::kns) table []
let sortBy f xs =
List.sort (fun x y -> compare (f x) (f y)) xs
let rec truncate n xs =
match n, xs with
| 0, _ | _, [] -> []
| n, x::xs -> x::truncate (n-1) xs
let rec skip n xs =
match n, xs with
| 0, xs -> xs
| n, [] -> []
| n, _::xs -> skip (n-1) xs
let (|>) x f = f x
let id x = x
然后以同样的方式写出来:
readAllLines "ssqHitNum.txt"
|> collect (fun s -> split ' ' s |> skip 1)
|> countBy id
|> sortBy (fun (_, p) -> -p)
|> truncate 18
F#仍然更好,因为线路是按需读取的,而我的OCaml预先将所有内容读入内存。
答案 1 :(得分:4)
从类似的高级别角度来看,使用Batteries:
open Batteries_uni
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num (1 + Hashtbl.find_default table num 0) in
let freq_line line =
let nums = List.tl (String.nsplit line " ") in
List.iter freq_num nums in
Enum.iter freq_line (File.lines_of file);
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
Hashtbl.enum table |> List.of_enum
|> List.sort ~cmp
|> List.take best_n
从顶层测试:
#use "topfind";;
#require "batteries";;
#use "/tmp/test.ml";;
test "/tmp/test.txt" 18;;
答案 2 :(得分:3)
正如z_axis所述,这是另一个仅使用随OCaml编译器一起分发的基本库的解决方案。由于缺少一些便利功能,它有点冗长。
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num
(1 + try Hashtbl.find table num with Not_found -> 0) in
begin
let input = open_in file in
try while true do
let line = input_line input in
let nums = List.tl (Str.split (Str.regexp " +") line) in
List.iter freq_num nums
done with End_of_file -> close_in input
end;
let sorted =
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
List.sort cmp (Hashtbl.fold (fun k x li -> (k,x)::li) table []) in
(* take not tail-rec, not a problem for small n such as n=18 *)
let rec take n = function
| li when n = 0 -> []
| [] -> []
| hd::tl -> hd :: take (n - 1) tl in
take best_n sorted
默认情况下,regexp模块Str未链接,即使它位于默认搜索路径中,因此您必须使用str.cma
(ocamlc
)或{显式编译程序{1}}(适用于str.cmxa
)。在顶层,ocamlopt
然后#use "topfind";;
会这样做。
答案 3 :(得分:2)
使用一组固定的小整数,使用数组可能更简单:
let good_red () =
let a = Array.make 33 0 in
let bump i = a.(i-1) <- a.(i-1) + 1 in
let rec iter_lines fh =
try
let words = Str.split (Str.regexp " +") (input_line fh) in
List.iter bump (List.map int_of_string (List.tl words));
iter_lines fh
with End_of_file -> () in
let fh = open_in "ssqHitNum.txt" in
iter_lines fh;
close_in fh;
let b = Array.mapi (fun i freq -> (i+1,freq)) a in
Array.sort (fun (i1,f1) (i2,f2) -> compare f2 f1) b;
Array.sub b 0 18;;
try
Array.iter (fun (i,freq) -> Printf.printf "%2d %2d\n" freq i) (good_red ())
with Invalid_argument _ -> print_endline "bad input"
正如gasche提到的那样,你需要使用str.cma或str.cmxa进行编译。
答案 4 :(得分:1)
我不确定你要解决的问题。 (为什么所有输入行都以10000
开头?)
如果你只是想找到第18个最高频率数字,你不需要逐行读取(这在Lisp,在C中,在Ocaml中都是如此......)和Ocaml的{{ 1}}可以做输入。
使用Scanf.scanf "%d" (fun x -> ...)
在Ocaml中是明智的。