今天早上我在这里提问,为什么我的 Python 代码比我的 F# 版本慢很多;现在我想知道 F# 版本是否还能更快。有没有办法写出下面代码的更快版本?这段代码按一个已排序且不重复的索引列表,从存储 32 位整数的二进制文件中读取对应的值。请注意,我尝试了两种方法,一种基于 BinaryReader,另一种基于 MemoryMappedFile(GitHub 上还有更多版本)。
module SimpleRead
/// Reads the 32-bit integer stored in cell `cellIndex` of the stream behind `reader`.
/// Returns `None` when the cell holds `Int32.MinValue`, otherwise `Some value`.
let readValue (reader:BinaryReader) cellIndex =
    // Every cell is 4 bytes wide, so the byte position is cellIndex * 4.
    reader.BaseStream.Position <- cellIndex*4L
    let raw = reader.ReadInt32()
    if raw = Int32.MinValue then None else Some raw
/// Opens `fileName` read-only and reads the cell at every index in `indices`,
/// returning the results as a list in the same order as the indices.
let readValues fileName indices =
    use reader = new BinaryReader(File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    // The result must be an eager list: a lazy sequence would only be evaluated
    // after `reader` has been disposed at the end of this function.
    indices
    |> List.ofSeq
    |> List.map (readValue reader)
module MemoryMappedSimpleRead =
    open System.IO.MemoryMappedFiles

    /// Reads the 32-bit integer for cell `cellIndex` through `reader`.
    /// `offset` is the byte offset in the file at which the view behind `reader`
    /// starts; it is subtracted so the position is relative to the view.
    /// Returns `None` when the cell holds `Int32.MinValue`, otherwise `Some value`.
    let readValue (reader:MemoryMappedViewAccessor) offset cellIndex =
        let position = (cellIndex*4L) - offset
        match reader.ReadInt32(position) with
        | Int32.MinValue -> None
        | v -> Some(v)

    /// Memory-maps `fileName` read-only and reads the cell at every index in
    /// `indices`, returning the results as a list in the same order.
    /// The view spans only the bytes from the smallest to the largest index.
    /// An empty `indices` yields an empty list without touching the file.
    let readValues (fileName:string) indices =
        // Materialise once so the input sequence is not enumerated three times
        // (min, max and the actual reads).
        let cells = Array.ofSeq indices
        if Array.isEmpty cells then
            []
        else
            // Ask for read access explicitly: the two-argument overload requests
            // read/write access, which fails on read-only files.
            use mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, null, 0L, MemoryMappedFileAccess.Read)
            let offset = Array.min cells * 4L
            let last = Array.max cells * 4L
            // +4 so that the last cell itself is inside the view.
            let length = 4L + last - offset
            use reader = mmf.CreateViewAccessor(offset, length, MemoryMappedFileAccess.Read)
            // Array.map is eager, so every read completes before the view is disposed.
            cells |> Array.map (readValue reader offset) |> List.ofArray
为了比较,这里是我最新的numpy版本
import numpy as np
def convert(v):
    """Return ``v`` unchanged, or ``None`` when ``v`` is the sentinel -2147483648."""
    # `!=` replaces `<>`, which is a syntax error on Python 3; `!=` works on both.
    if v != -2147483648:
        return v
    return None
def read_values(filename, indices):
    """Read the int32 cells at ``indices`` from ``filename`` via a read-only memmap.

    Returns a list with one entry per index, each passed through ``convert``.
    """
    values_arr = np.memmap(filename, dtype='int32', mode='r')
    # A list comprehension is eager on every Python version; `map` is lazy on
    # Python 3, so no value would be converted until the caller iterated.
    return [convert(v) for v in values_arr[indices]]
更新:与我之前所说的相反,我的 Python 版本仍然比 F# 版本慢很多,只是由于我的 Python 测试中有一个错误,之前看起来并非如此。我把这个问题留在这里,以防有深入了解 BinaryReader 或 MemoryMappedFile 的人知道一些改进方法。
答案 0 :(得分:1)
我设法通过使用reader.BaseStream.Seek而不是reader.BaseStream.Position使SimpleReader快30%。我也用数组替换了列表,但这并没有太大的改变。
我的简单阅读器的完整代码现在是:
open System
open System.IO
/// Reads the 32-bit integer stored in cell `cellIndex` of the stream behind `reader`.
/// Returns `None` when the cell holds `Int32.MinValue`, otherwise `Some value`.
let readValue (reader:BinaryReader) (cellIndex:int) =
    // Widen to int64 BEFORE multiplying: `cellIndex*4` in 32-bit arithmetic
    // silently overflows for indices >= 2^29 and seeks to the wrong offset.
    reader.BaseStream.Seek(int64 cellIndex * 4L, SeekOrigin.Begin) |> ignore
    match reader.ReadInt32() with
    | Int32.MinValue -> None
    | v -> Some(v)
/// Opens `fileName` read-only and reads the cell at every index in the array
/// `indices`, returning an array of results in the same order.
let readValues indices fileName =
    use reader = new BinaryReader(File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    // Array.map is eager, so every value is read before `reader` is disposed
    // at the end of this function.
    indices |> Array.map (readValue reader)
其他语言的完整代码和版本位于GitHub。