Vowpal Wabbit IO吞吐量?

时间:2018-02-15 10:29:58

标签: r machine-learning io classification vowpalwabbit

我正在尝试使用vw为一个分类问题构建模型,问题出现在我运行以下命令来获取预测结果时:

vw -t -i model.vw -p predictions.txt test.txt

将predictions.txt写入文件系统需要很长时间(对于包含1000万个样本的测试集,大约需要50分钟);我原以为使用SSD而不是机械硬盘(HDD)可以加快这个过程,但并没有太大区别;我也尝试了不同的文件系统(NTFS,XFS和EXT4),同样没有太大帮助;我原以为vw会提供一个在写出之前先缓冲输出的选项,但我没有找到。

有关此问题的任何建议吗?

其他信息

vw版本为8.5.0,是在Debian 9 AMD64上从源代码(来自GitHub仓库)构建的。读/写操作是在一个以NTFS为文件系统的分区上进行的(位于机械硬盘上)。使用cat向该分区上的文件写入时,吞吐量在20MB/s到30MB/s之间。

当我启动vw时,它会派生出另一个子进程,该子进程运行得稍慢一些。

一般进程状态信息:

PID PRI NI  VIRT    RES SHR S   CPU%    MEM%    TIME+   Command
3943    20  0   1131M   85728   6196    S   73.3    1.7 9:32.03 ├─ vw -t -i models/model.vw.30 -p x.txt
3944    20  0   1131M   85728   6196    R   36.3    1.7 4:53.39 │  └─ vw

父进程的示例strace输出:

Trace of process 3943 - vw -t -i models/model.vw.30 -p x.txt
strace: Process 3943 attached
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.131190\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.149736\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.287871\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.133941\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.141326\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.107144\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 1
write(4, "0.395839\n", 9)               = 9
futex(0x561432250dfc, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x561432250df8, FUTEX_OP_SET<<28|0<<12|FUTEX_OP_CMP_GT<<24|0x1) = 1

子进程的示例strace输出:

Trace of process 3944 - vw
strace: Process 3944 attached
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046163, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046165, NULL) = -1 EAGAIN (Resource temporarily unavailable)
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046167, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046169, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046171, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046173, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
read(0, "1 |a x:5646322 |b 5646322 |c uBy"..., 5977) = 5977
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046175, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
read(0, "23:25.0 I24:25.0 I25:23.0 I26:25"..., 64944) = 56380
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046177, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046179, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046181, NULL) = 0
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x561432250dfc, FUTEX_WAIT_PRIVATE, 5046183, NULL) = -1 EAGAIN (Resource temporarily unavailable)
futex(0x561432250da0, FUTEX_WAKE_PRIVATE, 1) = 0

0 个答案:

没有答案