(TL;DR)在 NVMe SSD(Intel P3600 以及 Avant)上,如果我只对磁盘的一小部分区域而不是整个磁盘发出随机读取,我会看到 IOPS 降低。
Disk Range(in 4k blocks), IOPS
0, 38833
1, 68596
10, 76100
30, 80381
40, 113647
50, 148205
100, 170374
200, 239798
400, 270197
800, 334767
OS:Linux 4.2.0-35-generic
SSD:Intel P3600 NVME Flash
$ for i in 0 1 10 30 40 50 100 200 400 800; do
<program_name> /dev/nvme0n1 10 $i
done
* $ g++ <progname.cpp> -o progname -std=c++11 -lpthread -laio -O3
* $ progname /dev/nvme0n1 10 100
#include <random>
#include <libaio.h>
#include <stdlib.h>//malloc, exit
#include <future> //async
#include <unistd.h> //usleep
#include <iostream>
#include <sys/time.h> // gettimeofday
#include <vector>
#include <fcntl.h> // open
#include <errno.h>
#include <sys/types.h> // open
#include <sys/stat.h> // open
#include <cassert>
#include <semaphore.h>
// libaio context shared by the submitting thread (main) and the reaping
// thread (DoGet); set up in main via io_queue_init(QDEPTH, &ioctx).
io_context_t ioctx;
// I/O buffers that main hands to io_prep_pread as read destinations.
std::vector<char*> buffers;
// File descriptor of the device under test; -1 until main opens it.
int fd = -1;
// Counting semaphore initialised to QDEPTH in main: main waits on it before
// submitting and DoGet posts it once per completed event.
sem_t sem;
// Number of iocbs passed to each io_submit call.
constexpr int numPerRound = 20;
// Number of io_submit rounds performed by main.
constexpr int numRounds = 100000;
// Maximum number of events requested from a single io_getevents call.
constexpr int MAXEVENT = 10;
// Size in bytes of every read; also the multiplier that turns a block index
// into a byte offset.
constexpr size_t BLKSIZE = 4096;
// Queue depth given to io_queue_init and initial value of `sem`.
constexpr int QDEPTH = 200;
// First block of the region to read; overwritten from argv[2] in main.
off_t startBlock = 0;
// Width of the region in blocks; overwritten from argv[3] in main.
off_t numBlocks = 100;
// Total number of reads issued; DoGet loops until this many have completed.
const int numSubmitted = numRounds * numPerRound;
void DoGet()
io_event eventsArray[MAXEVENT];
int numCompleted = 0;
while (numCompleted != numSubmitted)
bzero(eventsArray, MAXEVENT * sizeof(io_event));
int numEvents;
do {
numEvents = io_getevents(ioctx, 1, MAXEVENT, eventsArray, nullptr);
} while (numEvents == -EINTR);
for (int i = 0; i < numEvents; i++)
io_event* ev = &eventsArray[i];
iocb* cb = (iocb*)(ev->data);
assert(ev->res2 == 0);
assert(ev->res == BLKSIZE);
sem_post(&sem); // free ioctx
numCompleted += numEvents;
std::cout << "completed=" << numCompleted << std::endl;
int main(int argc, char* argv[])
if (argc == 1) {
std::cout << "usage <nvme_device_name> <start_4k_block> <num_4k_blocks>" << std::endl;
char* deviceName = argv[1];
startBlock = atoll(argv[2]);
numBlocks = atoll(argv[3]);
int ret = 0;
ret = io_queue_init(QDEPTH, &ioctx);
assert(ret == 0);
ret = sem_init(&sem, 0, QDEPTH);
assert(ret == 0);
auto DoGetFut = std::async(std::launch::async, DoGet);
// preallocate buffers
for (int i = 0; i < QDEPTH; i++)
char* buf ;
ret = posix_memalign((void**)&buf, 4096, BLKSIZE);
assert(ret == 0);
fd = open("/dev/nvme0n1", O_DIRECT | O_RDONLY);
assert(fd >= 0);
off_t offset = 0;
struct timeval start;
gettimeofday(&start, 0);
std::mt19937 generator (getpid());
// generate random offsets within [startBlock, startBlock + numBlocks]
std::uniform_int_distribution<off_t> offsetgen(startBlock, startBlock + numBlocks);
for (int j = 0; j < numRounds; j++)
iocb mycb[numPerRound];
iocb* posted[numPerRound];
bzero(mycb, sizeof(iocb) * numPerRound);
for (int i = 0; i < numPerRound; i++)
// same buffer may get used in 2 different async read
// thats ok - not validating content in this program
char* iobuf = buffers[i];
iocb* cb = &mycb[i];
offset = offsetgen(generator) * BLKSIZE;
io_prep_pread(cb, fd, iobuf, BLKSIZE, offset);
cb->data = iobuf;
posted[i] = cb;
sem_wait(&sem); // wait for ioctx to be free
int ret = 0;
do {
ret = io_submit(ioctx, numPerRound, posted);
} while (ret == -EINTR);
assert(ret == numPerRound);
struct timeval end;
gettimeofday(&end, 0);
uint64_t diff = ((end.tv_sec - start.tv_sec) * 1000000) + (end.tv_usec - start.tv_usec);
<< "ops=" << numRounds * numPerRound
<< " iops=" << (numRounds * numPerRound *(uint64_t)1000000)/diff
<< " region-size=" << (numBlocks * BLKSIZE)
<< std::endl;
答案 0 :(得分:2)