以下是导致我的问题的代码:
/*
 * Create and initialise a memory pool, optionally backed by a caller-supplied
 * element buffer.
 *
 * Parameters (as used by the code below):
 *   name              - used to build the ring name and the memzone name
 *                       (via RTE_MEMPOOL_MZ_FORMAT) and copied into mp->name.
 *   n                 - number of elements; stored in mp->size, passed to
 *                       mempool_populate(); the ring is sized
 *                       rte_align32pow2(n+1).
 *   elt_size          - element size handed to rte_mempool_calc_obj_size().
 *   cache_size        - must not exceed RTE_MEMPOOL_CACHE_MAX_SIZE.
 *   private_data_size - rounded up with RTE_CACHE_LINE_MASK; without
 *                       hugepages it is further padded so that header plus
 *                       private data is a multiple of the page size.
 *   mp_init, mp_init_arg   - optional callback invoked once on the new pool.
 *   obj_init, obj_init_arg - forwarded to mempool_populate().
 *   socket_id         - forwarded to rte_ring_create() and
 *                       rte_memzone_reserve().
 *   flags             - MEMPOOL_F_* flags; stored in mp->flags.
 *   vaddr             - if NULL, elements live in the same memzone right
 *                       after the header and private data; otherwise it is
 *                       used as the start address of the elements.
 *   paddr             - must be non-NULL when vaddr is non-NULL; pg_num
 *                       entries are copied into mp->elt_pa in that case.
 *   pg_num, pg_shift  - validated below, then stored in the pool header.
 *
 * Returns the new pool, or NULL on failure. The parameter checks set
 * rte_errno to EINVAL; the later failure paths (ring, tailq entry, memzone)
 * do not set rte_errno in this function.
 */
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
unsigned cache_size, unsigned private_data_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags, void *vaddr,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
char mz_name[RTE_MEMZONE_NAMESIZE];
char rg_name[RTE_RING_NAMESIZE];
struct rte_mempool_list *mempool_list;
/* stays NULL until the memzone is reserved, so every early exit returns NULL */
struct rte_mempool *mp = NULL;
struct rte_tailq_entry *te;
struct rte_ring *r;
const struct rte_memzone *mz;
size_t mempool_size;
int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
int rg_flags = 0;
void *obj;
struct rte_mempool_objsz objsz;
void *startaddr;
int page_size = getpagesize();
/*
 * Compile-time checks: these sizes and offsets must have no bits set in
 * RTE_CACHE_LINE_MASK.
 */
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
RTE_CACHE_LINE_MASK) != 0);
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
RTE_CACHE_LINE_MASK) != 0);
RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
RTE_CACHE_LINE_MASK) != 0);
#endif
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
RTE_CACHE_LINE_MASK) != 0);
RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
RTE_CACHE_LINE_MASK) != 0);
#endif
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
/* requested cache size is too big */
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
rte_errno = EINVAL;
return NULL;
}
/* a virtual address without the matching physical addresses is rejected */
if (vaddr != NULL && paddr == NULL) {
rte_errno = EINVAL;
return NULL;
}
/*
 * Check that pg_num and pg_shift parameters are valid: pg_num must be at
 * least the dimension of mp->elt_pa, and pg_shift at most
 * MEMPOOL_PG_SHIFT_MAX.
 */
if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
rte_errno = EINVAL;
return NULL;
}
/* "no cache align" implies "no spread" */
if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
flags |= MEMPOOL_F_NO_SPREAD;
/* translate single-producer / single-consumer pool flags into ring flags */
if (flags & MEMPOOL_F_SP_PUT)
rg_flags |= RING_F_SP_ENQ;
if (flags & MEMPOOL_F_SC_GET)
rg_flags |= RING_F_SC_DEQ;
/* calculate mempool object sizes; a zero result is treated as invalid */
if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) {
rte_errno = EINVAL;
return NULL;
}
/* from here on every exit goes through the exit label, which unlocks */
rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
/* allocate the ring that will be used to store objects */
/*
 * Ring functions will return appropriate errors if we are
 * running as a secondary process etc., so no checks are made
 * in this function for that condition.
 */
snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name);
r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
if (r == NULL)
goto exit;
/*
 * reserve a memory zone for this mempool: private data is
 * cache-aligned (rounded up using RTE_CACHE_LINE_MASK)
 */
private_data_size = (private_data_size +
RTE_CACHE_LINE_MASK) & (~RTE_CACHE_LINE_MASK);
if (! rte_eal_has_hugepages()) {
/*
 * expand private data size to a whole page, so that the
 * first pool element will start on a new standard page
 */
int head = sizeof(struct rte_mempool);
/* despite its name, new_size is the remainder past the last page boundary */
int new_size = (private_data_size + head) % page_size;
if (new_size) {
private_data_size += page_size - new_size;
}
}
/* try to allocate tailq entry */
te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
/* note: the ring created above is not released on this path */
goto exit;
}
/*
 * If user provided an external memory buffer, then use it to
 * store mempool objects. Otherwise reserve memzone big enough to
 * hold mempool header and metadata plus mempool objects.
 */
mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
if (vaddr == NULL)
mempool_size += (size_t)objsz.total_size * n;
if (! rte_eal_has_hugepages()) {
/*
 * we want the memory pool to start on a page boundary,
 * because pool elements crossing page boundaries would
 * result in discontiguous physical addresses; the extra
 * page leaves room for the start-address rounding done below
 */
mempool_size += page_size;
}
snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
/*
 * no more memory: in this case we lose the previously reserved
 * space for the ring, as we cannot free it
 */
if (mz == NULL) {
rte_free(te);
goto exit;
}
if (rte_eal_has_hugepages()) {
startaddr = (void*)mz->addr;
} else {
/*
 * align memory pool start address on a page boundary by rounding up
 * (the masking assumes page_size is a power of two)
 */
unsigned long addr = (unsigned long)mz->addr;
if (addr & (page_size - 1)) {
addr += page_size;
addr &= ~(page_size - 1);
}
startaddr = (void*)addr;
}
/* init the mempool structure */
mp = startaddr;
memset(mp, 0, sizeof(*mp));
snprintf(mp->name, sizeof(mp->name), "%s", name);
/*
 * NOTE(review): this records the memzone's physical address; when
 * startaddr was rounded up above, mp no longer coincides with mz->addr
 * -- confirm whether that offset matters to users of phys_addr.
 */
mp->phys_addr = mz->phys_addr;
mp->ring = r;
mp->size = n;
mp->flags = flags;
mp->elt_size = objsz.elt_size;
mp->header_size = objsz.header_size;
mp->trailer_size = objsz.trailer_size;
mp->cache_size = cache_size;
/*
 * NOTE(review): the "------>" on the continuation line below is not C; it
 * appears to be the poster's marker highlighting this statement.
 * Presumably CACHE_FLUSHTHRESH_MULTIPLIER is a floating-point constant
 * (its definition is not shown here), which would make this product an
 * integer-to-double conversion before the cast back to uint32_t -- confirm
 * against the macro definition.
 */
mp->cache_flushthresh = (uint32_t)
------> (cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
mp->private_data_size = private_data_size;
/* calculate address of the first element for continuous mempool. */
obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
private_data_size;
/* populate address translation fields. */
mp->pg_num = pg_num;
mp->pg_shift = pg_shift;
mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));
/* mempool elements allocated together with mempool */
if (vaddr == NULL) {
mp->elt_va_start = (uintptr_t)obj;
/* physical address of the first element: pool base plus the same offset */
mp->elt_pa[0] = mp->phys_addr +
(mp->elt_va_start - (uintptr_t)mp);
/* mempool elements in a separate chunk of memory. */
} else {
mp->elt_va_start = (uintptr_t)vaddr;
/* copy the caller's pg_num physical page addresses into the pool header */
memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
}
/* start with an empty element range (end == start) before populating */
mp->elt_va_end = mp->elt_va_start;
/* call the initializer */
if (mp_init)
mp_init(mp, mp_init_arg);
mempool_populate(mp, n, 1, obj_init, obj_init_arg);
/* add the pool to the mempool list, under the tailq write lock */
te->data = (void *) mp;
rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_INSERT_TAIL(mempool_list, te, next);
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
/* common exit: mp is NULL on failure paths and the new pool on success */
exit:
rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
return mp;
}
cat /proc/cpuinfo 的输出如下:
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts mmx fxsr sse sse2 ss syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology tsc_reliable nonstop_tsc aperfmperf pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic popcnt aes hypervisor lahf_lm ida arat epb pln pts dts
这里没有AVX。
我的编译行是:
gcc -Wp,-MD,./.rte_mempool.o.d.tmp -m64 -pthread -march=native -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2 -DRTE_MACHINE_CPUFLAG_AES -DRTE_MACHINE_CPUFLAG_PCLMULQDQ -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2,RTE_CPUFLAG_AES,RTE_CPUFLAG_PCLMULQDQ -I/home/dpdk/sources/dpdk/DPDK-2.0.0/x86_64-vm-gcc/include -include /home/dpdk/sources/dpdk/DPDK-2.0.0/x86_64-vm-gcc/include/rte_config.h -W -Wall -Werror -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wold-style-definition -Wpointer-arith -Wcast-align -Wnested-externs -Wcast-qual -Wformat-nonliteral -Wformat-security -Wundef -Wwrite-strings -I/home/dpdk/sources/dpdk/DPDK-2.0.0/lib/librte_mempool -g -O3 -g -fPIC -o rte_mempool.o -c /home/dpdk/sources/dpdk/DPDK-2.0.0/lib/librte_mempool/rte_mempool.c
显然,指定了本机编译。
我的 gcc 版本是 gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2)。
对于 -march=native("本机"架构),gcc 报告它实际选择的选项如下:
gcc -### -E - -march=native 2>&1 | sed -r '/cc1/!d;s/(")|(^.* - )//g'
-march=corei7 -mcx16 -msahf -mno-movbe -maes -mpclmul -mpopcnt -mno-abm -mno-lwp -mno-fma -mno-fma4 -mno-xop -mno-bmi -mno-tbm -mno-avx -msse4.2 -msse4.1 --param l1-cache-size=32 --param l1-cache-line-size=64 --param l2-cache-size=20480 -mtune=corei7
代码在非法指令上崩溃:
0x00000000005e49f6 <+598>: callq 0x40b370 <memset@plt>
0x00000000005e49fb <+603>: mov 0x30(%rsp),%rcx
0x00000000005e4a00 <+608>: lea 0x283c00(%rip),%rdx # 0x868607
0x00000000005e4a07 <+615>: mov $0x20,%esi
0x00000000005e4a0c <+620>: mov %rbx,%rdi
0x00000000005e4a0f <+623>: xor %eax,%eax
0x00000000005e4a11 <+625>: callq 0x40be40 <snprintf@plt>
=> 0x00000000005e4a16 <+630>: vcvtsi2sd %r13,%xmm0,%xmm0
0x00000000005e4a1b <+635>: mov 0x28(%rsp),%r8
0x00000000005e4a20 <+640>: mov 0xd0(%rsp),%edx
0x00000000005e4a27 <+647>: mov 0x50(%rsp),%rcx
0x00000000005e4a2c <+652>: mov 0x20(%r8),%rax
0x00000000005e4a30 <+656>: mov %ebp,0x34(%rbx)
0x00000000005e4a33 <+659>: mov %edx,0x40(%rbx)
这台机器不支持 AVX,gcc 也表明不会使用 AVX 指令(-mno-avx),为什么生成的代码里仍然出现了 vcvtsi2sd 这条 AVX 指令?