在非 AVX 处理器上使用 -march=native 编译时出现非法 AVX 指令

时间:2016-08-20 19:01:41

标签: c gcc native avx

以下是导致我的问题的代码:

/**
 * Create a mempool named `name` holding `n` elements of `elt_size` bytes,
 * optionally placing the elements in caller-supplied memory.
 *
 * Steps performed, in order:
 *   1. validate cache_size, vaddr/paddr, pg_num and pg_shift;
 *   2. compute the per-object layout with rte_mempool_calc_obj_size();
 *   3. take the RTE_EAL_MEMPOOL_RWLOCK write lock;
 *   4. create the backing ring, allocate a tailq entry, reserve a memzone;
 *   5. fill in the mempool header, call `mp_init` (if non-NULL) and
 *      mempool_populate();
 *   6. append the tailq entry to the mempool list and release the lock.
 *
 * @param name              name used for the ring, the memzone and mp->name
 * @param n                 number of elements (stored in mp->size)
 * @param elt_size          element size passed to rte_mempool_calc_obj_size()
 * @param cache_size        must not exceed RTE_MEMPOOL_CACHE_MAX_SIZE
 * @param private_data_size rounded up to a multiple of the cache line size
 * @param mp_init           optional callback invoked as mp_init(mp, mp_init_arg)
 * @param mp_init_arg       argument forwarded to mp_init
 * @param obj_init          forwarded to mempool_populate()
 * @param obj_init_arg      forwarded to mempool_populate()
 * @param socket_id         forwarded to rte_ring_create()/rte_memzone_reserve()
 * @param flags             MEMPOOL_F_* flags; MEMPOOL_F_NO_CACHE_ALIGN also
 *                          sets MEMPOOL_F_NO_SPREAD
 * @param vaddr             if non-NULL, start address of external element
 *                          memory; `paddr` must then be non-NULL as well
 * @param paddr             physical addresses copied into mp->elt_pa when
 *                          `vaddr` is non-NULL (pg_num entries)
 * @param pg_num            rejected if smaller than RTE_DIM(mp->elt_pa)
 * @param pg_shift          rejected if larger than MEMPOOL_PG_SHIFT_MAX
 *
 * @return pointer to the new mempool, or NULL on failure. The parameter
 *         validation failures set rte_errno to EINVAL; the allocation
 *         failure paths below do not set rte_errno in this function.
 */
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
        unsigned cache_size, unsigned private_data_size,
        rte_mempool_ctor_t *mp_init, void *mp_init_arg,
        rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
        int socket_id, unsigned flags, void *vaddr,
        const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
    char mz_name[RTE_MEMZONE_NAMESIZE];
    char rg_name[RTE_RING_NAMESIZE];
    struct rte_mempool_list *mempool_list;
    struct rte_mempool *mp = NULL;
    struct rte_tailq_entry *te;
    struct rte_ring *r;
    const struct rte_memzone *mz;
    size_t mempool_size;
    int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
    int rg_flags = 0;
    void *obj;
    struct rte_mempool_objsz objsz;
    void *startaddr;
    int page_size = getpagesize();

    /*
     * compilation-time checks: the structures below, and the offsets of
     * the local_cache/stats members, must be multiples of the cache line
     * size
     */
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
              RTE_CACHE_LINE_MASK) != 0);
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
              RTE_CACHE_LINE_MASK) != 0);
    RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
              RTE_CACHE_LINE_MASK) != 0);
#endif
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
              RTE_CACHE_LINE_MASK) != 0);
    RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
              RTE_CACHE_LINE_MASK) != 0);
#endif

    mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

    /* asked cache too big */
    if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
        rte_errno = EINVAL;
        return NULL;
    }

    /* check that we have both VA and PA */
    if (vaddr != NULL && paddr == NULL) {
        rte_errno = EINVAL;
        return NULL;
    }

    /* Check that pg_num and pg_shift parameters are valid. */
    if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
        rte_errno = EINVAL;
        return NULL;
    }

    /* "no cache align" imply "no spread" */
    if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
        flags |= MEMPOOL_F_NO_SPREAD;

    /* ring flags: translate the single-producer/single-consumer hints */
    if (flags & MEMPOOL_F_SP_PUT)
        rg_flags |= RING_F_SP_ENQ;
    if (flags & MEMPOOL_F_SC_GET)
        rg_flags |= RING_F_SC_DEQ;

    /* calculate mempool object sizes. */
    if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) {
        rte_errno = EINVAL;
        return NULL;
    }

    /*
     * From here on every exit goes through the `exit` label, which
     * releases this lock; the early returns above happen before the
     * lock is taken.
     */
    rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);

    /* allocate the ring that will be used to store objects */
    /* Ring functions will return appropriate errors if we are
     * running as a secondary process etc., so no checks made
     * in this function for that condition */
    snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name);
    /* ring size is n+1 rounded by rte_align32pow2() */
    r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
    if (r == NULL)
        goto exit;

    /*
     * reserve a memory zone for this mempool: private data is
     * cache-aligned
     */
    private_data_size = (private_data_size +
                 RTE_CACHE_LINE_MASK) & (~RTE_CACHE_LINE_MASK);

    if (! rte_eal_has_hugepages()) {
        /*
         * expand private data size to a whole page, so that the
         * first pool element will start on a new standard page
         */
        int head = sizeof(struct rte_mempool);
        int new_size = (private_data_size + head) % page_size;
        if (new_size) {
            private_data_size += page_size - new_size;
        }
    }

    /* try to allocate tailq entry */
    te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
    if (te == NULL) {
        RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
        /* NOTE(review): the ring `r` created above is not released here */
        goto exit;
    }

    /*
     * If user provided an external memory buffer, then use it to
     * store mempool objects. Otherwise reserve memzone big enough to
     * hold mempool header and metadata plus mempool objects.
     */
    mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
    if (vaddr == NULL)
        mempool_size += (size_t)objsz.total_size * n;

    if (! rte_eal_has_hugepages()) {
        /*
         * we want the memory pool to start on a page boundary,
         * because pool elements crossing page boundaries would
         * result in discontiguous physical addresses
         */
        mempool_size += page_size;
    }

    snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);

    mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
    /*
     * no more memory: in this case we lose the previously reserved
     * space (presumably for the ring created above), as we cannot
     * free it
     */
    if (mz == NULL) {
        rte_free(te);
        goto exit;
    }

    if (rte_eal_has_hugepages()) {
        startaddr = (void*)mz->addr;
    } else {
        /*
         * align memory pool start address on a page boundary; the extra
         * page_size added to mempool_size above leaves room for this
         * round-up
         */
        unsigned long addr = (unsigned long)mz->addr;
        if (addr & (page_size - 1)) {
            addr += page_size;
            addr &= ~(page_size - 1);
        }
        startaddr = (void*)addr;
    }

    /* init the mempool structure */
    mp = startaddr;
    memset(mp, 0, sizeof(*mp));
    snprintf(mp->name, sizeof(mp->name), "%s", name);
    /*
     * NOTE(review): phys_addr is taken from the memzone start, while mp
     * may have been moved up to a page boundary in the non-hugepage
     * branch above -- confirm this is intended
     */
    mp->phys_addr = mz->phys_addr;
    mp->ring = r;
    mp->size = n;
    mp->flags = flags;
    mp->elt_size = objsz.elt_size;
    mp->header_size = objsz.header_size;
    mp->trailer_size = objsz.trailer_size;
    mp->cache_size = cache_size;
    /*
     * Flush threshold is cache_size scaled by CACHE_FLUSHTHRESH_MULTIPLIER
     * (defined elsewhere; presumably a floating-point constant -- confirm)
     * and truncated to uint32_t.
     * NOTE: the `------>` at the start of the continuation line below
     * appears to be the poster's marker for the faulting statement; it is
     * not C syntax and must be removed before compiling.
     */
    mp->cache_flushthresh = (uint32_t)
 ------>    (cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
    mp->private_data_size = private_data_size;

    /* calculate address of the first element for continuous mempool. */
    obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
        private_data_size;

    /* populate address translation fields. */
    mp->pg_num = pg_num;
    mp->pg_shift = pg_shift;
    mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));

    /* mempool elements allocated together with mempool */
    if (vaddr == NULL) {
        mp->elt_va_start = (uintptr_t)obj;
        /* physical address of the first element: base + offset within mp */
        mp->elt_pa[0] = mp->phys_addr +
            (mp->elt_va_start - (uintptr_t)mp);

    /* mempool elements in a separate chunk of memory. */
    } else {
        mp->elt_va_start = (uintptr_t)vaddr;
        memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
    }

    /* end starts equal to start at this point, before mempool_populate() */
    mp->elt_va_end = mp->elt_va_start;

    /* call the initializer */
    if (mp_init)
        mp_init(mp, mp_init_arg);

    mempool_populate(mp, n, 1, obj_init, obj_init_arg);

    te->data = (void *) mp;

    /* publish the new mempool on the global list under the tailq lock */
    rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
    TAILQ_INSERT_TAIL(mempool_list, te, next);
    rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

exit:
    /* common exit: mp is still NULL if any allocation above failed */
    rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);

    return mp;
}

cat /proc/cpuinfo 的输出:

flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts mmx fxsr sse sse2 ss syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology tsc_reliable nonstop_tsc aperfmperf pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic popcnt aes hypervisor lahf_lm ida arat epb pln pts dts

这里没有AVX。

我的编译行是:

gcc -Wp,-MD,./.rte_mempool.o.d.tmp -m64 -pthread  -march=native -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2 -DRTE_MACHINE_CPUFLAG_AES -DRTE_MACHINE_CPUFLAG_PCLMULQDQ -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2,RTE_CPUFLAG_AES,RTE_CPUFLAG_PCLMULQDQ  -I/home/dpdk/sources/dpdk/DPDK-2.0.0/x86_64-vm-gcc/include -include /home/dpdk/sources/dpdk/DPDK-2.0.0/x86_64-vm-gcc/include/rte_config.h -W -Wall -Werror -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wold-style-definition -Wpointer-arith -Wcast-align -Wnested-externs -Wcast-qual -Wformat-nonliteral -Wformat-security -Wundef -Wwrite-strings -I/home/dpdk/sources/dpdk/DPDK-2.0.0/lib/librte_mempool -g -O3  -g -fPIC -o rte_mempool.o -c /home/dpdk/sources/dpdk/DPDK-2.0.0/lib/librte_mempool/rte_mempool.c

显然,编译时指定了 -march=native。

我的gcc版本是gcc(GCC)4.6.3 20120306(Red Hat 4.6.3-2)。

gcc 将 -march=native 解析为以下架构选项:

gcc -### -E - -march=native 2>&1 | sed -r '/cc1/!d;s/(")|(^.* - )//g'
-march=corei7 -mcx16 -msahf -mno-movbe -maes -mpclmul -mpopcnt -mno-abm -mno-lwp -mno-fma -mno-fma4 -mno-xop -mno-bmi -mno-tbm -mno-avx -msse4.2 -msse4.1 --param l1-cache-size=32 --param l1-cache-line-size=64 --param l2-cache-size=20480 -mtune=corei7

代码在非法指令上崩溃:

   0x00000000005e49f6 <+598>:   callq  0x40b370 <memset@plt>
   0x00000000005e49fb <+603>:   mov    0x30(%rsp),%rcx
   0x00000000005e4a00 <+608>:   lea    0x283c00(%rip),%rdx        # 0x868607
   0x00000000005e4a07 <+615>:   mov    $0x20,%esi
   0x00000000005e4a0c <+620>:   mov    %rbx,%rdi
   0x00000000005e4a0f <+623>:   xor    %eax,%eax
   0x00000000005e4a11 <+625>:   callq  0x40be40 <snprintf@plt>
=> 0x00000000005e4a16 <+630>:   vcvtsi2sd %r13,%xmm0,%xmm0
   0x00000000005e4a1b <+635>:   mov    0x28(%rsp),%r8
   0x00000000005e4a20 <+640>:   mov    0xd0(%rsp),%edx
   0x00000000005e4a27 <+647>:   mov    0x50(%rsp),%rcx
   0x00000000005e4a2c <+652>:   mov    0x20(%r8),%rax
   0x00000000005e4a30 <+656>:   mov    %ebp,0x34(%rbx)
   0x00000000005e4a33 <+659>:   mov    %edx,0x40(%rbx)

为什么vcvtsi2sd用于gcc承诺不使用avx指令的非avx机器上?

0 个答案:

没有答案