Linux内核:手动修改页表条目标志

时间:2017-06-15 08:43:21

标签: linux-kernel

我试图通过在相应的页表条目中设置标志来手动将用户空间进程的某个内存区域标记为不可缓存(出于教育目的,不打算在生产代码中使用)。

我有一个Ubuntu 14.04(禁用ASLR),在x86_64 Intel Skylake处理器上运行4.4 Linux内核。

在我的内核模块中,我有以下功能:

/*                                                                                                                      
 * Set memory region [start,end], excluding 'addr', of process with PID 'pid' as uncacheable.                           
 */                                                                                                                     
ssize_t set_uncachable(uint32_t pid, uint64_t start, uint64_t end, uint64_t addr)                                       
{                                                                                                                       
    struct task_struct* ts = NULL;                                                                                      
    struct vm_area_struct *curr, *first = NULL;                                                                         
    struct mm_struct* mm;                                                                                               
    pgd_t * pgd;                                                                                                        
    pte_t * pte;                                                                                                        
    uint64_t numpages, curr_addr;                                                                                       
    uint32_t level, j, i = 0;                                                                                           

    printk(KERN_INFO "set_unacheable called\n");                                                                        

    ts = pid_task(find_vpid(pid), PIDTYPE_PID); //find task from PID                                                    
    pgd = ts->mm->pgd; //page table root of the task                                                                    

    first = ts->mm->mmap;                                                                                               
    curr = first;                                                                                                       
    if(first == NULL)                                                                                                   
        return -1;                                                                                                      

    do                                                                                                                  
    {                                                                                                                   
        printk(KERN_INFO "Region %3u [0x%016llx - 0x%016llx]", i, curr->vm_start, curr->vm_end);                        

        numpages = (curr->vm_end - curr->vm_start) / PAGE_SIZE; //PAGE_SIZE is 4K for now                               
        if(curr->vm_start > curr->vm_end)                                                                               
            numpages = 0;                                                                                               

        for(j = 0; j < numpages; j++)                                                                                   
        {                                                                                                               
            curr_addr = curr->vm_start + (PAGE_SIZE*j);                                                                 
            pte = lookup_address_in_pgd(pgd, curr_addr, &level);                                                        

            if((pte != NULL) && (level == 1))                                                                           
            {                                                                                                           
                printk(KERN_INFO "PTE for 0x%016x - 0x%016x (level %u)\n", curr_addr, pte->pte, level);                 
                if(curr_addr >= start && curr_addr < end && curr_addr != addr)                                             
                {                                                                                                          
                    //setting page entry to PAT#3                                                                          
                    pte->pte |= PWT_BIT | PCD_BIT;                                                                         
                    pte->pte &= ~PAT_BIT;                                                                               
                    printk(KERN_INFO "PTE for 0x%016x - 0x%016x (level %u) -- UPDATED\n", curr_addr, pte->pte, level);  
                }                                                                                                         
            }                                                                                                             
        }                                                                                                                 

        curr = curr->vm_next;                                                                                             
        if(curr == NULL)                                                                                                  
            return -1;                                                                                                    

        i++;                                                                                                              
    } while (curr != first);                                                                                              


    return 0;                                                                                                              
}  

为了测试上面的代码,我运行了一个在内存中分配某个区域的应用程序:

//#define BUF_ADDR_START 0x0000000008400000LL    /* works */                                                         
#define BUF_ADDR_START 0x00007ffff0000000LL      /* does not work */

[...]

buffer = mmap((void *)BUF_ADDR, BUF_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_POPULATE, 0, 0); 
if ( buffer == MAP_FAILED )                                                                                         
{                                                                                                                   
    printf("Failed to map buffer\n");                                                                               
    exit(-1);                                                                                                    
}                                                                                                                   
memset(buffer, 0, BUF_SIZE);                                                                                        
printf("Buffer at %p\n", buffer); 

我想使用我的内核模块将缓冲区标记为不可缓存。我的内核模块中的代码适用于0x8400000,但是对于0x7ffff0000000,没有找到页表条目(即lookup_address_in_pgd返回NULL)。但是,缓冲区肯定是在测试程序中分配的。

似乎我的内核模块适用于低地址(代码,数据和堆部分),但不适用于映射到较高地址的内存(堆栈,共享库等)。

有没有人知道为什么它失败了更大的地址?关于如何更优雅地实现set_uncachable的建议也是受欢迎的; - )

谢谢!

0 个答案:

没有答案