Question

我试图在安装了AMD OpenCL的Windows 10下运行Theano。Python、Theano、Pygpu和其他依赖项均按照开发者的说明使用Anaconda安装，其中包括作为BLAS实现的MKL。从numpy的配置来看，MKL似乎位于预期的位置：

>>> import numpy as np
>>> np.__config__.show()
lapack_opt_info:
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
    include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
    library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
blas_opt_info:
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
    include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
    library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
lapack_mkl_info:
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
    include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
    library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
blas_mkl_info:
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
    include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
    library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']

然而，当我运行pygpu.test()时，遇到了很多错误（1166个错误，外加2个失败）。其中大约一半的错误是由于找不到BLAS库：

======================================================================
ERROR: pygpu.tests.test_blas.test_dot(1, 'float32', True, True, True, False)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest
    self.test(*self.arg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\support.py", line 44, in f
    func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_blas.py", line 44, in dot
    gr = gblas.dot(gX, gY, gZ, overwrite_z=overwrite)
  File "pygpu\blas.pyx", line 79, in pygpu.blas.dot
  File "pygpu\blas.pyx", line 29, in pygpu.blas.pygpu_blas_rdot
pygpu.gpuarray.GpuArrayException: (b'Missing Blas library', 5)

……每个测试的具体细节有所不同，但回溯（traceback）是相同的。

最先报告的两个错误是Clang前端编译失败。我承认我不明白它们是如何/为什么出现的，但它们看起来很严重：

====================================================================== 
ERROR: pygpu.tests.test_basic.test_tril 
---------------------------------------------------------------------- 
Traceback (most recent call last): 
  File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest 
    self.test(*self.arg) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_basic.py", line 15, in test_tril 
    result = tril(ag, inplace=inplace) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 62, in tril 
    k = _generate_kernel(A.context, cols, upper) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 25, in _generate_kernel 
    k = GpuKernel(src, "extract_tri", spec, context=ctx) 
  File "pygpu\gpuarray.pyx", line 2428, in pygpu.gpuarray.GpuKernel.__cinit__ 
  File "pygpu\gpuarray.pyx", line 446, in pygpu.gpuarray.kernel_init 
pygpu.gpuarray.GpuArrayException: Program build failure :: 
C:\Users\megha\AppData\Local\Temp\\OCL11404T5.cl:43:38: error: casting '__global float *' to type 'char *' changes address space of pointer 
        a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off); 
                                     ^       ~ 
1 error generated. 

error: Clang front-end compilation failed! 
Frontend phase failed compilation. 
Error: Compiling CL to IR 
1 #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE)
2 #define WITHIN_KERNEL /* empty */
3 #define KERNEL __kernel
4 #define GLOBAL_MEM __global
5 #define LOCAL_MEM __local
6 #define LOCAL_MEM_ARG __local
7 #ifndef NULL
8   #define NULL ((void*)0)
9 #endif
10 #define LID_0 get_local_id(0)
11 #define LID_1 get_local_id(1)
12 #define LID_2 get_local_id(2)
13 #define LDIM_0 get_local_size(0)
14 #define LDIM_1 get_local_size(1)
15 #define LDIM_2 get_local_size(2)
16 #define GID_0 get_group_id(0)
17 #define GID_1 get_group_id(1)
18 #define GID_2 get_group_id(2)
19 #define GDIM_0 get_num_groups(0)
20 #define GDIM_1 get_num_groups(1)
21 #define GDIM_2 get_num_groups(2)
22 #define ga_bool uchar
23 #define ga_byte char
24 #define ga_ubyte uchar
25 #define ga_short short
26 #define ga_ushort ushort
27 #define ga_int int
28 #define ga_uint uint
29 #define ga_long long
30 #define ga_ulong ulong
31 #define ga_float float
32 #define ga_double double
33 #define ga_half half
34 #define ga_size ulong
35 #define ga_ssize long
36 #define load_half(p) vload_half(0, p)
37 #define store_half(p, v) vstore_half_rtn(v, 0, p)
38 #define GA_DECL_SHARED_PARAM(type, name) , __local type *name
39 #define GA_DECL_SHARED_BODY(type, name)
40 #define GA_WARP_SIZE 64
41 
42     KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_size a_off, ga_uint N) {
43         a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
44         unsigned int idx = GID_1 * LDIM_0 * GDIM_0 +
45                            GID_0 * LDIM_0 + LID_0;
46         unsigned int ix = idx/5;
47         unsigned int iy = idx%5;
48         if (idx < N) {
49             if (ix < iy)
50                 a[idx] = 0.0;
51         }
52     }
53     


====================================================================== 
ERROR: pygpu.tests.test_basic.test_triu 
---------------------------------------------------------------------- 
Traceback (most recent call last): 
  File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest 
    self.test(*self.arg) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_basic.py", line 29, in test_triu 
    result = triu(ag, inplace=inplace) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 43, in triu 
    k = _generate_kernel(A.context, cols, upper) 
  File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 25, in _generate_kernel 
    k = GpuKernel(src, "extract_tri", spec, context=ctx) 
  File "pygpu\gpuarray.pyx", line 2428, in pygpu.gpuarray.GpuKernel.__cinit__ 
  File "pygpu\gpuarray.pyx", line 446, in pygpu.gpuarray.kernel_init 
pygpu.gpuarray.GpuArrayException: Program build failure :: 
C:\Users\megha\AppData\Local\Temp\\OCL11404T7.cl:43:38: error: casting '__global float *' to type 'char *' changes address space of pointer 
        a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off); 
                                     ^       ~ 
1 error generated. 

error: Clang front-end compilation failed! 
Frontend phase failed compilation. 
Error: Compiling CL to IR 
1 #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE)
2 #define WITHIN_KERNEL /* empty */
3 #define KERNEL __kernel
4 #define GLOBAL_MEM __global
5 #define LOCAL_MEM __local
6 #define LOCAL_MEM_ARG __local
7 #ifndef NULL
8   #define NULL ((void*)0)
9 #endif
10 #define LID_0 get_local_id(0)
11 #define LID_1 get_local_id(1)
12 #define LID_2 get_local_id(2)
13 #define LDIM_0 get_local_size(0)
14 #define LDIM_1 get_local_size(1)
15 #define LDIM_2 get_local_size(2)
16 #define GID_0 get_group_id(0)
17 #define GID_1 get_group_id(1)
18 #define GID_2 get_group_id(2)
19 #define GDIM_0 get_num_groups(0)
20 #define GDIM_1 get_num_groups(1)
21 #define GDIM_2 get_num_groups(2)
22 #define ga_bool uchar
23 #define ga_byte char
24 #define ga_ubyte uchar
25 #define ga_short short
26 #define ga_ushort ushort
27 #define ga_int int
28 #define ga_uint uint
29 #define ga_long long
30 #define ga_ulong ulong
31 #define ga_float float
32 #define ga_double double
33 #define ga_half half
34 #define ga_size ulong
35 #define ga_ssize long
36 #define load_half(p) vload_half(0, p)
37 #define store_half(p, v) vstore_half_rtn(v, 0, p)
38 #define GA_DECL_SHARED_PARAM(type, name) , __local type *name
39 #define GA_DECL_SHARED_BODY(type, name)
40 #define GA_WARP_SIZE 64
41 
42     KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_size a_off, ga_uint N) {
43         a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
44         unsigned int idx = GID_1 * LDIM_0 * GDIM_0 +
45                            GID_0 * LDIM_0 + LID_0;
46         unsigned int ix = idx/5;
47         unsigned int iy = idx%5;
48         if (idx < N) {
49             if (ix > iy)
50                 a[idx] = 0.0;
51         }
52     }
53

有人可以告诉我如何解决这些错误吗？提前谢谢。

（我没有在这里粘贴的其他错误提示为 'TypeError：这是针对CUDA数组的'，所以我认为它们无关紧要；而那两个失败与float16支持有关，据我了解该功能是实验性的，而且我也不需要。）

Pygpu '缺少BLAS库' 及其他测试错误

0 个答案: