我试图在安装了AMD OpenCL的Windows 10下运行Theano。Python,Theano,Pygpu和其他依赖项都是使用Anaconda安装的,并且按照开发者的说明安装了MKL作为BLAS。从numpy的配置来看,MKL似乎位于预期的位置:
>>> import numpy as np
>>> np.__config__.show()
lapack_opt_info:
define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
blas_opt_info:
define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
lapack_mkl_info:
define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
blas_mkl_info:
define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll']
include_dirs = ['C:/ProgramData/Anaconda3\\Library\\include']
library_dirs = ['C:/ProgramData/Anaconda3\\Library\\lib']
然而,当我运行pygpu.test()时,遇到了很多错误(1166个错误,外加2个失败)。其中大约一半的错误是由于找不到BLAS库:
======================================================================
ERROR: pygpu.tests.test_blas.test_dot(1, 'float32', True, True, True, False)
----------------------------------------------------------------------
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest
self.test(*self.arg)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\support.py", line 44, in f
func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_blas.py", line 44, in dot
gr = gblas.dot(gX, gY, gZ, overwrite_z=overwrite)
File "pygpu\blas.pyx", line 79, in pygpu.blas.dot
File "pygpu\blas.pyx", line 29, in pygpu.blas.pygpu_blas_rdot
pygpu.gpuarray.GpuArrayException: (b'Missing Blas library', 5)
...每个测试的具体细节各不相同,但回溯(traceback)是相同的。
最先报告的两个错误是Clang前端编译失败。我承认我不明白它们是如何/为什么出现的,但它们看起来很严重:
======================================================================
ERROR: pygpu.tests.test_basic.test_tril
----------------------------------------------------------------------
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest
self.test(*self.arg)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_basic.py", line 15, in test_tril
result = tril(ag, inplace=inplace)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 62, in tril
k = _generate_kernel(A.context, cols, upper)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 25, in _generate_kernel
k = GpuKernel(src, "extract_tri", spec, context=ctx)
File "pygpu\gpuarray.pyx", line 2428, in pygpu.gpuarray.GpuKernel.__cinit__
File "pygpu\gpuarray.pyx", line 446, in pygpu.gpuarray.kernel_init
pygpu.gpuarray.GpuArrayException: Program build failure ::
C:\Users\megha\AppData\Local\Temp\\OCL11404T5.cl:43:38: error: casting '__global float *' to type 'char *' changes address space of pointer
a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
^ ~
1 error generated.
error: Clang front-end compilation failed!
Frontend phase failed compilation.
Error: Compiling CL to IR
1 #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE)
2 #define WITHIN_KERNEL /* empty */
3 #define KERNEL __kernel
4 #define GLOBAL_MEM __global
5 #define LOCAL_MEM __local
6 #define LOCAL_MEM_ARG __local
7 #ifndef NULL
8 #define NULL ((void*)0)
9 #endif
10 #define LID_0 get_local_id(0)
11 #define LID_1 get_local_id(1)
12 #define LID_2 get_local_id(2)
13 #define LDIM_0 get_local_size(0)
14 #define LDIM_1 get_local_size(1)
15 #define LDIM_2 get_local_size(2)
16 #define GID_0 get_group_id(0)
17 #define GID_1 get_group_id(1)
18 #define GID_2 get_group_id(2)
19 #define GDIM_0 get_num_groups(0)
20 #define GDIM_1 get_num_groups(1)
21 #define GDIM_2 get_num_groups(2)
22 #define ga_bool uchar
23 #define ga_byte char
24 #define ga_ubyte uchar
25 #define ga_short short
26 #define ga_ushort ushort
27 #define ga_int int
28 #define ga_uint uint
29 #define ga_long long
30 #define ga_ulong ulong
31 #define ga_float float
32 #define ga_double double
33 #define ga_half half
34 #define ga_size ulong
35 #define ga_ssize long
36 #define load_half(p) vload_half(0, p)
37 #define store_half(p, v) vstore_half_rtn(v, 0, p)
38 #define GA_DECL_SHARED_PARAM(type, name) , __local type *name
39 #define GA_DECL_SHARED_BODY(type, name)
40 #define GA_WARP_SIZE 64
41
42 KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_size a_off, ga_uint N) {
43 a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
44 unsigned int idx = GID_1 * LDIM_0 * GDIM_0 +
45 GID_0 * LDIM_0 + LID_0;
46 unsigned int ix = idx/5;
47 unsigned int iy = idx%5;
48 if (idx < N) {
49 if (ix < iy)
50 a[idx] = 0.0;
51 }
52 }
53
======================================================================
ERROR: pygpu.tests.test_basic.test_triu
----------------------------------------------------------------------
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\nose\case.py", line 198, in runTest
self.test(*self.arg)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\tests\test_basic.py", line 29, in test_triu
result = triu(ag, inplace=inplace)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 43, in triu
k = _generate_kernel(A.context, cols, upper)
File "C:\ProgramData\Anaconda3\lib\site-packages\pygpu\basic.py", line 25, in _generate_kernel
k = GpuKernel(src, "extract_tri", spec, context=ctx)
File "pygpu\gpuarray.pyx", line 2428, in pygpu.gpuarray.GpuKernel.__cinit__
File "pygpu\gpuarray.pyx", line 446, in pygpu.gpuarray.kernel_init
pygpu.gpuarray.GpuArrayException: Program build failure ::
C:\Users\megha\AppData\Local\Temp\\OCL11404T7.cl:43:38: error: casting '__global float *' to type 'char *' changes address space of pointer
a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
^ ~
1 error generated.
error: Clang front-end compilation failed!
Frontend phase failed compilation.
Error: Compiling CL to IR
1 #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE)
2 #define WITHIN_KERNEL /* empty */
3 #define KERNEL __kernel
4 #define GLOBAL_MEM __global
5 #define LOCAL_MEM __local
6 #define LOCAL_MEM_ARG __local
7 #ifndef NULL
8 #define NULL ((void*)0)
9 #endif
10 #define LID_0 get_local_id(0)
11 #define LID_1 get_local_id(1)
12 #define LID_2 get_local_id(2)
13 #define LDIM_0 get_local_size(0)
14 #define LDIM_1 get_local_size(1)
15 #define LDIM_2 get_local_size(2)
16 #define GID_0 get_group_id(0)
17 #define GID_1 get_group_id(1)
18 #define GID_2 get_group_id(2)
19 #define GDIM_0 get_num_groups(0)
20 #define GDIM_1 get_num_groups(1)
21 #define GDIM_2 get_num_groups(2)
22 #define ga_bool uchar
23 #define ga_byte char
24 #define ga_ubyte uchar
25 #define ga_short short
26 #define ga_ushort ushort
27 #define ga_int int
28 #define ga_uint uint
29 #define ga_long long
30 #define ga_ulong ulong
31 #define ga_float float
32 #define ga_double double
33 #define ga_half half
34 #define ga_size ulong
35 #define ga_ssize long
36 #define load_half(p) vload_half(0, p)
37 #define store_half(p, v) vstore_half_rtn(v, 0, p)
38 #define GA_DECL_SHARED_PARAM(type, name) , __local type *name
39 #define GA_DECL_SHARED_BODY(type, name)
40 #define GA_WARP_SIZE 64
41
42 KERNEL void extract_tri(GLOBAL_MEM ga_float *a, ga_size a_off, ga_uint N) {
43 a = (GLOBAL_MEM ga_float *)(((char *)a) + a_off);
44 unsigned int idx = GID_1 * LDIM_0 * GDIM_0 +
45 GID_0 * LDIM_0 + LID_0;
46 unsigned int ix = idx/5;
47 unsigned int iy = idx%5;
48 if (idx < N) {
49 if (ix > iy)
50 a[idx] = 0.0;
51 }
52 }
53
有人可以告诉我如何解决这些错误吗?提前谢谢。
(我没有在这里粘贴的其他错误都显示'TypeError:这是针对CUDA数组的',所以我认为它们无关紧要;而那2个失败与float16支持有关,据我了解该支持还是实验性的,而且我无论如何也用不到。)