简而言之,我正在尝试从 C# 调用非托管的 C++ 乘法函数来进行 SIMD 数学运算,其中涉及指针操作。
C ++函数:
// Multiplies two 4-float vectors component-wise and stores the product in *a.
//
//   a  in/out: four packed floats; overwritten with a[i] * b[i].
//   b  in:     four packed floats; left unchanged.
//
// Callers (for example managed code passing a struct by reference) may hand in
// addresses that are not 16-byte aligned. Dereferencing a __m128* assumes that
// alignment, so the data is read and written with the unaligned load/store
// intrinsics instead. This also removes the need for the memcpy round trip.
extern "C" __declspec(dllexport) void SIMDVector4Mult( __m128* a, __m128* b )
{
    float* dst = reinterpret_cast<float*>( a );
    const float* rhs = reinterpret_cast<const float*>( b );

    const __m128 product = _mm_mul_ps( _mm_loadu_ps( dst ), _mm_loadu_ps( rhs ) );
    _mm_storeu_ps( dst, product );
}
C#SIMDVector4 struct:
/// <summary>
/// Four-component float vector laid out sequentially in 16 bytes so it can be
/// passed by reference to the native <c>SIMDVector4Mult</c> routine.
/// </summary>
[StructLayout( LayoutKind.Sequential, Size = 16 )]
struct SIMDVector4
{
    // Declaration order is the in-memory order seen by native code; do not reorder.
    float x;
    float y;
    float z;
    float w;

    /// <summary>Native multiply exported by SIMDMathLibrary.dll (cdecl).</summary>
    [DllImport("SIMDMathLibrary.dll", CallingConvention = CallingConvention.Cdecl )]
    extern private static unsafe void SIMDVector4Mult( ref SIMDVector4 a, ref SIMDVector4 b );

    /// <summary>Initializes all four components.</summary>
    public SIMDVector4( float x, float y, float z, float w )
    {
        this.w = w;
        this.z = z;
        this.y = y;
        this.x = x;
    }

    /// <summary>
    /// Multiplies two vectors through the native routine. Both operands are
    /// by-value copies, so the callers' variables are not modified.
    /// </summary>
    public static unsafe SIMDVector4 operator *( SIMDVector4 a, SIMDVector4 b )
    {
        SIMDVector4 product = a;
        SIMDVector4Mult( ref product, ref b );
        return product;
    }

    /// <summary>Formats the vector as "X: .., Y: .., Z: .., W: ..".</summary>
    public override string ToString( )
    {
        return string.Concat( "X: ", x, ", Y: ", y, ", Z: ", z, ", W: ", w );
    }
}
目的是将 SIMD 数学运算加入我的 C# 代码库,同时练习通过 P/Invoke 将 C# 与 C++ 结合使用。
如果我以 float* 参数接收它们,先复制到临时的 __m128 变量中,做乘法运算,再把结果复制回去,它是可以工作的。但这比在 C# 中逐个分量相乘要慢得多:我用一个经过优化的 Vector4 类做了测试,它的性能大约是“复制-乘法-复制”方法的十倍。
我已经尝试了几十种不同的做法,包括当前的这种设置,它会触发标题中提到的受保护内存读/写访问错误:“尝试读取或写入受保护的内存。这通常指示其他内存已损坏。”
如何修复此错误?我应该修复这个错误吗?还是应该换一种完全不同的方法来实现这个目的?
感谢。
答案 0 :(得分:2)
这是一个应该可以工作的更新版本。现在 struct 包装了一个对齐的指针,所以需要额外的 4-8 个字节。我不确定这层间接访问会带来多大的性能损失。VirtualAlloc 返回的内存是按分配粒度对齐的,所以我们一次分配一大块。
以下示例写得比较粗糙。如果可能,您也可以考虑使用 C++/CLI。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using System.Runtime.ConstrainedExecution;
namespace ConsoleApplication1
{
/// <summary>
/// Console demo: allocates one block of vector slots, fills it, multiplies
/// three vectors and prints the result.
/// </summary>
class Program
{
    unsafe static void Main(string[] args)
    {
        // One allocation-granularity worth of 16-byte slots.
        int slotCount = AlignedBlock.AllocationGranularity / SIMDVector4.Sizeof;

        using (AlignedBlock block = new AlignedBlock(SIMDVector4.Sizeof, slotCount))
        {
            // Initialize every slot in the block.
            int slot = 0;
            while (slot < block.Count)
            {
                SIMDVector4.Create(block[slot], 1, 2, 3, 4);
                slot++;
            }

            SIMDVector4 a = SIMDVector4.Create(block[0], 1, 2, 3, 4);
            SIMDVector4 b = SIMDVector4.Create(block[1], 1, 2, 3, 4);
            SIMDVector4 c = SIMDVector4.Create(block[2], 5, 6, 7, 8);

            SIMDVector4 d = a * b * c;
            string text = d.ToString();
            Console.WriteLine(text);
        }

        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}
/// <summary>
/// Owns a single VirtualAlloc allocation and exposes it as <see cref="Count"/>
/// consecutive slots of <see cref="ItemSize"/> bytes each. The memory is
/// released by <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public sealed unsafe class AlignedBlock : CriticalFinalizerObject, IDisposable
{
    // dwFreeType value passed to VirtualFree (MEM_RELEASE).
    private const uint MemRelease = 0x8000;

    // Base address of the allocation; null once the block has been released.
    private void* ptr;

    /// <summary>Caches the page size and allocation granularity reported by GetSystemInfo.</summary>
    static AlignedBlock()
    {
        SYSTEM_INFO info = new SYSTEM_INFO();
        NativeMethods.GetSystemInfo(ref info);
        PageSize = (int)info.dwPageSize;
        AllocationGranularity = (int)info.dwAllocationGranularity;
    }

    /// <summary>Allocates <paramref name="count"/> slots of <paramref name="itemSize"/> bytes.</summary>
    /// <param name="itemSize">Size of one slot in bytes; must be positive.</param>
    /// <param name="count">Number of slots; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// An argument is not positive, or the total size does not fit in an <see cref="int"/>.
    /// </exception>
    /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null address.</exception>
    public AlignedBlock(int itemSize, int count)
    {
        if (itemSize <= 0)
        {
            throw new ArgumentOutOfRangeException("itemSize", "Item size must be positive.");
        }
        if (count <= 0)
        {
            throw new ArgumentOutOfRangeException("count", "Item count must be positive.");
        }
        // ByteSize is an int, so reject sizes whose product would overflow it.
        if ((long)itemSize * count > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException("count", "Total block size exceeds Int32.MaxValue bytes.");
        }

        this.ItemSize = itemSize;
        this.Count = count;

        // Reserve and commit in one call, as recommended by the VirtualAlloc documentation.
        IntPtr allocation = NativeMethods.VirtualAlloc(
            IntPtr.Zero,
            new UIntPtr((uint)this.ByteSize),
            AllocationType.COMMIT | AllocationType.RESERVE,
            MemoryProtection.READWRITE);

        if (allocation == IntPtr.Zero)
        {
            int error = Marshal.GetLastWin32Error();
            throw new OutOfMemoryException(
                "VirtualAlloc failed to allocate " + this.ByteSize + " bytes (Win32 error " + error + ").");
        }

        this.ptr = allocation.ToPointer();
    }

    /// <summary>Releases the allocation if <see cref="Dispose"/> was never called.</summary>
    ~AlignedBlock()
    {
        this.Release();
    }

    /// <summary>System page size in bytes, as reported by GetSystemInfo.</summary>
    public static int PageSize { get; private set; }

    /// <summary>System allocation granularity in bytes, as reported by GetSystemInfo.</summary>
    public static int AllocationGranularity { get; private set; }

    /// <summary>Size of one slot in bytes.</summary>
    public int ItemSize { get; private set; }

    /// <summary>Number of slots in the block.</summary>
    public int Count { get; private set; }

    /// <summary>Total size of the block in bytes (<see cref="Count"/> * <see cref="ItemSize"/>).</summary>
    public int ByteSize
    {
        get
        {
            return this.Count * this.ItemSize;
        }
    }

    /// <summary>Returns the address of slot <paramref name="index"/>.</summary>
    /// <exception cref="ObjectDisposedException">The block has already been released.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> is outside [0, <see cref="Count"/>).
    /// </exception>
    public void* this[int index]
    {
        get
        {
            if (this.ptr == null)
            {
                throw new ObjectDisposedException("AlignedBlock");
            }
            if (index < 0 || index >= this.Count)
            {
                throw new ArgumentOutOfRangeException("index", "Index must be in the range [0, Count).");
            }

            int offset = this.ItemSize * index;
            return ((byte*)this.ptr) + offset;
        }
    }

    /// <summary>Releases the allocation. Safe to call more than once.</summary>
    public void Dispose()
    {
        this.Release();
        // Nothing is left for the finalizer to do.
        GC.SuppressFinalize(this);
    }

    // Frees the allocation once; later calls are no-ops.
    private void Release()
    {
        if (this.ptr == null)
        {
            return;
        }

        // MEM_RELEASE requires a size of zero. The result is deliberately ignored:
        // this runs from Dispose and the finalizer, neither of which should throw.
        NativeMethods.VirtualFree(new IntPtr(this.ptr), UIntPtr.Zero, MemRelease);
        this.ptr = null;
    }
}
/// <summary>
/// Handle to four consecutive floats (x, y, z, w) stored in caller-owned
/// unmanaged memory. The struct holds only the pointer: copies of a
/// <see cref="SIMDVector4"/> alias the same four floats, and the memory must
/// outlive every handle that refers to it.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
unsafe struct SIMDVector4
{
    /// <summary>Number of bytes occupied by the four float components.</summary>
    public const int Sizeof = 16;

    // Address of the x component; null for a default (empty) instance.
    private float* ptr;

    /// <summary>
    /// Writes the four components to <paramref name="ptr"/> and returns a handle to them.
    /// </summary>
    /// <param name="ptr">
    /// Destination with room for <see cref="Sizeof"/> bytes. NOTE(review): the native
    /// multiply receives this address as a __m128*, so it presumably needs to be
    /// 16-byte aligned; confirm against the native build.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="ptr"/> is null.</exception>
    public static SIMDVector4 Create(void* ptr, float x, float y, float z, float w)
    {
        if (ptr == null)
        {
            throw new ArgumentNullException("ptr");
        }

        float* value = (float*)ptr;
        value[0] = x;
        value[1] = y;
        value[2] = z;
        value[3] = w;
        return new SIMDVector4(value);
    }

    private SIMDVector4(float* ptr)
    {
        this.ptr = ptr;
    }

    /// <summary>True when the handle does not point at any storage.</summary>
    public bool IsEmpty
    {
        get
        {
            return this.ptr == null;
        }
    }

    /// <summary>First component; reads 0 and ignores writes when the handle is empty.</summary>
    public float x
    {
        get { return this.Read(0); }
        set { this.Write(0, value); }
    }

    /// <summary>Second component; reads 0 and ignores writes when the handle is empty.</summary>
    public float y
    {
        get { return this.Read(1); }
        set { this.Write(1, value); }
    }

    /// <summary>Third component; reads 0 and ignores writes when the handle is empty.</summary>
    public float z
    {
        get { return this.Read(2); }
        set { this.Write(2, value); }
    }

    /// <summary>Fourth component; reads 0 and ignores writes when the handle is empty.</summary>
    public float w
    {
        get { return this.Read(3); }
        set { this.Write(3, value); }
    }

    /// <summary>Formats the vector as "X: .., Y: .., Z: .., W: ..".</summary>
    public override string ToString()
    {
        return "X: " + x + ", Y: " + y + ", Z: " + z + ", W: " + w;
    }

    /// <summary>
    /// Multiplies through the native routine. The native pointers are the operands'
    /// own storage and the returned handle aliases <paramref name="a"/>, so the
    /// floats behind <paramref name="a"/> are presumably overwritten with the product.
    /// </summary>
    /// <exception cref="InvalidOperationException">Either operand is empty.</exception>
    public static SIMDVector4 operator *(SIMDVector4 a, SIMDVector4 b)
    {
        // The accessors tolerate an empty handle; native code would dereference null.
        if (a.IsEmpty || b.IsEmpty)
        {
            throw new InvalidOperationException("Cannot multiply an empty SIMDVector4.");
        }

        SIMDVector4Mult(a.ptr, b.ptr);
        return a;
    }

    // Reads the component at the given offset, or 0 when the handle is empty.
    private float Read(int component)
    {
        return this.IsEmpty ? 0f : this.ptr[component];
    }

    // Writes the component at the given offset; does nothing when the handle is empty.
    private void Write(int component, float value)
    {
        if (!this.IsEmpty)
        {
            this.ptr[component] = value;
        }
    }

    // NOTE(review): the DLL path is an absolute, machine-specific location and must be
    // adjusted for the machine this runs on.
    [DllImport(@"C:\Users\xxx\Documents\Visual Studio 2010\Projects\TestDll\Debug\TestDll.dll", CallingConvention = CallingConvention.Cdecl)]
    extern private static unsafe void SIMDVector4Mult(void* a, void* b);
}
/// <summary>P/Invoke declarations for the kernel32.dll functions used in this file.</summary>
internal static class NativeMethods
{
    /// <summary>Fills <paramref name="lpSystemInfo"/> with system information.</summary>
    [DllImport("kernel32.dll")]
    public static extern void GetSystemInfo(
        [MarshalAs(UnmanagedType.Struct)] ref SYSTEM_INFO lpSystemInfo);

    /// <summary>
    /// Allocates virtual memory. Returns the base address, or <see cref="IntPtr.Zero"/>
    /// on failure (last error is captured).
    /// </summary>
    [DllImport("kernel32.dll", SetLastError = true)]
    public static extern IntPtr VirtualAlloc(
        IntPtr lpAddress,
        UIntPtr dwSize,
        AllocationType flAllocationType,
        MemoryProtection flProtect);

    /// <summary>
    /// Frees memory obtained from <see cref="VirtualAlloc"/>. Returns false on failure
    /// (last error is captured).
    /// </summary>
    [DllImport("kernel32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    public static extern bool VirtualFree(
        IntPtr lpAddress,
        UIntPtr dwSize,
        uint dwFreeType);
}
/// <summary>
/// Managed layout of the structure filled in by NativeMethods.GetSystemInfo.
/// The layout is sequential, so the order and types of the fields below define
/// what native code writes into; do not reorder or retype them.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SYSTEM_INFO
{
// Overlapping OEM-id / processor-architecture words (see _PROCESSOR_INFO_UNION).
internal _PROCESSOR_INFO_UNION uProcessorInfo;
// Page size; read by AlignedBlock as PageSize.
public uint dwPageSize;
public IntPtr lpMinimumApplicationAddress;
public IntPtr lpMaximumApplicationAddress;
// Pointer-sized, hence IntPtr rather than uint.
public IntPtr dwActiveProcessorMask;
public uint dwNumberOfProcessors;
public uint dwProcessorType;
// Allocation granularity; read by AlignedBlock as AllocationGranularity.
public uint dwAllocationGranularity;
public ushort dwProcessorLevel;
public ushort dwProcessorRevision;
}
/// <summary>
/// Explicit-layout union used as the first member of SYSTEM_INFO: one 32-bit
/// value at offset 0 that overlaps two 16-bit values at offsets 0 and 2.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
public struct _PROCESSOR_INFO_UNION
{
// Whole 32-bit view of the union.
[FieldOffset(0)]
internal uint dwOemId;
// Low 16 bits of the same storage.
[FieldOffset(0)]
internal ushort wProcessorArchitecture;
// High 16 bits of the same storage.
[FieldOffset(2)]
internal ushort wReserved;
}
/// <summary>
/// Allocation-type flags passed as <c>flAllocationType</c> to
/// NativeMethods.VirtualAlloc. Members are listed in ascending numeric order.
/// </summary>
[Flags]
public enum AllocationType : uint
{
    COMMIT      = 0x00001000,
    RESERVE     = 0x00002000,
    RESET       = 0x00080000,
    TOP_DOWN    = 0x00100000,
    WRITE_WATCH = 0x00200000,
    PHYSICAL    = 0x00400000,
    LARGE_PAGES = 0x20000000
}
/// <summary>
/// Page-protection values passed as <c>flProtect</c> to
/// NativeMethods.VirtualAlloc. Members are listed in ascending numeric order;
/// the <c>*_Modifierflag</c> members are the ones named as modifiers.
/// </summary>
[Flags]
public enum MemoryProtection : uint
{
    NOACCESS                  = 0x001,
    READONLY                  = 0x002,
    READWRITE                 = 0x004,
    WRITECOPY                 = 0x008,
    EXECUTE                   = 0x010,
    EXECUTE_READ              = 0x020,
    EXECUTE_READWRITE         = 0x040,
    EXECUTE_WRITECOPY         = 0x080,
    GUARD_Modifierflag        = 0x100,
    NOCACHE_Modifierflag      = 0x200,
    WRITECOMBINE_Modifierflag = 0x400
}
}