为什么CPU在此代码中运行速度比GPU快?

时间:2017-08-24 13:31:39

标签: matlab loops gpu

我尝试使用#ifndef ASSIMP_BUILD_NO_X_IMPORTER #include "XFileParser.h" #include "XFileHelper.h" #include "fast_atof.h" #include "Exceptional.h" #include "TinyFormatter.h" #include "ByteSwapper.h" #include "StringUtils.h" #include <assimp/DefaultLogger.hpp> using namespace Assimp; using namespace Assimp::XFile; using namespace Assimp::Formatter; #ifndef ASSIMP_BUILD_NO_COMPRESSED_X # ifdef ASSIMP_BUILD_NO_OWN_ZLIB # include <zlib.h> # else # include "../contrib/zlib/zlib.h" # endif // Magic identifier for MSZIP compressed data #define MSZIP_MAGIC 0x4B43 #define MSZIP_BLOCK 32786 // ------------------------------------------------------------------------------------------------ // Dummy memory wrappers for use with zlib static void* dummy_alloc (void* /*opaque*/, unsigned int items, unsigned int size) { return ::operator new(items*size); } static void dummy_free (void* /*opaque*/, void* address) { return ::operator delete(address); } #endif // !! ASSIMP_BUILD_NO_COMPRESSED_X // ------------------------------------------------------------------------------------------------ // Constructor. Creates a data structure out of the XFile given in the memory block. XFileParser::XFileParser( const std::vector<char>& pBuffer) { mMajorVersion = mMinorVersion = 0; mIsBinaryFormat = false; mBinaryNumCount = 0; P = End = NULL; mLineNumber = 0; mScene = NULL; // vector to store uncompressed file for INFLATE'd X files std::vector<char> uncompressed; // set up memory pointers P = &pBuffer.front(); End = P + pBuffer.size() - 1; // check header if( strncmp( P, "xof ", 4) != 0) throw DeadlyImportError( "Header mismatch, file is not an XFile."); // read version. 
It comes in a four byte format such as "0302" mMajorVersion = (unsigned int)(P[4] - 48) * 10 + (unsigned int)(P[5] - 48); mMinorVersion = (unsigned int)(P[6] - 48) * 10 + (unsigned int)(P[7] - 48); bool compressed = false; // txt - pure ASCII text format if( strncmp( P + 8, "txt ", 4) == 0) mIsBinaryFormat = false; // bin - Binary format else if( strncmp( P + 8, "bin ", 4) == 0) mIsBinaryFormat = true; // tzip - Inflate compressed text format else if( strncmp( P + 8, "tzip", 4) == 0) { mIsBinaryFormat = false; compressed = true; } // bzip - Inflate compressed binary format else if( strncmp( P + 8, "bzip", 4) == 0) { mIsBinaryFormat = true; compressed = true; } else ThrowException( format() << "Unsupported xfile format '" << P[8] << P[9] << P[10] << P[11] << "'"); // float size mBinaryFloatSize = (unsigned int)(P[12] - 48) * 1000 + (unsigned int)(P[13] - 48) * 100 + (unsigned int)(P[14] - 48) * 10 + (unsigned int)(P[15] - 48); if( mBinaryFloatSize != 32 && mBinaryFloatSize != 64) ThrowException( format() << "Unknown float size " << mBinaryFloatSize << " specified in xfile header." ); // The x format specifies size in bits, but we work in bytes mBinaryFloatSize /= 8; P += 16; // If this is a compressed X file, apply the inflate algorithm to it if (compressed) { #ifdef ASSIMP_BUILD_NO_COMPRESSED_X throw DeadlyImportError("Assimp was built without compressed X support"); #else /* /////////////////////////////////////////////////////////////////////// * COMPRESSED X FILE FORMAT * /////////////////////////////////////////////////////////////////////// * [xhead] * 2 major * 2 minor * 4 type // bzip,tzip * [mszip_master_head] * 4 unkn // checksum? * 2 unkn // flags? (seems to be constant) * [mszip_head] * 2 ofs // offset to next section * 2 magic // 'CK' * ... ofs bytes of data * ... next mszip_head * * http://www.kdedevelopers.org/node/3181 has been very helpful. 
* /////////////////////////////////////////////////////////////////////// */ // build a zlib stream z_stream stream; stream.opaque = NULL; stream.zalloc = &dummy_alloc; stream.zfree = &dummy_free; stream.data_type = (mIsBinaryFormat ? Z_BINARY : Z_ASCII); // initialize the inflation algorithm ::inflateInit2(&stream, -MAX_WBITS); // skip unknown data (checksum, flags?) P += 6; // First find out how much storage we'll need. Count sections. const char* P1 = P; unsigned int est_out = 0; while (P1 + 3 < End) { // read next offset uint16_t ofs = *((uint16_t*)P1); AI_SWAP2(ofs); P1 += 2; if (ofs >= MSZIP_BLOCK) throw DeadlyImportError("X: Invalid offset to next MSZIP compressed block"); // check magic word uint16_t magic = *((uint16_t*)P1); AI_SWAP2(magic); P1 += 2; if (magic != MSZIP_MAGIC) throw DeadlyImportError("X: Unsupported compressed format, expected MSZIP header"); // and advance to the next offset P1 += ofs; est_out += MSZIP_BLOCK; // one decompressed block is 32786 in size } // Allocate storage and terminating zero and do the actual uncompressing uncompressed.resize(est_out + 1); char* out = &uncompressed.front(); while (P + 3 < End) { uint16_t ofs = *((uint16_t*)P); AI_SWAP2(ofs); P += 4; if (P + ofs > End + 2) { throw DeadlyImportError("X: Unexpected EOF in compressed chunk"); } // push data to the stream stream.next_in = (Bytef*)P; stream.avail_in = ofs; stream.next_out = (Bytef*)out; stream.avail_out = MSZIP_BLOCK; // and decompress the data .... 
int ret = ::inflate( &stream, Z_SYNC_FLUSH ); if (ret != Z_OK && ret != Z_STREAM_END) throw DeadlyImportError("X: Failed to decompress MSZIP-compressed data"); ::inflateReset( &stream ); ::inflateSetDictionary( &stream, (const Bytef*)out , MSZIP_BLOCK - stream.avail_out ); // and advance to the next offset out += MSZIP_BLOCK - stream.avail_out; P += ofs; } // terminate zlib ::inflateEnd(&stream); // ok, update pointers to point to the uncompressed file data P = &uncompressed[0]; End = out; // FIXME: we don't need the compressed data anymore, could release // it already for better memory usage. Consider breaking const-co. DefaultLogger::get()->info("Successfully decompressed MSZIP-compressed file"); #endif // !! ASSIMP_BUILD_NO_COMPRESSED_X } else { // start reading here ReadUntilEndOfLine(); } mScene = new Scene; ParseFile(); // filter the imported hierarchy for some degenerated cases if( mScene->mRootNode) { FilterHierarchy( mScene->mRootNode); } } // ------------------------------------------------------------------------------------------------ // Destructor. 
Destroys all imported data along with it XFileParser::~XFileParser() { // kill everything we created delete mScene; } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseFile() { bool running = true; while( running ) { // read name of next object std::string objectName = GetNextToken(); if (objectName.length() == 0) break; // parse specific object if( objectName == "template") ParseDataObjectTemplate(); else if( objectName == "Frame") ParseDataObjectFrame( NULL); else if( objectName == "Mesh") { // some meshes have no frames at all Mesh* mesh = new Mesh; ParseDataObjectMesh( mesh); mScene->mGlobalMeshes.push_back( mesh); } else if( objectName == "AnimTicksPerSecond") ParseDataObjectAnimTicksPerSecond(); else if( objectName == "AnimationSet") ParseDataObjectAnimationSet(); else if( objectName == "Material") { // Material outside of a mesh or node Material material; ParseDataObjectMaterial( &material); mScene->mGlobalMaterials.push_back( material); } else if( objectName == "}") { // whatever? DefaultLogger::get()->warn("} found in dataObject"); } else { // unknown format DefaultLogger::get()->warn("Unknown data object in animation of .x file"); ParseUnknownDataObject(); } } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectTemplate() { // parse a template data object. Currently not stored. 
std::string name; readHeadOfDataObject( &name); // read GUID std::string guid = GetNextToken(); // read and ignore data members bool running = true; while ( running ) { std::string s = GetNextToken(); if( s == "}") break; if( s.length() == 0) ThrowException( "Unexpected end of file reached while parsing template definition"); } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectFrame( Node* pParent) { // A coordinate frame, or "frame of reference." The Frame template // is open and can contain any object. The Direct3D extensions (D3DX) // mesh-loading functions recognize Mesh, FrameTransformMatrix, and // Frame template instances as child objects when loading a Frame // instance. std::string name; readHeadOfDataObject(&name); // create a named node and place it at its parent, if given Node* node = new Node( pParent); node->mName = name; if( pParent) { pParent->mChildren.push_back( node); } else { // there might be multiple root nodes if( mScene->mRootNode != NULL) { // place a dummy root if not there if( mScene->mRootNode->mName != "$dummy_root") { Node* exroot = mScene->mRootNode; mScene->mRootNode = new Node( NULL); mScene->mRootNode->mName = "$dummy_root"; mScene->mRootNode->mChildren.push_back( exroot); exroot->mParent = mScene->mRootNode; } // put the new node as its child instead mScene->mRootNode->mChildren.push_back( node); node->mParent = mScene->mRootNode; } else { // it's the first node imported. place it as root mScene->mRootNode = node; } } // Now inside a frame. // read tokens until closing brace is reached. 
bool running = true; while ( running ) { std::string objectName = GetNextToken(); if (objectName.size() == 0) ThrowException( "Unexpected end of file reached while parsing frame"); if( objectName == "}") break; // frame finished else if( objectName == "Frame") ParseDataObjectFrame( node); // child frame else if( objectName == "FrameTransformMatrix") ParseDataObjectTransformationMatrix( node->mTrafoMatrix); else if( objectName == "Mesh") { Mesh* mesh = new Mesh(name); node->mMeshes.push_back( mesh); ParseDataObjectMesh( mesh); } else { DefaultLogger::get()->warn("Unknown data object in frame in x file"); ParseUnknownDataObject(); } } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectTransformationMatrix( aiMatrix4x4& pMatrix) { // read header, we're not interested if it has a name readHeadOfDataObject(); // read its components pMatrix.a1 = ReadFloat(); pMatrix.b1 = ReadFloat(); pMatrix.c1 = ReadFloat(); pMatrix.d1 = ReadFloat(); pMatrix.a2 = ReadFloat(); pMatrix.b2 = ReadFloat(); pMatrix.c2 = ReadFloat(); pMatrix.d2 = ReadFloat(); pMatrix.a3 = ReadFloat(); pMatrix.b3 = ReadFloat(); pMatrix.c3 = ReadFloat(); pMatrix.d3 = ReadFloat(); pMatrix.a4 = ReadFloat(); pMatrix.b4 = ReadFloat(); pMatrix.c4 = ReadFloat(); pMatrix.d4 = ReadFloat(); // trailing symbols CheckForSemicolon(); CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMesh( Mesh* pMesh) { std::string name; readHeadOfDataObject( &name); // read vertex count unsigned int numVertices = ReadInt(); pMesh->mPositions.resize( numVertices); // read vertices for( unsigned int a = 0; a < numVertices; a++) pMesh->mPositions[a] = ReadVector3(); // read position faces unsigned int numPosFaces = ReadInt(); pMesh->mPosFaces.resize( numPosFaces); for( unsigned int a = 0; a < numPosFaces; a++) { // read indices unsigned int numIndices = 
ReadInt(); Face& face = pMesh->mPosFaces[a]; for (unsigned int b = 0; b < numIndices; b++) { face.mIndices.push_back( ReadInt() ); } TestForSeparator(); } // here, other data objects may follow bool running = true; while ( running ) { std::string objectName = GetNextToken(); if( objectName.size() == 0) ThrowException( "Unexpected end of file while parsing mesh structure"); else if( objectName == "}") break; // mesh finished else if( objectName == "MeshNormals") ParseDataObjectMeshNormals( pMesh); else if( objectName == "MeshTextureCoords") ParseDataObjectMeshTextureCoords( pMesh); else if( objectName == "MeshVertexColors") ParseDataObjectMeshVertexColors( pMesh); else if( objectName == "MeshMaterialList") ParseDataObjectMeshMaterialList( pMesh); else if( objectName == "VertexDuplicationIndices") ParseUnknownDataObject(); // we'll ignore vertex duplication indices else if( objectName == "XSkinMeshHeader") ParseDataObjectSkinMeshHeader( pMesh); else if( objectName == "SkinWeights") ParseDataObjectSkinWeights( pMesh); else { DefaultLogger::get()->warn("Unknown data object in mesh in x file"); ParseUnknownDataObject(); } } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectSkinWeights( Mesh *pMesh) { readHeadOfDataObject(); std::string transformNodeName; GetNextTokenAsString( transformNodeName); pMesh->mBones.push_back( Bone()); Bone& bone = pMesh->mBones.back(); bone.mName = transformNodeName; // read vertex weights unsigned int numWeights = ReadInt(); bone.mWeights.reserve( numWeights); for( unsigned int a = 0; a < numWeights; a++) { BoneWeight weight; weight.mVertex = ReadInt(); bone.mWeights.push_back( weight); } // read vertex weights for( unsigned int a = 0; a < numWeights; a++) bone.mWeights[a].mWeight = ReadFloat(); // read matrix offset bone.mOffsetMatrix.a1 = ReadFloat(); bone.mOffsetMatrix.b1 = ReadFloat(); bone.mOffsetMatrix.c1 = ReadFloat(); bone.mOffsetMatrix.d1 = 
ReadFloat(); bone.mOffsetMatrix.a2 = ReadFloat(); bone.mOffsetMatrix.b2 = ReadFloat(); bone.mOffsetMatrix.c2 = ReadFloat(); bone.mOffsetMatrix.d2 = ReadFloat(); bone.mOffsetMatrix.a3 = ReadFloat(); bone.mOffsetMatrix.b3 = ReadFloat(); bone.mOffsetMatrix.c3 = ReadFloat(); bone.mOffsetMatrix.d3 = ReadFloat(); bone.mOffsetMatrix.a4 = ReadFloat(); bone.mOffsetMatrix.b4 = ReadFloat(); bone.mOffsetMatrix.c4 = ReadFloat(); bone.mOffsetMatrix.d4 = ReadFloat(); CheckForSemicolon(); CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectSkinMeshHeader( Mesh* /*pMesh*/ ) { readHeadOfDataObject(); /*unsigned int maxSkinWeightsPerVertex =*/ ReadInt(); /*unsigned int maxSkinWeightsPerFace =*/ ReadInt(); /*unsigned int numBonesInMesh = */ReadInt(); CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMeshNormals( Mesh* pMesh) { readHeadOfDataObject(); // read count unsigned int numNormals = ReadInt(); pMesh->mNormals.resize( numNormals); // read normal vectors for( unsigned int a = 0; a < numNormals; a++) pMesh->mNormals[a] = ReadVector3(); // read normal indices unsigned int numFaces = ReadInt(); if( numFaces != pMesh->mPosFaces.size()) ThrowException( "Normal face count does not match vertex face count."); for( unsigned int a = 0; a < numFaces; a++) { unsigned int numIndices = ReadInt(); pMesh->mNormFaces.push_back( Face()); Face& face = pMesh->mNormFaces.back(); for( unsigned int b = 0; b < numIndices; b++) face.mIndices.push_back( ReadInt()); TestForSeparator(); } CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMeshTextureCoords( Mesh* pMesh) { readHeadOfDataObject(); if( pMesh->mNumTextures + 1 > AI_MAX_NUMBER_OF_TEXTURECOORDS) ThrowException( "Too many sets 
of texture coordinates"); std::vector<aiVector2D>& coords = pMesh->mTexCoords[pMesh->mNumTextures++]; unsigned int numCoords = ReadInt(); if( numCoords != pMesh->mPositions.size()) ThrowException( "Texture coord count does not match vertex count"); coords.resize( numCoords); for( unsigned int a = 0; a < numCoords; a++) coords[a] = ReadVector2(); CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMeshVertexColors( Mesh* pMesh) { readHeadOfDataObject(); if( pMesh->mNumColorSets + 1 > AI_MAX_NUMBER_OF_COLOR_SETS) ThrowException( "Too many colorsets"); std::vector<aiColor4D>& colors = pMesh->mColors[pMesh->mNumColorSets++]; unsigned int numColors = ReadInt(); if( numColors != pMesh->mPositions.size()) ThrowException( "Vertex color count does not match vertex count"); colors.resize( numColors, aiColor4D( 0, 0, 0, 1)); for( unsigned int a = 0; a < numColors; a++) { unsigned int index = ReadInt(); if( index >= pMesh->mPositions.size()) ThrowException( "Vertex color index out of bounds"); colors[index] = ReadRGBA(); // HACK: (thom) Maxon Cinema XPort plugin puts a third separator here, kwxPort puts a comma. // Ignore gracefully. if( !mIsBinaryFormat) { FindNextNoneWhiteSpace(); if( *P == ';' || *P == ',') P++; } } CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMeshMaterialList( Mesh* pMesh) { readHeadOfDataObject(); // read material count /*unsigned int numMaterials =*/ ReadInt(); // read non triangulated face material index count unsigned int numMatIndices = ReadInt(); // some models have a material index count of 1... 
to be able to read them we // replicate this single material index on every face if( numMatIndices != pMesh->mPosFaces.size() && numMatIndices != 1) ThrowException( "Per-Face material index count does not match face count."); // read per-face material indices for( unsigned int a = 0; a < numMatIndices; a++) pMesh->mFaceMaterials.push_back( ReadInt()); // in version 03.02, the face indices end with two semicolons. // commented out version check, as version 03.03 exported from blender also has 2 semicolons if( !mIsBinaryFormat) // && MajorVersion == 3 && MinorVersion <= 2) { if(P < End && *P == ';') ++P; } // if there was only a single material index, replicate it on all faces while( pMesh->mFaceMaterials.size() < pMesh->mPosFaces.size()) pMesh->mFaceMaterials.push_back( pMesh->mFaceMaterials.front()); // read following data objects bool running = true; while ( running ) { std::string objectName = GetNextToken(); if( objectName.size() == 0) ThrowException( "Unexpected end of file while parsing mesh material list."); else if( objectName == "}") break; // material list finished else if( objectName == "{") { // template materials std::string matName = GetNextToken(); Material material; material.mIsReference = true; material.mName = matName; pMesh->mMaterials.push_back( material); CheckForClosingBrace(); // skip } } else if( objectName == "Material") { pMesh->mMaterials.push_back( Material()); ParseDataObjectMaterial( &pMesh->mMaterials.back()); } else if( objectName == ";") { // ignore } else { DefaultLogger::get()->warn("Unknown data object in material list in x file"); ParseUnknownDataObject(); } } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectMaterial( Material* pMaterial) { std::string matName; readHeadOfDataObject( &matName); if( matName.empty()) matName = std::string( "material") + to_string( mLineNumber ); pMaterial->mName = matName; pMaterial->mIsReference = false; // read 
material values pMaterial->mDiffuse = ReadRGBA(); pMaterial->mSpecularExponent = ReadFloat(); pMaterial->mSpecular = ReadRGB(); pMaterial->mEmissive = ReadRGB(); // read other data objects bool running = true; while ( running ) { std::string objectName = GetNextToken(); if( objectName.size() == 0) ThrowException( "Unexpected end of file while parsing mesh material"); else if( objectName == "}") break; // material finished else if( objectName == "TextureFilename" || objectName == "TextureFileName") { // some exporters write "TextureFileName" instead. std::string texname; ParseDataObjectTextureFilename( texname); pMaterial->mTextures.push_back( TexEntry( texname)); } else if( objectName == "NormalmapFilename" || objectName == "NormalmapFileName") { // one exporter writes out the normal map in a separate filename tag std::string texname; ParseDataObjectTextureFilename( texname); pMaterial->mTextures.push_back( TexEntry( texname, true)); } else { DefaultLogger::get()->warn("Unknown data object in material in x file"); ParseUnknownDataObject(); } } } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectAnimTicksPerSecond() { readHeadOfDataObject(); mScene->mAnimTicksPerSecond = ReadInt(); CheckForClosingBrace(); } // ------------------------------------------------------------------------------------------------ void XFileParser::ParseDataObjectAnimationSet() { std::string animName; readHeadOfDataObject( &animName); Animation* anim = new Animation; mScene->mAnims.push_back( anim); anim->mName = animName; bool running = true; while ( running ) { std::string objectName = GetNextToken(); if( objectName.length() == 0) ThrowException( "Unexpected end of file while parsing animation set."); else if( objectName == "}") break; // animation set finished else if( objectName == "Animation") ParseDataObjectAnimation( anim); else { DefaultLogger::get()->warn("Unknown data object in animation set in x 
file"); ParseUnknownDataObject(); } } } [...] 加速计算。但是,我的代码不是这种情况。

gpuArray

我得到的地方

% Driver: call the timing comparison ten times in a row.
for i=1:10
    calltest;
end

function [t1,t2]=calltest
% CALLTEST  Time one small element-wise power, once wrapped in gpuArray and once plain.
%   t1 - elapsed time reported by tic/toc for the gpuArray statement
%   t2 - elapsed time reported by tic/toc for the plain statement
% Vector length used by both timed statements (only 10 elements).
N=10;
tic
% The whole power expression is the argument of gpuArray(), so it is evaluated
% before gpuArray() is called; the timed region therefore also contains the
% wrapping of the finished result.
% NOTE(review): presumably this means the arithmetic itself runs on the host and
% only the result is copied to the device - confirm against the gpuArray docs.
u=gpuArray(rand(1,N).^(1./[N:-1:1]));
% No trailing semicolon: the measured value is echoed on every call.
t1=toc
tic
% Same expression without the gpuArray wrapper, on a fresh random vector.
u2=rand(1,N).^(1./[N:-1:1]);
% Echoed as well (no trailing semicolon).
t2=toc
end

我有一块 Nvidia GTX850M 显卡。我得到的计时结果是 t1 = 4.8445e-05、t2 = 1.4369e-05。我是不是错误地使用了 gpuArray?这段代码包含在一个函数中,并且该函数会被循环调用数千次。

1 个答案:

答案 0 :(得分:2)

为什么?
因为同时存在两个原因:a)问题规模太小;b)GPU 内核并不是"数学密集型"的。

比较方法模糊了问题的根本原因

第0步:把数据集(向量)的创建与待测部分分离开:

-c tessedit_char_whitelist=01234567890"

第1步:测试缩放:

只测试 10 个元素无法得到清晰的观察结果,因为忽略开销的朴素 Amdahl 定律表述(overhead-naive formulation of Amdahl's Law)没有明确强调以下额外耗时:在 CPU 上进行的 GPU 内核组装与传输,以及(CPU 到 GPU + GPU 到 CPU)的数据搬运阶段。只有与下列情况相比时,
这些附加阶段才可能小到可以忽略不计: a)确实是大规模的向量/矩阵 GPU 内核处理,而 N ~ 10 显然不是

b)确实属于"数学密集型"的 GPU 内核处理,而 `N = 10; R = rand( 1, N ); tic; <a-gpu-based-computing-section>; GPU_t = toc tic; c = R.^( 1. / [N:-1:1] ); CPU_t = toc` 显然不是。所以,
不要因为 GPU 计算必须承担这部分不可避免的工作(开销)而责怪它:没有这些事先付出的时间开销,GPU 根本无法开始工作(而 CPU 在同样长的时间内可能已经算出了最终结果——证毕,Q.E.D.)。

每个CPU-GPU-CPU工作流程部分的细粒度测量:

R.^()