我使用#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <cusparse_v2.h>
#include "cuSparseUtilities.cuh"
#include "Utilities.cuh"
/********/
/* MAIN */
/********/
int main() {
cusparseHandle_t handle;
// --- Initialize cuSPARSE
cusparseSafeCall(cusparseCreate(&handle));
cusparseMatDescr_t descrA = 0;
/**************************/
/* SETTING UP THE PROBLEM */
/**************************/
const int Nrows = 5; // --- Number of rows
const int Ncols = 4; // --- Number of columns
const int N = Nrows;
// --- Host side dense matrix
double *h_A_dense = (double*)malloc(Nrows * Ncols * sizeof(*h_A_dense));
// --- Column-major storage
h_A_dense[ 0] = 0.4612f; h_A_dense[ 5] = -0.0006f; h_A_dense[10] = 1.3f; h_A_dense[15] = 0.0f;
h_A_dense[ 1] = 0.0f; h_A_dense[ 6] = 1.443f; h_A_dense[11] = 0.0f; h_A_dense[16] = 0.0f;
h_A_dense[ 2] = -0.0006f; h_A_dense[ 7] = 0.4640f; h_A_dense[12] = 0.0723f; h_A_dense[17] = 0.0f;
h_A_dense[ 3] = 0.3566f; h_A_dense[ 8] = 0.0723f; h_A_dense[13] = 0.7543f; h_A_dense[18] = 0.0f;
h_A_dense[ 4] = 0.f; h_A_dense[ 9] = 0.0f; h_A_dense[14] = 0.0f; h_A_dense[19] = 0.1f;
// --- Create device array and copy host array to it
double *d_A_dense; gpuErrchk(cudaMalloc(&d_A_dense, Nrows * Ncols * sizeof(double)));
gpuErrchk(cudaMemcpy(d_A_dense, h_A_dense, Nrows * Ncols * sizeof(*d_A_dense), cudaMemcpyHostToDevice));
/*******************************/
/* FROM DENSE TO SPARSE MATRIX */
/*******************************/
// --- Descriptor for sparse matrix A
setUpDescriptor(descrA, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_INDEX_BASE_ONE);
int nnz = 0; // --- Number of nonzero elements in dense matrix
int *d_nnzPerVector; // --- Device side number of nonzero elements per row
double *d_A; // --- Sparse matrix values - array of size nnz
int *d_A_RowIndices; // --- "Row indices"
int *d_A_ColIndices; // --- "Column indices"
dense2SparseD(d_A_dense, &d_nnzPerVector, &d_A, &d_A_RowIndices, &d_A_ColIndices, nnz, descrA, handle, Nrows, Ncols);
/*******************************************************/
/* CHECKING THE RESULTS FOR DENSE TO SPARSE CONVERSION */
/*******************************************************/
// --- Host side number of nonzero elements per row
int *h_nnzPerVector = (int *)malloc(Nrows * sizeof(int));
gpuErrchk(cudaMemcpy(h_nnzPerVector, d_nnzPerVector, Nrows * sizeof(int), cudaMemcpyDeviceToHost));
printf("Number of nonzero elements in dense matrix = %i\n\n", nnz);
for (int i = 0; i < Nrows; ++i) printf("Number of nonzero elements in row %i = %i \n", i, h_nnzPerVector[i]);
printf("\n");
// --- Host side sparse matrix
double *h_A = (double *)malloc(nnz * sizeof(double));
int *h_A_RowIndices = (int *)malloc((Nrows + 1) * sizeof(int));
int *h_A_ColIndices = (int *)malloc(nnz * sizeof(int));
gpuErrchk(cudaMemcpy(h_A, d_A, nnz * sizeof(double), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(h_A_RowIndices, d_A_RowIndices, (Nrows + 1) * sizeof(int), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(h_A_ColIndices, d_A_ColIndices, nnz * sizeof(int), cudaMemcpyDeviceToHost));
printf("\nOriginal matrix in CSR format\n\n");
for (int i = 0; i < nnz; ++i) printf("A[%i] = %f\n", i, h_A[i]); printf("\n");
printf("\n");
for (int i = 0; i < (Nrows + 1); ++i) printf("h_A_RowIndices[%i] = %i \n", i, h_A_RowIndices[i]); printf("\n");
for (int i = 0; i < nnz; ++i) printf("h_A_ColIndices[%i] = %i \n", i, h_A_ColIndices[i]);
/*******************************/
/* FROM SPARSE TO DENSE MATRIX */
/*******************************/
double *d_A_denseReconstructed; gpuErrchk(cudaMalloc(&d_A_denseReconstructed, Nrows * Ncols * sizeof(double)));
cusparseSafeCall(cusparseDcsr2dense(handle, Nrows, Ncols, descrA, d_A, d_A_RowIndices, d_A_ColIndices,
d_A_denseReconstructed, Nrows));
/*******************************************************/
/* CHECKING THE RESULTS FOR SPARSE TO DENSE CONVERSION */
/*******************************************************/
double *h_A_denseReconstructed = (double *)malloc(Nrows * Ncols * sizeof(double));
gpuErrchk(cudaMemcpy(h_A_denseReconstructed, d_A_denseReconstructed, Nrows * Ncols * sizeof(double), cudaMemcpyDeviceToHost));
printf("\nReconstructed dense matrix \n");
for (int m = 0; m < Nrows; m++) {
for (int n = 0; n < Ncols; n++)
printf("%f\t", h_A_denseReconstructed[n * Nrows + m]);
printf("\n");
}
return 0;
}
来跟踪子项目。子模块指向特定分支,但我现在需要更改它。
我尝试手动编辑git submodule
文件并更改.gitmodules
条目,但是我更新后续版本的每个命令都没有做任何事情或失败。< / p>
我已经尝试了
branch =
子项目正确检查出来,gitk在那里显示它确实有我需要切换到的新分支。我可以手动执行此操作并提交新哈希,但我不确定git submodule sync # Succeeds, but nothing happens
git submodule update # Doesn't do anything
git submodule update --remote # fails with Needed a single revision
是否真的能够理解这种变化。
我还尝试删除所有子模块文件夹(包括git submodule
文件夹内)并重新运行
.git
但是我仍然让项目处于和以前一样的状态(我假设因为它仍在更新到提交的哈希)。
答案 0 :(得分:4)
再次克隆,然后:
更改子模块中的分支
cd submodule
git checkout -b anotherBranch
# if it is a new branch, push it first.
git push -u origin anotherBranch
然后反映父回购中的变化:
cd ..
git config -f .gitmodules submodule.<pathToSubmdule>.branch anotherbranch
git add .
git commit -m "record new gilink, and modified .gitmodules"
git push
然后尝试再次克隆您的父回购以检查所有内容是否符合预期。