我有以下CUDA内核,它执行广度优先搜索。
/**
 * One edge-parallel BFS relaxation step: each thread inspects one edge and, if
 * exactly one endpoint sits at the current depth while the other still holds
 * -1 (no depth assigned yet), assigns the other endpoint depth + 1.
 *
 * Launch layout: 1D grid, one thread per edge; thread index e reads edges[e].
 * There is no bounds guard because the edge count is not a parameter, so
 * gridDim.x * blockDim.x must not exceed the number of edges.
 *
 * @param edges          edge list; each element exposes integer members
 *                       `first` and `second` used as indices into `vertices`
 * @param vertices       per-vertex depth, -1 meaning "not reached yet"
 * @param current_depth  in: depth of the frontier being expanded;
 *                       out: set to depth + 1 if any vertex was discovered
 * @param done           set to false if any vertex was discovered; never set
 *                       to true here, so the caller must reset it per launch
 *
 * All four pointers are dereferenced on the device and therefore must be
 * device-accessible (cudaMalloc'ed or managed) memory.
 */
__global__ void bfs(const Edge* edges, int* vertices, int* current_depth, bool* done){
    const int e = blockDim.x * blockIdx.x + threadIdx.x;

    // Snapshot the frontier depth exactly once. Other threads overwrite
    // *current_depth while this kernel runs; re-reading it for every test let
    // a single thread compare its two endpoints against different depths and
    // push a vertex one level too far within the same launch.
    // NOTE(review): threads that start after another thread's write can still
    // observe the already-advanced depth. Having the host advance the depth
    // between launches would remove that race entirely — confirm with caller.
    const int depth = *current_depth;

    const int vfirst  = edges[e].first;
    const int dfirst  = vertices[vfirst];
    const int vsecond = edges[e].second;
    const int dsecond = vertices[vsecond];

    // Concurrent writers below all store the same value (depth + 1 / false),
    // so those write-write overlaps do not change the stored result.
    if ((dfirst == depth) && (dsecond == -1)) {
        vertices[vsecond] = depth + 1;
        *current_depth = depth + 1;
        *done = false;
    }
    if ((dsecond == depth) && (dfirst == -1)) {
        vertices[vfirst] = depth + 1;
        *current_depth = depth + 1;
        *done = false;
    }
}
此内核获取在主机上分配的值,然后在设备上进行修改并写回主机。
所以我声明了两个变量,并以如下方式将它们复制到设备:
// Host-side termination flag and BFS depth, plus their device mirrors.
bool h_done = true;
bool* d_done;
int* d_current_depth;
int h_current_depth = 0;

// Allocate one element of device memory for each mirror.
// NOTE(review): the cudaError_t results are ignored here; checking them would
// surface allocation or copy failures immediately.
cudaMalloc((void**)&d_done, sizeof(bool));
cudaMalloc((void**)&d_current_depth, sizeof(int));

// Seed the device mirrors with the initial host values.
cudaMemcpy(d_done, &h_done, sizeof(bool), cudaMemcpyHostToDevice);
// Fixed: the copy kind was spelled cudaMemcpyHostDevice, which is not a
// cudaMemcpyKind enumerator.
cudaMemcpy(d_current_depth, &h_current_depth, sizeof(int), cudaMemcpyHostToDevice);
在这里循环启动内核。
// Launch one BFS step with blocksPerGrid * threadsPerBlock threads.
// NOTE(review): h_edges and h_vertices are named like host allocations, but the
// kernel dereferences both arguments on the device. They need to be device
// (cudaMalloc + cudaMemcpy) or managed pointers — confirm how they are allocated.
bfs<<<blocksPerGrid, threadsPerBlock>>>(h_edges, h_vertices, d_current_depth, d_done);
代码可以正常编译和运行,但主机上的值从未被设备修改,反之亦然。我已经仔细阅读了NVIDIA的示例代码,但似乎仍然无法做到这一点。我是CUDA的新手,非常感谢任何帮助。
答案 0 :(得分:1)
此:
import javafx.animation.Animation;
import javafx.animation.TranslateTransition;
import javafx.application.Application;
import javafx.beans.binding.Bindings;
import javafx.beans.binding.BooleanBinding;
import javafx.geometry.Point2D;
import javafx.scene.Scene;
import javafx.scene.input.KeyCode;
import javafx.scene.layout.Pane;
import javafx.scene.paint.Color;
import javafx.scene.shape.Circle;
import javafx.scene.shape.Line;
import javafx.scene.transform.Rotate;
import javafx.stage.Stage;
import javafx.util.Duration;
/**
 * Small JavaFX demo: a blue target sweeps back and forth along the top of the
 * window while a line-shaped weapon at the bottom centre can be rotated with
 * the LEFT/RIGHT keys (clamped to +/-45 degrees) and fired with SPACE.
 * A projectile that comes within (targetRadius + projectileRadius) of the
 * target prints "Hit", removes both nodes and stops both animations.
 */
public class ShootingGame extends Application {

    /**
     * Builds the scene graph, starts the target animation and installs the
     * keyboard handler that rotates the weapon and fires projectiles.
     *
     * @param primaryStage the stage that shows the game scene
     */
    @Override
    public void start(Stage primaryStage) {
        final double width = 400;
        final double height = 400;
        final double targetRadius = 25;
        final double projectileRadius = 5;
        final double weaponLength = 25;

        // The weapon is a vertical line anchored at the bottom centre.
        final double weaponX = width / 2;
        final double weaponStartY = height;
        final double weaponEndY = height - weaponLength;

        final double targetStartX = targetRadius;
        final double targetY = targetRadius * 2;

        Pane root = new Pane();

        Circle target = new Circle(targetStartX, targetY, targetRadius, Color.BLUE);
        // Sweep the target horizontally forever, reversing at each end.
        TranslateTransition targetMotion = new TranslateTransition(Duration.seconds(2), target);
        targetMotion.setByX(350);
        targetMotion.setAutoReverse(true);
        targetMotion.setCycleCount(Animation.INDEFINITE);
        targetMotion.play();

        Line weapon = new Line(weaponX, weaponStartY, weaponX, weaponEndY);
        weapon.setStrokeWidth(5);
        // Rotate about the weapon's base so only its tip swings.
        Rotate weaponRotation = new Rotate(0, weaponX, weaponStartY);
        weapon.getTransforms().add(weaponRotation);

        Scene scene = new Scene(root, width, height);
        scene.setOnKeyPressed(e -> {
            if (e.getCode() == KeyCode.LEFT) {
                weaponRotation.setAngle(Math.max(-45, weaponRotation.getAngle() - 2));
            }
            if (e.getCode() == KeyCode.RIGHT) {
                weaponRotation.setAngle(Math.min(45, weaponRotation.getAngle() + 2));
            }
            if (e.getCode() == KeyCode.SPACE) {
                // Spawn the projectile at the (rotated) tip of the weapon.
                Point2D weaponEnd = weapon.localToParent(weaponX, weaponEndY);
                Circle projectile = new Circle(weaponEnd.getX(), weaponEnd.getY(), projectileRadius);

                // Fly along the weapon's current direction for one second.
                TranslateTransition shot = new TranslateTransition(Duration.seconds(1), projectile);
                shot.setByX(Math.tan(Math.toRadians(weaponRotation.getAngle())) * height);
                shot.setByY(-height);
                shot.setOnFinished(event -> root.getChildren().remove(projectile));

                // Re-evaluated whenever the projectile's translation changes:
                // true once the two circles overlap in the parent's coordinates.
                BooleanBinding hit = Bindings.createBooleanBinding(() -> {
                    Point2D targetLocation = target.localToParent(targetStartX, targetY);
                    Point2D projectileLocation = projectile.localToParent(weaponEnd);
                    return (targetLocation.distance(projectileLocation) < targetRadius + projectileRadius);
                }, projectile.translateXProperty(), projectile.translateYProperty());

                // Bindings observe their dependencies through weak listeners, so a
                // binding referenced only by this local could be garbage-collected
                // mid-flight and silently stop detecting hits. Tie its lifetime to
                // the projectile node, which stays in the scene graph while flying.
                projectile.setUserData(hit);

                hit.addListener((obs, wasHit, isNowHit) -> {
                    // Ignore overlaps with a target that an earlier shot already
                    // removed; its node still reports its last position.
                    if (isNowHit && root.getChildren().contains(target)) {
                        System.out.println("Hit");
                        root.getChildren().remove(projectile);
                        root.getChildren().remove(target);
                        targetMotion.stop();
                        shot.stop();
                    }
                });

                root.getChildren().add(projectile);
                shot.play();
            }
        });

        root.getChildren().addAll(target, weapon);
        primaryStage.setScene(scene);
        primaryStage.show();
    }

    /**
     * Launches the JavaFX application.
     *
     * @param args command-line arguments forwarded to the JavaFX launcher
     */
    public static void main(String[] args) {
        launch(args);
    }
}
几乎肯定是错的。
在 bfs<<<blocksPerGrid, threadsPerBlock>>>(h_edges, h_vertices, d_current_depth, d_done); 这次内核启动中,除非您使用的是托管内存(我对此表示怀疑),否则 h_edges 和 h_vertices(从名称来看)都是位于主机内存中的变量。您不能把常规的主机指针传递给设备代码并在其中访问或修改。由于这个错误,您的内核很可能根本无法正常运行。
您的代码报告的未指定的启动错误很可能是由此造成的。