diff --git a/PG-PuReMD/src/cuda/cuda_charges.cu b/PG-PuReMD/src/cuda/cuda_charges.cu index 6688eb854b91e42efb7e511295b9b133a00d5ac8..10ab72c6bbd423de03e0cf8b68bdcff4b78a0057 100644 --- a/PG-PuReMD/src/cuda/cuda_charges.cu +++ b/PG-PuReMD/src/cuda/cuda_charges.cu @@ -473,15 +473,11 @@ static void Calculate_Charges_QEq( reax_system const * const system, rvec2 my_sum, all_sum; #if defined(DUAL_SOLVER) int blocks; - rvec2 *spad_rvec2; + rvec2 *spad; #else real *spad; #endif - check_smalloc( &workspace->host_scratch, &workspace->host_scratch_size, - sizeof(real) * system->n, TRUE, SAFE_ZONE, - "Calculate_Charges_QEq::workspace->host_scratch" ); - q = (real *) workspace->host_scratch; #if defined(DUAL_SOLVER) blocks = system->n / DEF_BLOCK_SIZE + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); @@ -489,22 +485,22 @@ static void Calculate_Charges_QEq( reax_system const * const system, cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, sizeof(rvec2) * (blocks + 1), "Calculate_Charges_QEq::workspace->scratch" ); - spad_rvec2 = (rvec2 *) workspace->scratch; - cuda_memset( spad_rvec2, 0, sizeof(rvec2) * (blocks + 1), - "Calculate_Charges_QEq::spad_rvec2" ); + spad = (rvec2 *) workspace->scratch; + cuda_memset( spad, 0, sizeof(rvec2) * (blocks + 1), + "Calculate_Charges_QEq::spad" ); /* compute local sums of pseudo-charges in s and t on device */ k_reduction_rvec2 <<< blocks, DEF_BLOCK_SIZE, sizeof(rvec2) * (DEF_BLOCK_SIZE / 32) >>> - ( workspace->d_workspace->x, spad_rvec2, system->n ); + ( workspace->d_workspace->x, spad, system->n ); cudaCheckError( ); k_reduction_rvec2 <<< 1, ((blocks + 31) / 32) * 32, sizeof(rvec2) * ((blocks + 31) / 32) >>> - ( spad_rvec2, &spad_rvec2[blocks], blocks ); + ( spad, &spad[blocks], blocks ); cudaCheckError( ); - copy_host_device( &my_sum, &spad_rvec2[blocks], + copy_host_device( &my_sum, &spad[blocks], sizeof(rvec2), cudaMemcpyDeviceToHost, "Calculate_Charges_QEq::my_sum," ); #else cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, @@ -527,6 +523,11 @@ static void Calculate_Charges_QEq( reax_system const * const system, u = all_sum[0] / all_sum[1]; + check_smalloc( &workspace->host_scratch, &workspace->host_scratch_size, + sizeof(real) * system->n, TRUE, SAFE_ZONE, + "Calculate_Charges_QEq::workspace->host_scratch" ); + q = (real *) workspace->host_scratch; + /* derive atomic charges from pseudo-charges * and set up extrapolation for next time step */ Extrapolate_Charges_QEq_Part2( system, workspace, q, u ); diff --git a/PG-PuReMD/src/cuda/cuda_dense_lin_alg.cu b/PG-PuReMD/src/cuda/cuda_dense_lin_alg.cu index 2dcd614bcc2c8ced5fb9a2486fa0827eca6ee9e8..df7c9451926fedfe5d0ce68569ed32c3d8f8352e 100644 --- a/PG-PuReMD/src/cuda/cuda_dense_lin_alg.cu +++ b/PG-PuReMD/src/cuda/cuda_dense_lin_alg.cu @@ -673,7 +673,7 @@ void Dot_local_rvec2( control_params const * const control, // Cuda_Reduction_Sum( spad, &spad[k], k ); k_reduction_rvec2 <<< blocks, DEF_BLOCK_SIZE, - sizeof(rvec2) * DEF_BLOCK_SIZE >>> + sizeof(rvec2) * (DEF_BLOCK_SIZE / 32) >>> ( spad, &spad[k], k ); cudaCheckError( ); diff --git a/PG-PuReMD/src/cuda/cuda_spar_lin_alg.cu b/PG-PuReMD/src/cuda/cuda_spar_lin_alg.cu index 2964a08199a509727bd33bc8c5eb158574bf3064..3a6547ffba774f70455aa254cef232957bbe238c 100644 --- a/PG-PuReMD/src/cuda/cuda_spar_lin_alg.cu +++ b/PG-PuReMD/src/cuda/cuda_spar_lin_alg.cu @@ -113,8 +113,8 @@ CUDA_GLOBAL void k_dual_jacobi_apply( real const * const Hdia_inv, rvec2 const * return; } - x[i][0] = y[i][0] * Hdia_inv[i]; - x[i][1] = y[i][1] * Hdia_inv[i]; + x[i][0] = Hdia_inv[i] * y[i][0]; + x[i][1] = Hdia_inv[i] * y[i][1]; }