Newer
Older
Kurt A. O'Hearn
committed
#include "cuda_allocate.h"
Kurt A. O'Hearn
committed
#include "cuda_forces.h"
Kurt A. O'Hearn
committed
#include "cuda_list.h"
#include "cuda_neighbors.h"
Kurt A. O'Hearn
committed
#include "../allocate.h"
#include "../index_utils.h"
#include "../tool_box.h"
#include "../vector.h"
Kurt A. O'Hearn
committed
CUDA_GLOBAL void k_init_nbrs( ivec *nbrs, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i;
i = blockIdx.x * blockDim.x + threadIdx.x;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
if ( i >= N )
Kurt A. O'Hearn
committed
{
return;
}
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
nbrs[i][0] = -1;
nbrs[i][1] = -1;
nbrs[i][2] = -1;
}
void Cuda_Allocate_Control( control_params *control )
{
cuda_malloc( (void **)&control->d_control_params,
sizeof(control_params), TRUE, "control_params" );
copy_host_device( control, control->d_control_params,
sizeof(control_params), cudaMemcpyHostToDevice, "control_params" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Allocate_Grid( reax_system *system )
Kurt A. O'Hearn
committed
{
int total;
Kurt A. O'Hearn
committed
// grid_cell local_cell;
Kurt A. O'Hearn
committed
grid *host = &system->my_grid;
grid *device = &system->d_my_grid;
// ivec *nbrs_x = (ivec *) workspace->scratch;
Kurt A. O'Hearn
committed
total = host->ncells[0] * host->ncells[1] * host->ncells[2];
Kurt A. O'Hearn
committed
ivec_Copy( device->ncells, host->ncells );
rvec_Copy( device->cell_len, host->cell_len );
rvec_Copy( device->inv_len, host->inv_len );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
ivec_Copy( device->bond_span, host->bond_span );
ivec_Copy( device->nonb_span, host->nonb_span );
ivec_Copy( device->vlist_span, host->vlist_span );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
ivec_Copy( device->native_cells, host->native_cells );
ivec_Copy( device->native_str, host->native_str );
ivec_Copy( device->native_end, host->native_end );
Kurt A. O'Hearn
committed
device->ghost_cut = host->ghost_cut;
Kurt A. O'Hearn
committed
ivec_Copy( device->ghost_span, host->ghost_span );
ivec_Copy( device->ghost_nonb_span, host->ghost_nonb_span );
ivec_Copy( device->ghost_hbond_span, host->ghost_hbond_span );
ivec_Copy( device->ghost_bond_span, host->ghost_bond_span );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->str, sizeof(int) * total, TRUE,
Kurt A. O'Hearn
committed
"Cuda_Allocate_Grid::grid->str" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->end, sizeof(int) * total, TRUE,
Kurt A. O'Hearn
committed
"Cuda_Allocate_Grid::grid->end" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->cutoff, sizeof(real) * total, TRUE,
Kurt A. O'Hearn
committed
"Cuda_Allocate_Grid::grid->cutoff" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->nbrs_x, sizeof(ivec) * total * host->max_nbrs,
Kurt A. O'Hearn
committed
TRUE, "Cuda_Allocate_Grid::grid->nbrs_x" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->nbrs_cp, sizeof(rvec) * total * host->max_nbrs,
Kurt A. O'Hearn
committed
TRUE, "Cuda_Allocate_Grid::grid->nbrs_cp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &device->rel_box, sizeof(ivec) * total,
Kurt A. O'Hearn
committed
TRUE, "Cuda_Allocate_Grid::grid->rel_box" );
Kurt A. O'Hearn
committed
// int block_size = 512;
// int blocks = (host->max_nbrs) / block_size + ((host->max_nbrs) % block_size == 0 ? 0 : 1);
//
Kurt A. O'Hearn
committed
// k_init_nbrs <<< blocks, block_size >>>
// ( nbrs_x, host->max_nbrs );
Kurt A. O'Hearn
committed
// cudaDeviceSynchronize( );
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// cudaCheckError( );
//
// cuda_malloc( (void **)& device->cells, sizeof(grid_cell) * total,
// TRUE, "grid:cells");
// fprintf( stderr, " Device cells address --> %ld \n", device->cells );
// cuda_malloc( (void **) &device->order,
// sizeof(ivec) * (host->total + 1), TRUE, "grid:order" );
//
// local_cell.top = local_cell.mark = local_cell.str = local_cell.end = 0;
// fprintf( stderr, "Total cells to be allocated -- > %d \n", total );
// for (int i = 0; i < total; i++)
// {
// //fprintf( stderr, "Address of the local atom -> %ld \n", &local_cell );
//
// cuda_malloc( (void **) &local_cell.atoms, sizeof(int) * host->max_atoms,
// TRUE, "alloc:grid:cells:atoms" );
// //fprintf( stderr, "Allocated address of the atoms --> %ld (%d)\n", local_cell.atoms, host->max_atoms );
//
// cuda_malloc( (void **) &local_cell.nbrs_x, sizeof(ivec) * host->max_nbrs,
// TRUE, "alloc:grid:cells:nbrs_x" );
// copy_device( local_cell.nbrs_x, nbrs_x, host->max_nbrs * sizeof(ivec), "grid:nbrs_x" );
// //fprintf( stderr, "Allocated address of the nbrs_x--> %ld \n", local_cell.nbrs_x );
//
// cuda_malloc( (void **) &local_cell.nbrs_cp, sizeof(rvec) * host->max_nbrs,
// TRUE, "alloc:grid:cells:nbrs_cp" );
// //fprintf( stderr, "Allocated address of the nbrs_cp--> %ld \n", local_cell.nbrs_cp );
//
// //cuda_malloc( (void **) &local_cell.nbrs, sizeof(grid_cell *) * host->max_nbrs,
// // TRUE, "alloc:grid:cells:nbrs" );
// //fprintf( stderr, "Allocated address of the nbrs--> %ld \n", local_cell.nbrs );
//
// copy_host_device( &local_cell, &device->cells[i], sizeof(grid_cell),
// cudaMemcpyHostToDevice, "grid:cell-alloc" );
// }
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Deallocate_Grid_Cell_Atoms( reax_system *system )
Kurt A. O'Hearn
committed
{
int i, total;
Kurt A. O'Hearn
committed
grid_cell local_cell;
grid *host, *device;
host = &system->my_grid;
device = &system->d_my_grid;
Kurt A. O'Hearn
committed
total = host->ncells[0] * host->ncells[1] * host->ncells[2];
for ( i = 0; i < total; ++i )
Kurt A. O'Hearn
committed
{
copy_host_device( &local_cell, &device->cells[i],
Kurt A. O'Hearn
committed
sizeof(grid_cell), cudaMemcpyDeviceToHost,
Kurt A. O'Hearn
committed
"Cuda_Deallocate_Grid_Cell_Atoms::grid" );
Kurt A. O'Hearn
committed
cuda_free( local_cell.atoms,
Kurt A. O'Hearn
committed
"Cuda_Deallocate_Grid_Cell_Atoms::grid_cell.atoms" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Allocate_Grid_Cell_Atoms( reax_system *system, int cap )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, total;
Kurt A. O'Hearn
committed
grid_cell local_cell;
grid *host, *device;
Kurt A. O'Hearn
committed
host = &system->my_grid;
device = &system->d_my_grid;
Kurt A. O'Hearn
committed
total = host->ncells[0] * host->ncells[1] * host->ncells[2];
for ( i = 0; i < total; i++ )
Kurt A. O'Hearn
committed
{
copy_host_device( &local_cell, &device->cells[i],
sizeof(grid_cell), cudaMemcpyDeviceToHost, "grid:cell-dealloc" );
cuda_malloc( (void **)&local_cell.atoms, sizeof(int) * cap,
TRUE, "realloc:grid:cells:atoms" );
Kurt A. O'Hearn
committed
copy_host_device( &local_cell, &device->cells[i],
sizeof(grid_cell), cudaMemcpyHostToDevice, "grid:cell-realloc" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Allocate_System( reax_system *system )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
/* atoms */
cuda_malloc( (void **) &system->d_my_atoms,
system->total_cap * sizeof(reax_atom),
TRUE, "system:d_my_atoms" );
cuda_malloc( (void **) &system->d_numH, sizeof(int), TRUE, "system:d_numH" );
Kurt A. O'Hearn
committed
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/* list management */
cuda_malloc( (void **) &system->d_far_nbrs,
system->total_cap * sizeof(int), TRUE, "system:d_far_nbrs" );
cuda_malloc( (void **) &system->d_max_far_nbrs,
system->total_cap * sizeof(int), TRUE, "system:d_max_far_nbrs" );
cuda_malloc( (void **) &system->d_total_far_nbrs,
sizeof(int), TRUE, "system:d_total_far_nbrs" );
cuda_malloc( (void **) &system->d_realloc_far_nbrs,
sizeof(int), TRUE, "system:d_realloc_far_nbrs" );
cuda_malloc( (void **) &system->d_bonds,
system->total_cap * sizeof(int), TRUE, "system:d_bonds" );
cuda_malloc( (void **) &system->d_max_bonds,
system->total_cap * sizeof(int), TRUE, "system:d_max_bonds" );
cuda_malloc( (void **) &system->d_total_bonds,
sizeof(int), TRUE, "system:d_total_bonds" );
cuda_malloc( (void **) &system->d_realloc_bonds,
sizeof(int), TRUE, "system:d_realloc_bonds" );
cuda_malloc( (void **) &system->d_hbonds,
system->total_cap * sizeof(int), TRUE, "system:d_hbonds" );
cuda_malloc( (void **) &system->d_max_hbonds,
system->total_cap * sizeof(int), TRUE, "system:d_max_hbonds" );
cuda_malloc( (void **) &system->d_total_hbonds,
sizeof(int), TRUE, "system:d_total_hbonds" );
cuda_malloc( (void **) &system->d_realloc_hbonds,
sizeof(int), TRUE, "system:d_realloc_hbonds" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_cm_entries,
Kurt A. O'Hearn
committed
system->local_cap * sizeof(int), TRUE, "system:d_cm_entries" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_max_cm_entries,
Kurt A. O'Hearn
committed
system->local_cap * sizeof(int), TRUE, "system:d_max_cm_entries" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_total_cm_entries,
sizeof(int), TRUE, "system:d_total_cm_entries" );
cuda_malloc( (void **) &system->d_realloc_cm_entries,
sizeof(int), TRUE, "system:d_realloc_cm_entries" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_total_thbodies,
sizeof(int), TRUE, "system:d_total_thbodies" );
Kurt A. O'Hearn
committed
/* simulation boxes */
cuda_malloc( (void **) &system->d_big_box,
sizeof(simulation_box), TRUE, "system:d_big_box" );
cuda_malloc( (void **) &system->d_my_box,
sizeof(simulation_box), TRUE, "system:d_my_box" );
cuda_malloc( (void **) &system->d_my_ext_box,
sizeof(simulation_box), TRUE, "d_my_ext_box" );
Kurt A. O'Hearn
committed
/* interaction parameters */
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_sbp,
Kurt A. O'Hearn
committed
system->reax_param.num_atom_types * sizeof(single_body_parameters),
TRUE, "system:d_sbp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_tbp,
POW( system->reax_param.num_atom_types, 2.0 ) * sizeof(two_body_parameters),
TRUE, "system:d_tbp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_thbp,
POW( system->reax_param.num_atom_types, 3.0 ) * sizeof(three_body_header),
TRUE, "system:d_thbp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_hbp,
POW( system->reax_param.num_atom_types, 3.0 ) * sizeof(hbond_parameters),
TRUE, "system:d_hbp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_fbp,
POW( system->reax_param.num_atom_types, 4.0 ) * sizeof(four_body_header),
TRUE, "system:d_fbp" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->reax_param.d_gp.l,
system->reax_param.gp.n_global * sizeof(real), TRUE, "system:d_gp.l" );
Kurt A. O'Hearn
committed
system->reax_param.d_gp.n_global = 0;
system->reax_param.d_gp.vdw_type = 0;
}
Kurt A. O'Hearn
committed
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
static void Cuda_Reallocate_System_Part1( reax_system *system, storage *workspace,
int local_cap_old, int local_cap )
{
int *temp;
cuda_check_malloc( &workspace->scratch, &workspace->scratch_size,
sizeof(int) * local_cap,
"Cuda_Reallocate_System_Part1::workspace->scratch" );
temp = (int *) workspace->scratch;
copy_device( temp, system->d_cm_entries, sizeof(int) * local_cap_old,
"Cuda_Reallocate_System_Part1::temp" );
cuda_free( system->d_cm_entries, "Cuda_Reallocate_System_Part1::d_cm_entries" );
cuda_malloc( (void **) &system->d_cm_entries,
sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_cm_entries" );
copy_device( system->d_cm_entries, temp, sizeof(int) * local_cap_old,
"Cuda_Reallocate_System_Part1::temp" );
copy_device( temp, system->d_max_cm_entries, sizeof(int) * local_cap_old,
"Cuda_Reallocate_System_Part1::temp" );
cuda_free( system->d_max_cm_entries, "Cuda_Reallocate_System_Part1::d_max_cm_entries" );
cuda_malloc( (void **) &system->d_max_cm_entries,
sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_max_cm_entries" );
copy_device( system->d_max_cm_entries, temp, sizeof(int) * local_cap_old,
"Cuda_Reallocate_System_Part1::temp" );
}
static void Cuda_Reallocate_System_Part2( reax_system *system, storage *workspace,
int total_cap_old, int total_cap )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int *temp;
Kurt A. O'Hearn
committed
reax_atom *temp_atom;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
cuda_check_malloc( &workspace->scratch, &workspace->scratch_size,
Kurt A. O'Hearn
committed
MAX( sizeof(reax_atom), sizeof(int) ) * total_cap_old,
"Cuda_Reallocate_System_Part2::workspace->scratch" );
temp = (int *) workspace->scratch;
temp_atom = (reax_atom *) workspace->scratch;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* free the existing storage for atoms, leave other info allocated */
Kurt A. O'Hearn
committed
copy_device( temp_atom, system->d_my_atoms, sizeof(reax_atom) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp_atom" );
Kurt A. O'Hearn
committed
cuda_free( system->d_my_atoms, "system::d_my_atoms" );
cuda_malloc( (void **) &system->d_my_atoms,
Kurt A. O'Hearn
committed
sizeof(reax_atom) * total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_my_atoms" );
copy_device( system->d_my_atoms, temp_atom, sizeof(reax_atom) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp_atom" );
Kurt A. O'Hearn
committed
/* list management */
Kurt A. O'Hearn
committed
copy_device( temp, system->d_far_nbrs, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
cuda_free( system->d_far_nbrs, "Cuda_Reallocate_System_Part2::d_far_nbrs" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_far_nbrs,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_far_nbrs" );
copy_device( system->d_far_nbrs, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
copy_device( temp, system->d_max_far_nbrs, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
cuda_free( system->d_max_far_nbrs, "Cuda_Reallocate_System_Part2::d_max_far_nbrs" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_max_far_nbrs,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_max_far_nbrs" );
copy_device( system->d_max_far_nbrs, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
copy_device( temp, system->d_bonds, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
cuda_free( system->d_bonds, "Cuda_Reallocate_System_Part2::d_bonds" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_bonds,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_bonds" );
copy_device( system->d_bonds, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
copy_device( temp, system->d_max_bonds, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
cuda_free( system->d_max_bonds, "Cuda_Reallocate_System_Part2::d_max_bonds" );
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &system->d_max_bonds,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_max_bonds" );
copy_device( system->d_max_bonds, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
copy_device( temp, system->d_hbonds, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
cuda_free( system->d_hbonds, "system::d_hbonds" );
cuda_malloc( (void **) &system->d_hbonds,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_hbonds" );
copy_device( system->d_hbonds, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
copy_device( temp, system->d_max_hbonds, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
cuda_free( system->d_max_hbonds, "system::d_max_hbonds" );
cuda_malloc( (void **) &system->d_max_hbonds,
Kurt A. O'Hearn
committed
sizeof(int) * system->total_cap, TRUE, "Cuda_Reallocate_System_Part2::d_max_hbonds" );
copy_device( system->d_max_hbonds, temp, sizeof(int) * total_cap_old,
"Cuda_Reallocate_System_Part2::temp" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Allocate_Simulation_Data( simulation_data *data )
Kurt A. O'Hearn
committed
{
cuda_malloc( (void **) &data->d_simulation_data,
sizeof(simulation_data), TRUE, "simulation_data" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Allocate_Workspace( reax_system *system, control_params *control,
Kurt A. O'Hearn
committed
storage *workspace, int local_cap, int total_cap )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int total_real, total_rvec, local_rvec;
Kurt A. O'Hearn
committed
total_real = sizeof(real) * total_cap;
total_rvec = sizeof(rvec) * total_cap;
local_rvec = sizeof(rvec) * local_cap;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* bond order related storage */
cuda_malloc( (void **) &workspace->total_bond_order, total_real, TRUE, "total_bo" );
cuda_malloc( (void **) &workspace->Deltap, total_real, TRUE, "Deltap" );
cuda_malloc( (void **) &workspace->Deltap_boc, total_real, TRUE, "Deltap_boc" );
cuda_malloc( (void **) &workspace->dDeltap_self, total_rvec, TRUE, "dDeltap_self" );
cuda_malloc( (void **) &workspace->Delta, total_real, TRUE, "Delta" );
cuda_malloc( (void **) &workspace->Delta_lp, total_real, TRUE, "Delta_lp" );
cuda_malloc( (void **) &workspace->Delta_lp_temp, total_real, TRUE, "Delta_lp_temp" );
cuda_malloc( (void **) &workspace->dDelta_lp, total_real, TRUE, "Delta_lp_temp" );
cuda_malloc( (void **) &workspace->dDelta_lp_temp, total_real, TRUE, "dDelta_lp_temp" );
cuda_malloc( (void **) &workspace->Delta_e, total_real, TRUE, "Delta_e" );
cuda_malloc( (void **) &workspace->Delta_boc, total_real, TRUE, "Delta_boc" );
cuda_malloc( (void **) &workspace->nlp, total_real, TRUE, "nlp" );
cuda_malloc( (void **) &workspace->nlp_temp, total_real, TRUE, "nlp_temp" );
cuda_malloc( (void **) &workspace->Clp, total_real, TRUE, "Clp" );
cuda_malloc( (void **) &workspace->vlpex, total_real, TRUE, "vlpex" );
cuda_malloc( (void **) &workspace->bond_mark, total_real, TRUE, "bond_mark" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* charge matrix storage */
Kurt A. O'Hearn
committed
if ( control->cm_solver_pre_comp_type == JACOBI_PC )
Kurt A. O'Hearn
committed
{
cuda_malloc( (void **) &workspace->Hdia_inv, sizeof(real) * total_cap, TRUE, "Hdia_inv" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
if ( control->cm_solver_pre_comp_type == ICHOLT_PC
|| control->cm_solver_pre_comp_type == ILUT_PC
|| control->cm_solver_pre_comp_type == ILUTP_PC
|| control->cm_solver_pre_comp_type == FG_ILUT_PC )
Kurt A. O'Hearn
committed
{
cuda_malloc( (void **) &workspace->droptol, sizeof(real) * total_cap, TRUE, "droptol" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &workspace->b_s, sizeof(real) * total_cap, TRUE, "b_s" );
cuda_malloc( (void **) &workspace->b_t, sizeof(real) * total_cap, TRUE, "b_t" );
cuda_malloc( (void **) &workspace->s, sizeof(real) * total_cap, TRUE, "s" );
cuda_malloc( (void **) &workspace->t, sizeof(real) * total_cap, TRUE, "t" );
#if defined(DUAL_SOLVER)
cuda_malloc( (void **) &workspace->b, sizeof(rvec2) * total_cap, TRUE, "b" );
cuda_malloc( (void **) &workspace->x, sizeof(rvec2) * total_cap, TRUE, "x" );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
switch ( control->cm_solver_type )
{
case GMRES_S:
case GMRES_H_S:
cuda_malloc( (void **) &workspace->b_prc,
sizeof(real) * total_cap, TRUE, "b_prc" );
cuda_malloc( (void **) &workspace->b_prm,
sizeof(real) * total_cap, TRUE, "b_prm" );
cuda_malloc( (void **) &workspace->y,
(control->cm_solver_restart + 1) * sizeof(real), TRUE, "y" );
cuda_malloc( (void **) &workspace->z,
(control->cm_solver_restart + 1) * sizeof(real), TRUE, "z" );
cuda_malloc( (void **) &workspace->g,
(control->cm_solver_restart + 1) * sizeof(real), TRUE, "g" );
cuda_malloc( (void **) &workspace->h,
SQR(control->cm_solver_restart + 1) * sizeof(real), TRUE, "h" );
cuda_malloc( (void **) &workspace->hs,
(control->cm_solver_restart + 1) * sizeof(real), TRUE, "hs" );
cuda_malloc( (void **) &workspace->hc,
(control->cm_solver_restart + 1) * sizeof(real), TRUE, "hc" );
cuda_malloc( (void **) &workspace->v,
SQR(control->cm_solver_restart + 1) * sizeof(real), TRUE, "v" );
Kurt A. O'Hearn
committed
break;
case SDM_S:
Kurt A. O'Hearn
committed
cuda_malloc( (void **) &workspace->r, sizeof(real) * total_cap, TRUE, "r" );
cuda_malloc( (void **) &workspace->d, sizeof(real) * total_cap, TRUE, "d" );
cuda_malloc( (void **) &workspace->q, sizeof(real) * total_cap, TRUE, "q" );
cuda_malloc( (void **) &workspace->p, sizeof(real) * total_cap, TRUE, "p" );
Kurt A. O'Hearn
committed
break;
case CG_S:
cuda_malloc( (void **) &workspace->r, sizeof(real) * total_cap, TRUE, "r" );
cuda_malloc( (void **) &workspace->d, sizeof(real) * total_cap, TRUE, "d" );
cuda_malloc( (void **) &workspace->q, sizeof(real) * total_cap, TRUE, "q" );
cuda_malloc( (void **) &workspace->p, sizeof(real) * total_cap, TRUE, "p" );
#if defined(DUAL_SOLVER)
cuda_malloc( (void **) &workspace->r2, sizeof(rvec2) * total_cap, TRUE, "r2" );
cuda_malloc( (void **) &workspace->d2, sizeof(rvec2) * total_cap, TRUE, "d2" );
cuda_malloc( (void **) &workspace->q2, sizeof(rvec2) * total_cap, TRUE, "q2" );
cuda_malloc( (void **) &workspace->p2, sizeof(rvec2) * total_cap, TRUE, "p2" );
#endif
Kurt A. O'Hearn
committed
break;
Kurt A. O'Hearn
committed
case BiCGStab_S:
cuda_malloc( (void **) &workspace->y, sizeof(real) * total_cap, TRUE, "y" );
cuda_malloc( (void **) &workspace->g, sizeof(real) * total_cap, TRUE, "g" );
cuda_malloc( (void **) &workspace->z, sizeof(real) * total_cap, TRUE, "z" );
cuda_malloc( (void **) &workspace->r, sizeof(real) * total_cap, TRUE, "r" );
cuda_malloc( (void **) &workspace->d, sizeof(real) * total_cap, TRUE, "d" );
cuda_malloc( (void **) &workspace->q, sizeof(real) * total_cap, TRUE, "q" );
cuda_malloc( (void **) &workspace->p, sizeof(real) * total_cap, TRUE, "p" );
cuda_malloc( (void **) &workspace->r_hat, sizeof(real) * total_cap, TRUE, "r_hat" );
cuda_malloc( (void **) &workspace->q_hat, sizeof(real) * total_cap, TRUE, "q_hat" );
break;
case PIPECG_S:
break;
case PIPECR_S:
break;
Kurt A. O'Hearn
committed
default:
fprintf( stderr, "[ERROR] Unknown charge method linear solver type. Terminating...\n" );
Kurt A. O'Hearn
committed
exit( INVALID_INPUT );
break;
}
Kurt A. O'Hearn
committed
/* integrator storage */
if ( control->ensemble == nhNVT )
{
cuda_malloc( (void **) &workspace->v_const, local_rvec, TRUE, "v_const" );
}
Kurt A. O'Hearn
committed
/* storage for analysis */
if ( control->molecular_analysis || control->diffusion_coef )
Kurt A. O'Hearn
committed
{
cuda_malloc( (void **) &workspace->mark, local_cap * sizeof(int), TRUE, "mark" );
cuda_malloc( (void **) &workspace->old_mark, local_cap * sizeof(int), TRUE, "old_mark" );
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
{
workspace->mark = NULL;
workspace->old_mark = NULL;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
if ( control->diffusion_coef )
Kurt A. O'Hearn
committed
{
cuda_malloc( (void **) &workspace->x_old, local_cap * sizeof(rvec), TRUE, "x_old" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
workspace->x_old = NULL;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* force related storage */
cuda_malloc( (void **) &workspace->f, sizeof(rvec) * total_cap, TRUE, "f" );
cuda_malloc( (void **) &workspace->CdDelta, sizeof(rvec) * total_cap, TRUE, "CdDelta" );
Kurt A. O'Hearn
committed
/* Taper params */
cuda_malloc( (void **) &workspace->Tap, sizeof(real) * 8, TRUE, "Tap" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Deallocate_Workspace( control_params *control, storage *workspace )
Kurt A. O'Hearn
committed
{
/* bond order related storage */
Kurt A. O'Hearn
committed
cuda_free( workspace->total_bond_order, "total_bo" );
cuda_free( workspace->Deltap, "Deltap" );
cuda_free( workspace->Deltap_boc, "Deltap_boc" );
cuda_free( workspace->dDeltap_self, "dDeltap_self" );
cuda_free( workspace->Delta, "Delta" );
cuda_free( workspace->Delta_lp, "Delta_lp" );
cuda_free( workspace->Delta_lp_temp, "Delta_lp_temp" );
cuda_free( workspace->dDelta_lp, "Delta_lp_temp" );
cuda_free( workspace->dDelta_lp_temp, "dDelta_lp_temp" );
cuda_free( workspace->Delta_e, "Delta_e" );
cuda_free( workspace->Delta_boc, "Delta_boc" );
cuda_free( workspace->nlp, "nlp" );
cuda_free( workspace->nlp_temp, "nlp_temp" );
cuda_free( workspace->Clp, "Clp" );
cuda_free( workspace->vlpex, "vlpex" );
cuda_free( workspace->bond_mark, "bond_mark" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* charge matrix storage */
Kurt A. O'Hearn
committed
if ( control->cm_solver_pre_comp_type == JACOBI_PC )
Kurt A. O'Hearn
committed
{
cuda_free( workspace->Hdia_inv, "Hdia_inv" );
}
Kurt A. O'Hearn
committed
if ( control->cm_solver_pre_comp_type == ICHOLT_PC
|| control->cm_solver_pre_comp_type == ILUT_PC
|| control->cm_solver_pre_comp_type == ILUTP_PC
|| control->cm_solver_pre_comp_type == FG_ILUT_PC )
Kurt A. O'Hearn
committed
{
cuda_free( workspace->droptol, "droptol" );
}
cuda_free( workspace->b_s, "b_s" );
cuda_free( workspace->b_t, "b_t" );
cuda_free( workspace->s, "s" );
cuda_free( workspace->t, "t" );
#if defined(DUAL_SOLVER)
Kurt A. O'Hearn
committed
cuda_free( workspace->b, "b" );
cuda_free( workspace->x, "x" );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
switch ( control->cm_solver_type )
{
case GMRES_S:
case GMRES_H_S:
cuda_free( workspace->b_prc, "b_prc" );
cuda_free( workspace->b_prm, "b_prm" );
cuda_free( workspace->y, "y" );
cuda_free( workspace->z, "z" );
cuda_free( workspace->g, "g" );
cuda_free( workspace->h, "h" );
cuda_free( workspace->hs, "hs" );
cuda_free( workspace->hc, "hc" );
cuda_free( workspace->v, "v" );
break;
case CG_S:
cuda_free( workspace->r, "r" );
cuda_free( workspace->d, "d" );
cuda_free( workspace->q, "q" );
cuda_free( workspace->p, "p" );
#if defined(DUAL_SOLVER)
cuda_free( workspace->r2, "r2" );
cuda_free( workspace->d2, "d2" );
cuda_free( workspace->q2, "q2" );
cuda_free( workspace->p2, "p2" );
#endif
break;
Kurt A. O'Hearn
committed
case SDM_S:
Kurt A. O'Hearn
committed
cuda_free( workspace->r, "r" );
cuda_free( workspace->d, "d" );
cuda_free( workspace->q, "q" );
cuda_free( workspace->p, "p" );
break;
case BiCGStab_S:
cuda_free( workspace->y, "y" );
cuda_free( workspace->g, "g" );
cuda_free( workspace->z, "z" );
cuda_free( workspace->r, "r" );
cuda_free( workspace->d, "d" );
cuda_free( workspace->q, "q" );
cuda_free( workspace->p, "p" );
cuda_free( workspace->r_hat, "r_hat" );
cuda_free( workspace->q_hat, "q_hat" );
break;
case PIPECG_S:
break;
case PIPECR_S:
break;
default:
fprintf( stderr, "[ERROR] Unknown charge method linear solver type. Terminating...\n" );
exit( INVALID_INPUT );
break;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Nose-Hoover integrator */
if ( control->ensemble == nhNVT )
{
cuda_free( workspace->v_const, "v_const" );
}
Kurt A. O'Hearn
committed
/* storage for analysis */
if ( control->molecular_analysis || control->diffusion_coef )
Kurt A. O'Hearn
committed
{
cuda_free( workspace->mark, "mark" );
cuda_free( workspace->old_mark, "old_mark" );
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
{
workspace->mark = NULL;
workspace->old_mark = NULL;
Kurt A. O'Hearn
committed
}
if ( control->diffusion_coef )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
cuda_free( workspace->x_old, "x_old" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
workspace->x_old = NULL;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* force related storage */
Kurt A. O'Hearn
committed
cuda_free( workspace->f, "f" );
cuda_free( workspace->CdDelta, "CdDelta" );
Kurt A. O'Hearn
committed
/* Taper params */
Kurt A. O'Hearn
committed
cuda_free( workspace->Tap, "Tap" );
Kurt A. O'Hearn
committed
}
/* Allocate sparse matrix struc
*
* H: pointer to struct
* n: currently utilized number of rows
* n_max: max number of rows allocated
* m: max number of entries allocated
* format: sparse matrix format
*/
void Cuda_Allocate_Matrix( sparse_matrix * const H, int n, int n_max, int m,
int format )
Kurt A. O'Hearn
committed
{
H->allocated = TRUE;
Kurt A. O'Hearn
committed
H->n = n;
H->n_max = n_max;
H->m = m;
H->format = format;
cuda_malloc( (void **) &H->start, sizeof(int) * n_max, TRUE,
"Cuda_Allocate_Matrix::H->start" );
cuda_malloc( (void **) &H->end, sizeof(int) * n_max, TRUE,
"Cuda_Allocate_Matrix::H->end" );
cuda_malloc( (void **) &H->j, sizeof(int) * m, TRUE,
"Cuda_Allocate_Matrix::H->j" );
cuda_malloc( (void **) &H->val, sizeof(real) * m, TRUE,
"Cuda_Allocate_Matrix::H->val" );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
void Cuda_Deallocate_Matrix( sparse_matrix *H )
Kurt A. O'Hearn
committed
{
H->allocated = FALSE;
H->n = 0;
H->n_max = 0;
H->m = 0;
Kurt A. O'Hearn
committed
cuda_free( H->start, "Cuda_Deallocate_Matrix::start" );
cuda_free( H->end, "Cuda_Deallocate_Matrix::end" );
cuda_free( H->j, "Cuda_Deallocate_Matrix::j" );
cuda_free( H->val, "Cuda_Deallocate_Matrix::val" );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Reallocate_Neighbor_List( reax_list *far_nbr_list, size_t n, size_t max_intrs )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Cuda_Delete_List( far_nbr_list );
Cuda_Make_List( n, max_intrs, TYP_FAR_NEIGHBOR, far_nbr_list );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Reallocate_HBonds_List( reax_list *hbond_list, size_t n, size_t max_intrs )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Cuda_Delete_List( hbond_list );
Cuda_Make_List( n, max_intrs, TYP_HBOND, hbond_list );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Reallocate_Bonds_List( reax_list *bond_list, size_t n, size_t max_intrs )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Cuda_Delete_List( bond_list );
Cuda_Make_List( n, max_intrs, TYP_BOND, bond_list );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Reallocate_Thbodies_List( reax_list *thbodies, size_t n, size_t max_intrs )
Kurt A. O'Hearn
committed
{
Cuda_Delete_List( thbodies );
Cuda_Make_List( n, max_intrs, TYP_THREE_BODY, thbodies );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
void Cuda_Reallocate( reax_system *system, control_params *control,
Kurt A. O'Hearn
committed
simulation_data *data, storage *workspace, reax_list **lists,
mpi_datatypes *mpi_data )
{
Kurt A. O'Hearn
committed
int i, j, k;
Kurt A. O'Hearn
committed
int nflag, Nflag, local_cap_old, total_cap_old;
int renbr, format;
Kurt A. O'Hearn
committed
reallocate_data *realloc;
sparse_matrix *H;
grid *g;
realloc = &workspace->d_workspace->realloc;
g = &system->my_grid;
H = &workspace->d_workspace->H;
Kurt A. O'Hearn
committed
/* IMPORTANT: LOOSE ZONES CHECKS ARE DISABLED FOR NOW BY &&'ing with FALSE!!! */
Kurt A. O'Hearn
committed
nflag = FALSE;
if ( system->n >= (int) CEIL( DANGER_ZONE * system->local_cap )
Kurt A. O'Hearn
committed
|| (FALSE && system->n <= (int) CEIL( LOOSE_ZONE * system->local_cap )) )
Kurt A. O'Hearn
committed
{
nflag = TRUE;
Kurt A. O'Hearn
committed
local_cap_old = system->local_cap;
Kurt A. O'Hearn
committed
system->local_cap = (int) CEIL( system->n * SAFE_ZONE );
Kurt A. O'Hearn
committed
}
Nflag = FALSE;
Kurt A. O'Hearn
committed
if ( system->N >= (int) CEIL( DANGER_ZONE * system->total_cap )
|| (FALSE && system->N <= (int) CEIL( LOOSE_ZONE * system->total_cap )) )
Kurt A. O'Hearn
committed
{
Nflag = TRUE;
Kurt A. O'Hearn
committed
total_cap_old = system->total_cap;
Kurt A. O'Hearn
committed
system->total_cap = (int) CEIL( system->N * SAFE_ZONE );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
if ( nflag == TRUE )
{
Cuda_Reallocate_System_Part1( system, workspace, local_cap_old,
system->local_cap );
}
Kurt A. O'Hearn
committed
if ( Nflag == TRUE )
{
Kurt A. O'Hearn
committed
Cuda_Reallocate_System_Part2( system, workspace, total_cap_old,
Kurt A. O'Hearn
committed
system->total_cap );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
Cuda_Deallocate_Workspace( control, workspace );
Kurt A. O'Hearn
committed
Cuda_Allocate_Workspace( system, control, workspace,
system->local_cap, system->total_cap );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
renbr = (data->step - data->prev_steps) % control->reneighbor == 0 ? TRUE : FALSE;
Kurt A. O'Hearn
committed
/* far neighbors */
Kurt A. O'Hearn
committed
if ( renbr == TRUE && (Nflag == TRUE || realloc->far_nbrs == TRUE) )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Cuda_Reallocate_Neighbor_List( lists[FAR_NBRS],
system->total_cap, system->total_far_nbrs );
Cuda_Init_Neighbor_Indices( system, lists[FAR_NBRS] );
Kurt A. O'Hearn
committed
realloc->far_nbrs = FALSE;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* charge matrix */
if ( nflag == TRUE || realloc->cm == TRUE )
Kurt A. O'Hearn
committed
{
format = H->format;
Kurt A. O'Hearn
committed
Cuda_Deallocate_Matrix( H );
Cuda_Allocate_Matrix( H, system->n, system->local_cap,
system->total_cm_entries, format );
Kurt A. O'Hearn
committed
Cuda_Init_Sparse_Matrix_Indices( system, H );
realloc->cm = FALSE;
Kurt A. O'Hearn
committed
}
/* hydrogen bonds list */
Kurt A. O'Hearn
committed
if ( control->hbond_cut > 0.0 && system->numH > 0
&& (Nflag == TRUE || realloc->hbonds == TRUE) )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Cuda_Reallocate_HBonds_List( lists[HBONDS],
system->total_cap, system->total_hbonds );
Cuda_Init_HBond_Indices( system, workspace, lists[HBONDS] );
Kurt A. O'Hearn
committed
realloc->hbonds = FALSE;
Kurt A. O'Hearn
committed
}
/* bonds list */
if ( Nflag == TRUE || realloc->bonds == TRUE )
{
Kurt A. O'Hearn
committed
Cuda_Reallocate_Bonds_List( lists[BONDS],
system->total_cap, system->total_bonds );
Cuda_Init_Bond_Indices( system, lists[BONDS] );
Kurt A. O'Hearn
committed
realloc->bonds = FALSE;
}
/* 3-body list */
Kurt A. O'Hearn
committed
if ( Nflag == TRUE || realloc->thbody == TRUE )
Kurt A. O'Hearn
committed
{
Cuda_Reallocate_Thbodies_List( lists[THREE_BODIES],
Kurt A. O'Hearn
committed
system->total_thbodies_indices, system->total_thbodies );
realloc->thbody = FALSE;
Kurt A. O'Hearn
committed
}
/* grid */
Kurt A. O'Hearn
committed
if ( renbr == TRUE && realloc->gcell_atoms > -1 )
Kurt A. O'Hearn
committed
{
for ( i = g->native_str[0]; i < g->native_end[0]; i++ )
{
for ( j = g->native_str[1]; j < g->native_end[1]; j++ )
{
for ( k = g->native_str[2]; k < g->native_end[2]; k++ )
{
// reallocate g->atoms
sfree( g->cells[ index_grid_3d(i,j,k,g) ].atoms, "g:atoms" );
Kurt A. O'Hearn
committed
g->cells[ index_grid_3d(i,j,k,g) ].atoms = (int *)
scalloc( realloc->gcell_atoms, sizeof(int), "g:atoms" );
Kurt A. O'Hearn
committed
}
}
}
fprintf( stderr, "p:%d - *** Reallocating Grid Cell Atoms *** Step:%d\n", system->my_rank, data->step );
Kurt A. O'Hearn
committed
// Cuda_Deallocate_Grid_Cell_Atoms( system );
// Cuda_Allocate_Grid_Cell_Atoms( system, realloc->gcell_atoms );
Kurt A. O'Hearn
committed
realloc->gcell_atoms = -1;
}
}