Skip to content
Snippets Groups Projects
Commit 4f9c2b53 authored by Kurt A. O'Hearn's avatar Kurt A. O'Hearn
Browse files

Refactor MPI only and MPI+GPU code.

parent e84cf0a8
No related branches found
No related tags found
No related merge requests found
......@@ -240,7 +240,7 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists )
}
}
fprintf (stderr, "Total numner of host neighbors: %d \n", num_far);
fprintf (stderr, "Total number of host neighbors: %d \n", num_far);
#if defined(DEBUG_FOCUS)
fprintf( stderr, "p%d: estimate nbrs done - num_far=%d\n",
......
......@@ -177,8 +177,9 @@ int main( int argc, char* argv[] )
real t_start = 0, t_elapsed;
real t_begin, t_end;
/* Remove this debug information later */
#ifdef HAVE_CUDA
/* Remove this debug information later */
#if defined(__CUDA_DEBUG_LOG__)
fprintf (stderr, " Size of LR Lookup table %d \n", sizeof (LR_lookup_table) );
#endif
......@@ -197,7 +198,6 @@ int main( int argc, char* argv[] )
fprintf (stderr, " matvec threads per atom: %d \n", MATVEC_KER_THREADS_PER_ROW);
fprintf (stderr, " General block size: %d \n", DEF_BLOCK_SIZE);
#endif
#endif
/* allocated main datastructures */
......@@ -221,7 +221,6 @@ int main( int argc, char* argv[] )
smalloc( sizeof(output_controls), "out_control" );
mpi_data = (mpi_datatypes *) smalloc( sizeof(mpi_datatypes), "mpi_data" );
#ifdef HAVE_CUDA
/* allocate the cuda auxiliary data structures */
dev_workspace = (storage *) smalloc( sizeof(storage), "dev_workspace" );
......@@ -230,7 +229,6 @@ int main( int argc, char* argv[] )
dev_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" );
dev_lists[i]->allocated = 0;
}
#endif
/* Initialize member variables */
system->init_thblist = FALSE;
......@@ -243,7 +241,6 @@ int main( int argc, char* argv[] )
system->wsize = control->nprocs;
system->global_offset = (int *)scalloc(system->wsize+1,sizeof(int),"global_offset");
#ifdef HAVE_CUDA
/* setup the CUDA Device for this process can be on the same machine
* or on a different machine, for now use the rank to compute the device
* This will only work on a single machine with 2 GPUs*/
......@@ -252,7 +249,6 @@ int main( int argc, char* argv[] )
//Cleanup_Cuda_Environment ();
print_device_mem_usage ();
//fprintf( stderr, "p%d: Total number of GPUs on this node -- %d\n", system->my_rank, my_device_id);
#endif
/* read system description files */
Read_System( argv[1], argv[2], argv[3], system, control,
......@@ -262,32 +258,23 @@ int main( int argc, char* argv[] )
MPI_Barrier( MPI_COMM_WORLD );
#endif
#ifdef HAVE_CUDA
/* init the blocks sizes for cuda kernels */
init_blocks (system);
#endif
/* measure total simulation time after input is read */
if( system->my_rank == MASTER_NODE )
t_start = Get_Time( );
/* initialize datastructures */
#ifdef HAVE_CUDA
Cuda_Initialize( system, control, data, workspace, lists, out_control, mpi_data );
#if defined(__CUDA_DEBUG__)
Pure_Initialize( system, control, data, workspace, lists, out_control, mpi_data );
#endif
#else
Initialize( system, control, data, workspace, lists, out_control, mpi_data );
#endif
#ifdef HAVE_CUDA
print_device_mem_usage ();
/* init the blocks sizes for cuda kernels */
init_blocks (system);
#endif
#if defined(DEBUG)
fprintf( stderr, "p%d: initializated data structures\n", system->my_rank );
......@@ -295,31 +282,25 @@ print_device_mem_usage ();
#endif
//END OF FIRST STEP
fprintf(stderr, "===>HERE\n");
// compute f_0
Comm_Atoms( system, control, data, workspace, lists, mpi_data, 1 );
#ifdef HAVE_CUDA
Sync_Atoms ( system );
Sync_Grid (&system->my_grid, &system->d_my_grid);
init_blocks (system);
#if defined(__CUDA_DENUG_LOG__)
fprintf( stderr, "p%d: Comm_Atoms synchronized \n", system->my_rank );
#endif
#endif
//Second step
#ifdef HAVE_CUDA
Cuda_Reset ( system, control, data, workspace, lists );
#if defined(__CUDA_DEBUG__)
Reset( system, control, data, workspace, lists );
#endif
//fprintf( stderr, "p%d: Cuda_Reset done...\n", system->my_rank );
#else
Reset( system, control, data, workspace, lists );
#endif
//Third Step
#ifdef HAVE_CUDA
Cuda_Generate_Neighbor_Lists( system, data, workspace, lists );
#if defined(__CUDA_DEBUG__)
Generate_Neighbor_Lists( system, data, workspace, lists );
......@@ -327,13 +308,9 @@ print_device_mem_usage ();
#if defined(__CUDA_DENUG_LOG__)
fprintf (stderr, "p%d: Cuda_Generate_Neighbor_Lists done...\n", system->my_rank );
#endif
#else
Generate_Neighbor_Lists( system, data, workspace, lists );
#endif
//Fourth Step
#ifdef HAVE_CUDA
#if defined(__CUDA_DEBUG__)
fprintf (stderr, " Host Compute Forces begin.... \n");
Compute_Forces( system, control, data, workspace,
......@@ -344,13 +321,8 @@ print_device_mem_usage ();
#if defined(__CUDA_DENUG_LOG__)
fprintf (stderr, "p%d: Cuda_Compute_Forces done...\n", system->my_rank );
#endif
#else
Compute_Forces( system, control, data, workspace,
lists, out_control, mpi_data );
#endif
#ifdef HAVE_CUDA
#if defined (__CUDA_DEBUG__)
Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
#endif
......@@ -358,11 +330,7 @@ print_device_mem_usage ();
#if defined(__CUDA_DENUG_LOG__)
fprintf (stderr, "p%d: Cuda_Compute_Kinetic_Energy done ... \n", system->my_rank);
#endif
#else
Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
#endif
#ifdef HAVE_CUDA
#if defined(__CUDA_DEBUG__)
validate_device (system, data, workspace, lists);
#endif
......@@ -371,16 +339,10 @@ print_device_mem_usage ();
Output_Results( system, control, data, lists, out_control, mpi_data );
//fprintf (stderr, "p%d: Output_Results done ... \n", system->my_rank);
#endif
#else
Output_Results( system, control, data, lists, out_control, mpi_data );
//fprintf (stderr, "p%d: Output_Results done ... \n", system->my_rank);
#endif
#ifdef HAVE_CUDA
#if defined(DEBUG)
fprintf( stderr, "p%d: computed forces at t0\n", system->my_rank );
MPI_Barrier( MPI_COMM_WORLD );
#endif
#endif
// start the simulation
......@@ -390,7 +352,6 @@ print_device_mem_usage ();
//t_begin = Get_Time ();
#ifdef HAVE_CUDA
#if defined(__CUDA_DEBUG__)
Evolve( system, control, data, workspace, lists, out_control, mpi_data );
#endif
......@@ -407,18 +368,10 @@ print_device_mem_usage ();
//t_end = Get_Timing_Info (t_begin);
//fprintf (stderr, " Post Evolve time: %f \n", t_end);
#else
Evolve( system, control, data, workspace, lists, out_control, mpi_data );
Post_Evolve(system, control, data, workspace, lists, out_control, mpi_data);
#endif
#ifdef HAVE_CUDA
#if !defined(__CUDA_DEBUG__)
Output_Results( system, control, data, lists, out_control, mpi_data );
#endif
#else
Output_Results( system, control, data, lists, out_control, mpi_data );
#endif
//Analysis(system, control, data, workspace, lists, out_control, mpi_data);
......@@ -437,11 +390,98 @@ print_device_mem_usage ();
}
#ifdef HAVE_CUDA
//vaildate the results in debug mode
#if defined(__CUDA_DEBUG__)
validate_device (system, data, workspace, lists);
#endif
#else
/* allocated main datastructures */
system = (reax_system *)
smalloc( sizeof(reax_system), "system" );
control = (control_params *)
smalloc( sizeof(control_params), "control" );
data = (simulation_data *)
smalloc( sizeof(simulation_data), "data" );
workspace = (storage *)
smalloc( sizeof(storage), "workspace" );
lists = (reax_list **)
smalloc( LIST_N * sizeof(reax_list*), "lists" );
for( i = 0; i < LIST_N; ++i ) {
lists[i] = (reax_list *)
smalloc( sizeof(reax_list), "lists[i]" );
lists[i]->allocated = 0;
}
out_control = (output_controls *)
smalloc( sizeof(output_controls), "out_control" );
mpi_data = (mpi_datatypes *)
smalloc( sizeof(mpi_datatypes), "mpi_data" );
/* setup the parallel environment */
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &(control->nprocs) );
MPI_Comm_rank( MPI_COMM_WORLD, &(system->my_rank) );
system->wsize = control->nprocs;
system->global_offset = (int*)
scalloc( system->wsize+1, sizeof(int), "global_offset" );
/* read system description files */
Read_System( argv[1], argv[2], argv[3], system, control,
data, workspace, out_control, mpi_data );
#if defined(DEBUG)
fprintf( stderr, "p%d: read simulation info\n", system->my_rank );
MPI_Barrier( MPI_COMM_WORLD );
#endif
/* measure total simulation time after input is read */
if( system->my_rank == MASTER_NODE )
t_start = Get_Time( );
/* initialize datastructures */
Initialize( system, control, data, workspace, lists, out_control, mpi_data );
#if defined(DEBUG)
fprintf( stderr, "p%d: initializated data structures\n", system->my_rank );
MPI_Barrier( mpi_data->world );
#endif
/* compute f_0 */
Comm_Atoms( system, control, data, workspace, lists, mpi_data, 1 );
Reset( system, control, data, workspace, lists );
Generate_Neighbor_Lists( system, data, workspace, lists );
Compute_Forces( system, control, data, workspace,
lists, out_control, mpi_data );
Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
Output_Results( system, control, data, lists, out_control, mpi_data );
#if defined(DEBUG)
fprintf( stderr, "p%d: computed forces at t0\n", system->my_rank );
MPI_Barrier( mpi_data->world );
#endif
/* start the simulation */
for( ++data->step; data->step <= control->nsteps; data->step++ ) {
if( control->T_mode )
Temperature_Control( control, data );
Evolve( system, control, data, workspace, lists, out_control, mpi_data );
Post_Evolve(system, control, data, workspace, lists, out_control, mpi_data);
Output_Results( system, control, data, lists, out_control, mpi_data );
//Analysis(system, control, data, workspace, lists, out_control, mpi_data);
/* dump restart info */
if( out_control->restart_freq &&
(data->step-data->prev_steps) % out_control->restart_freq == 0 ) {
if( out_control->restart_format == WRITE_ASCII )
Write_Restart( system, control, data, out_control, mpi_data );
else if( out_control->restart_format == WRITE_BINARY )
Write_Binary_Restart( system, control, data, out_control, mpi_data );
}
#if defined(DEBUG)
fprintf( stderr, "p%d: step%d completed\n", system->my_rank, data->step );
MPI_Barrier( mpi_data->world );
#endif
}
#endif
/* end of the simulation, write total simulation time */
......
......@@ -398,7 +398,7 @@ int Tokenize( char* s, char*** tok )
/***************** taken from lammps ************************/
/* safe malloc */
void *smalloc( long n, char *name )
void* smalloc( long n, char *name )
{
void *ptr;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment