diff --git a/.gitignore b/.gitignore index 280cf9b1a1b72856e15c9d2e5ad9b4e462f294ad..02821017f8f68410f23d1b42864a899f2babe99e 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,4 @@ stamp-h1 # General *.tar.gz +*.pdf diff --git a/Makefile.am b/Makefile.am index 42fe77e44b8b51da6ec841ba54df84fdda05419d..407d10df0c18c1ddb070daa8e9aa625ea3cb13be 100644 --- a/Makefile.am +++ b/Makefile.am @@ -15,6 +15,8 @@ if BUILD_MPI_GPU SUBDIRS += PG-PuReMD endif -EXTRA_DIST = doc/README.txt doc/manual.pdf +if BUILD_DOC +SUBDIRS += doc +endif dist-hook: rm -rf `find $(distdir) -name .git` diff --git a/PG-PuReMD/src/cuda_forces.cu b/PG-PuReMD/src/cuda_forces.cu index 063554bf53be41f6d1fd601d4e2cc7b87afa3b41..195642e2b37ada412cadb7a91b16b7d9804ea794 100644 --- a/PG-PuReMD/src/cuda_forces.cu +++ b/PG-PuReMD/src/cuda_forces.cu @@ -57,8 +57,10 @@ CUDA_GLOBAL void ker_estimate_storages (reax_atom *my_atoms, reax_atom *atom_i, *atom_j; i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= N) return; - + if (i >= N) + { + return; + } //Commented in CUDA_KERNEL //for( i = 0; i < N; ++i ) { @@ -68,25 +70,29 @@ CUDA_GLOBAL void ker_estimate_storages (reax_atom *my_atoms, end_i = Dev_End_Index(i, &far_nbrs); sbp_i = &(sbp[type_i]); - if( i < n ) { + if( i < n ) + { local = 1; cutoff = control->nonb_cut; //++(*Htop); atomicAdd (Htop, 1); ihb = sbp_i->p_hbond; } - else { + else + { local = 0; cutoff = control->bond_cut; ihb = -1; } - for( pj = start_i; pj < end_i; ++pj ) { + for( pj = start_i; pj < end_i; ++pj ) + { nbr_pj = &( far_nbrs.select.far_nbr_list[pj] ); j = nbr_pj->nbr; atom_j = &(my_atoms[j]); - if (nbr_pj->d <= control->nonb_cut) { + if (nbr_pj->d <= control->nonb_cut) + { type_j = my_atoms[j].type; sbp_j = &(sbp[type_j]); ihb = sbp_i->p_hbond; @@ -98,40 +104,56 @@ CUDA_GLOBAL void ker_estimate_storages (reax_atom *my_atoms, && (j < n) && (i > n) ) + { atomicAdd (&hb_top [i], 1); + } - if (i >= n) ihb = -1; + if (i >= n) + { + ihb = -1; + } } - if(nbr_pj->d <= cutoff) { + if(nbr_pj->d <= cutoff) + { type_j = my_atoms[j].type; r_ij = nbr_pj->d; sbp_j = &(sbp[type_j]); twbp = &(tbp[index_tbp (type_i,type_j,num_atom_types)]); - if( local ) { + if( local ) + { //if( j < n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 if( j < n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 + { //++(*Htop); atomicAdd (Htop, 1); + } else if( j < n || atom_i->orig_id > atom_j->orig_id ) //tryQEq ||1 + { //++(*Htop); atomicAdd (Htop, 1); + } if( control->hbond_cut > 0.1 && (ihb==1 || ihb==2) && nbr_pj->d <= control->hbond_cut - ) { + ) + { jhb = sbp_j->p_hbond; if( (ihb == 1) && (jhb == 2)) + { //++hb_top[i]; atomicAdd (&hb_top[i], 1); + } //else if( j < n && ihb == 2 && jhb == 1 ) //else if( ihb == 2 && jhb == 1 && j < n) else if( ihb == 2 && jhb == 1 && j < n) + { //++hb_top[j]; atomicAdd (&hb_top[i], 1); + } } } @@ -176,8 +198,13 @@ CUDA_GLOBAL void ker_estimate_storages (reax_atom *my_atoms, CUDA_GLOBAL void ker_init_system_atoms(reax_atom *my_atoms, int N, int *hb_top, int *bond_top) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= N) return; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= N) + { + return; + } my_atoms[i].num_bonds = bond_top [i]; my_atoms[i].num_hbonds = hb_top [i]; @@ -189,69 +216,83 @@ void Cuda_Estimate_Storages(reax_system *system, control_params *control, int *Htop, int *hb_top, int *bond_top, int *num_3body) { + int i; int blocks = 0; - int *l_Htop, *l_hb_top, *l_bond_top, *l_num_3body; - int *tmp = (int *)scratch; + int *d_Htop, *d_hb_top, *d_bond_top, *d_num_3body; + int bond_count = 0; + int hbond_count = 0; + int max_bonds = 0, min_bonds = 999999; + int max_hbonds = 0, min_hbonds = 999999; *Htop = 0; //memset( hb_top, 0, sizeof(int) * local_cap); - memset( hb_top, 0, sizeof(int) * total_cap); + memset( hb_top, 0, sizeof(int) * total_cap ); memset( bond_top, 0, sizeof(int) * total_cap ); *num_3body = 0; - //cuda_memset (tmp, 0, 1 + 1 + sizeof (int) * (local_cap+ total_cap), "Cuda_Estimate_Storages"); - cuda_memset (tmp, 0, sizeof (int) * (1 + 1 + total_cap+ total_cap), "Cuda_Estimate_Storages"); - - l_Htop = tmp; - l_num_3body = l_Htop + 1; - l_hb_top = l_num_3body + 1; - //l_bond_top = l_hb_top + local_cap; - l_bond_top = l_hb_top + total_cap; + cuda_memset ( d_Htop, 0, sizeof (int), "Cuda_Estimate_Storages" ); + cuda_memset ( d_hb_top, 0, sizeof (int) * total_cap, "Cuda_Estimate_Storages" ); +// cuda_memset ( d_bond_top, 0, sizeof (int) * local_cap, "Cuda_Estimate_Storages" ); + cuda_memset ( d_bond_top, 0, sizeof (int) * total_cap, "Cuda_Estimate_Storages" ); + cuda_memset ( d_num_3body, 0, sizeof (int), "Cuda_Estimate_Storages" ); - blocks = system->N / ST_BLOCK_SIZE + - ((system->N % ST_BLOCK_SIZE == 0) ? 0 : 1); + blocks = (int) CEIL((real)system->N / ST_BLOCK_SIZE); ker_estimate_storages <<< blocks, ST_BLOCK_SIZE>>> (system->d_my_atoms, system->reax_param.d_sbp, system->reax_param.d_tbp, (control_params *)control->d_control_params, *(*dev_lists + FAR_NBRS), system->reax_param.num_atom_types, system->n, system->N, system->Hcap, system->total_cap, - l_Htop, l_num_3body, l_bond_top, l_hb_top ); + d_Htop, d_num_3body, d_bond_top, d_hb_top ); cudaThreadSynchronize (); cudaCheckError (); - copy_host_device( Htop, l_Htop, sizeof (int), cudaMemcpyDeviceToHost, "Htop"); - copy_host_device( num_3body, l_num_3body, sizeof (int), cudaMemcpyDeviceToHost, "num_3body"); - //copy_host_device( hb_top, l_hb_top, sizeof (int) * local_cap, cudaMemcpyDeviceToHost, "hb_top"); - copy_host_device( hb_top, l_hb_top, sizeof (int) * total_cap, cudaMemcpyDeviceToHost, "hb_top"); - copy_host_device( bond_top, l_bond_top, sizeof (int) * total_cap, cudaMemcpyDeviceToHost, "bond_top"); - + copy_host_device( Htop, d_Htop, sizeof (int), cudaMemcpyDeviceToHost, "Htop"); + copy_host_device( num_3body, d_num_3body, sizeof (int), cudaMemcpyDeviceToHost, "num_3body"); + //copy_host_device( hb_top, d_hb_top, sizeof (int) * local_cap, cudaMemcpyDeviceToHost, "hb_top"); + copy_host_device( hb_top, d_hb_top, sizeof (int) * total_cap, cudaMemcpyDeviceToHost, "hb_top"); + copy_host_device( bond_top, d_bond_top, sizeof (int) * total_cap, cudaMemcpyDeviceToHost, "bond_top"); - int bond_count = 0; - int hbond_count = 0; - int max_bonds = 0, min_bonds = 999999; - int max_hbonds = 0, min_hbonds = 999999; + for (i = 0; i < system->N; i++) + { + if (bond_top[i] >= max_bonds) + { + max_bonds = bond_top[i]; + } + if (bond_top[i] <= min_bonds) + { + min_bonds = bond_top[i]; + } - for (int i = 0; i < system->N; i++) { - if (bond_top[i] >= max_bonds) max_bonds = bond_top[i]; - if (bond_top[i] <= min_bonds) min_bonds = bond_top[i]; bond_count += bond_top[i]; } system->max_bonds = max_bonds * SAFER_ZONE; + //for (int i = 0; i < system->n; i++) - for (int i = 0; i < system->N; i++){ - if (hb_top[i] >= max_hbonds) max_hbonds = hb_top[i]; - if (hb_top[i] <= min_hbonds) min_hbonds = hb_top[i]; + for (i = 0; i < system->N; i++) + { + if (hb_top[i] >= max_hbonds) + { + max_hbonds = hb_top[i]; + } + if (hb_top[i] <= min_hbonds) + { + min_hbonds = hb_top[i]; + } + hbond_count += hb_top [i]; } system->max_hbonds = max_hbonds * SAFER_ZONE; - //fprintf (stderr, " TOTAL DEVICE BOND COUNT: %d \n", bond_count); - //fprintf (stderr, " TOTAL DEVICE HBOND COUNT: %d \n", hbond_count); - //fprintf (stderr, " TOTAL DEVICE SPARSE COUNT: %d \n", *Htop); + +//#if defined(DEBUG) + fprintf (stderr, " TOTAL DEVICE BOND COUNT: %d \n", bond_count); + fprintf (stderr, " TOTAL DEVICE HBOND COUNT: %d \n", hbond_count); + fprintf (stderr, " TOTAL DEVICE SPARSE COUNT: %d \n", *Htop); fprintf (stderr, "p:%d --> Bonds(%d, %d) HBonds (%d, %d) *******\n", system->my_rank, min_bonds, max_bonds, min_hbonds, max_hbonds); +//#endif ker_init_system_atoms <<<blocks, ST_BLOCK_SIZE>>> - (system->d_my_atoms, system->N, l_hb_top, l_bond_top ); + (system->d_my_atoms, system->N, d_hb_top, d_bond_top ); cudaThreadSynchronize (); cudaCheckError (); } @@ -964,7 +1005,7 @@ int Cuda_Validate_Lists (reax_system *system, storage *workspace, reax_list **li return FAILURE; } if (end_index[i] - index[i] >= max_bonds) - max_bonds = index[i] - index[i]; + max_bonds = end_index[i] - index[i]; } realloc->num_bonds = max_bonds; diff --git a/PG-PuReMD/src/cuda_helpers.h b/PG-PuReMD/src/cuda_helpers.h index 1fea3e26f343513eb68d9ce480572492da10805f..0d6282a842e40e377d8c8ef11f7c52a0ae8f904d 100644 --- a/PG-PuReMD/src/cuda_helpers.h +++ b/PG-PuReMD/src/cuda_helpers.h @@ -1,9 +1,9 @@ - #ifndef __CUDA_HELPERS__ #define __CUDA_HELPERS__ #include "reax_types.h" + CUDA_DEVICE inline int cuda_strcmp (char *a, char *b, int len) { char *src, *dst; @@ -26,6 +26,7 @@ CUDA_DEVICE inline int cuda_strcmp (char *a, char *b, int len) return 0; } + CUDA_DEVICE inline real atomicAdd(real* address, real val) { unsigned long long int* address_as_ull = @@ -38,9 +39,11 @@ CUDA_DEVICE inline real atomicAdd(real* address, real val) __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); + return __longlong_as_double(old); } + CUDA_DEVICE inline void atomic_rvecAdd( rvec ret, rvec v ) { atomicAdd ( &ret[0], v[0] ); @@ -48,6 +51,7 @@ CUDA_DEVICE inline void atomic_rvecAdd( rvec ret, rvec v ) atomicAdd ( &ret[2], v[2] ); } + CUDA_DEVICE inline void atomic_rvecScaledAdd( rvec ret, real c, rvec v ) { atomicAdd ( &ret[0], c * v[0] ); diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index 504d8c2197b13ff499ac73b6e0967c5c181b00c4..eaf7564152ca7c27f83baed643f72e94a685e19e 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -244,11 +244,11 @@ /******************* ENUMERATIONS *************************/ enum geo_formats { CUSTOM = 0, PDB = 1, ASCII_RESTART = 2, BINARY_RESTART = 3, GF_N = 4 }; - + enum restart_formats { WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2 }; - + enum ensembles { NVE = 0, bNVT = 1, nhNVT = 2, sNPT = 3, iNPT = 4, NPT = 5, ens_N = 6 }; - + enum lists { BONDS = 0, OLD_BONDS = 1, THREE_BODIES = 2, HBONDS = 3, FAR_NBRS = 4, DBOS = 5, DDELTAS = 6, LIST_N = 7 }; diff --git a/configure.ac b/configure.ac index 035c072019fce2db6d16b23663377e361699f50a..f0db1ac740358957321ae2de3894f09faefa9e22 100644 --- a/configure.ac +++ b/configure.ac @@ -14,57 +14,65 @@ AC_CONFIG_MACRO_DIR([m4]) # Enable silent build rules by default. m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])], [AC_SUBST([AM_DEFAULT_VERBOSITY],[1])]) +AC_DEFUN([AC_PROG_PDFLATEX], + [AC_ARG_VAR([PDFLATEX], [LaTeX PDF generation program])dnl + AC_CHECK_PROGS([PDFLATEX], [pdflatex]) + m4_ifval([$1],, + [if test -z "$PDFLATEX"; then + AC_MSG_WARN([pdflatex not found. Unable to build documentation. Continuing...]) + fi])]) + AC_ARG_ENABLE([serial], [AS_HELP_STRING([--enable-serial], [enable serial support @<:@default: no@:>@])], - [package_serial_enabled=${enableval}], [package_serial_enabled=no]) + [pack_serial_enabled=${enableval}], [pack_serial_enabled=no]) AC_ARG_ENABLE([openmp], [AS_HELP_STRING([--enable-openmp], [enable OpenMP support @<:@default: yes@:>@])], - [package_openmp_enabled=${enableval}], [package_openmp_enabled=yes]) + [pack_openmp_enabled=${enableval}], [pack_openmp_enabled=yes]) AC_ARG_ENABLE([mpi], [AS_HELP_STRING([--enable-mpi], [enable MPI support @<:@default: no@:>@])], - [package_mpi_enabled=${enableval}], [package_mpi_enabled=no]) + [pack_mpi_enabled=${enableval}], [pack_mpi_enabled=no]) AC_ARG_ENABLE([gpu], [AS_HELP_STRING([--enable-gpu], [enable CUDA (single GPU) support @<:@default: no@:>@])], - [package_gpu_enabled=${enableval}], [package_gpu_enabled=no]) + [pack_gpu_enabled=${enableval}], [pack_gpu_enabled=no]) AC_ARG_ENABLE([mpi-not-gpu], [AS_HELP_STRING([--enable-mpi-not-gpu], [enable MPI but not CUDA support @<:@default: no@:>@])], - [package_mpi_not_gpu_enabled=${enableval}], [package_mpi_not_gpu_enabled=no]) + [pack_mpi_not_gpu_enabled=${enableval}], [pack_mpi_not_gpu_enabled=no]) AC_ARG_ENABLE([mpi-gpu], [AS_HELP_STRING([--enable-mpi-gpu], [enable MPI+CUDA (multi GPU) support @<:@default: no@:>@])], - [package_mpi_gpu_enabled=${enableval}], [package_mpi_gpu_enabled=no]) + [pack_mpi_gpu_enabled=${enableval}], [pack_mpi_gpu_enabled=no]) -if test "x${package_serial_enabled}" = "xyes" || test "x${package_openmp_enabled}" = "xyes"; then +if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([sPuReMD]) - if test "x${package_serial_enabled}" = "xyes" || test "x${package_openmp_enabled}" != "xyes"; then + if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" != "xyes"; then export BUILD_OPENMP="no" else export BUILD_OPENMP="yes" fi fi -AM_CONDITIONAL([BUILD_S_OMP], [test "x${package_serial_enabled}" = "xyes" || test "x${package_openmp_enabled}" = "xyes"]) -if test "x${package_mpi_enabled}" = "xyes"; then +AM_CONDITIONAL([BUILD_S_OMP], [test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"]) +if test "x${pack_mpi_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([PuReMD]) fi -AM_CONDITIONAL([BUILD_MPI], [test "x${package_mpi_enabled}" = "xyes"]) -if test "x${package_gpu_enabled}" = "xyes"; then +AM_CONDITIONAL([BUILD_MPI], [test "x${pack_mpi_enabled}" = "xyes"]) +if test "x${pack_gpu_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([PuReMD-GPU]) fi -AM_CONDITIONAL([BUILD_GPU], [test "x${package_gpu_enabled}" = "xyes"]) -if test "x${package_mpi_not_gpu_enabled}" = "xyes" || test "x${package_mpi_gpu_enabled}" = "xyes"; then +AM_CONDITIONAL([BUILD_GPU], [test "x${pack_gpu_enabled}" = "xyes"]) +if test "x${pack_mpi_not_gpu_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([PG-PuReMD]) - if test "x${package_mpi_not_gpu_enabled}" = "xyes" || test "x${package_mpi_gpu_enabled}" != "xyes"; then + if test "x${pack_mpi_not_gpu_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" != "xyes"; then export BUILD_GPU="no" else export BUILD_GPU="yes" fi fi -AM_CONDITIONAL([BUILD_MPI_GPU], [test "x${package_mpi_not_gpu_enabled}" = "xyes" || test "x${package_mpi_gpu_enabled}" = "xyes"]) +AM_CONDITIONAL([BUILD_MPI_GPU], [test "x${pack_mpi_not_gpu_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" = "xyes"]) # Provides debug compilation mode. AC_ARG_ENABLE([debug], @@ -137,6 +145,14 @@ then export BUILD_SUPERLU_MT="${package_superlu_mt}" fi +AC_PROG_PDFLATEX +AM_CONDITIONAL([BUILD_DOC], [test "x${PDFLATEX}" != "x"]) + AC_CONFIG_FILES([Makefile]) +if test "x${PDFLATEX}" != "x" +then + AC_CONFIG_FILES([doc/Makefile]) +fi + AC_OUTPUT diff --git a/data/benchmarks/water/water_6540.pdb b/data/benchmarks/water/water_6540.pdb index 660883aa9057f06a4dd1561a567a5dc8821e0c6d..4c09dc5106e89661f7edd68efc65f0821ae58962 100644 --- a/data/benchmarks/water/water_6540.pdb +++ b/data/benchmarks/water/water_6540.pdb @@ -1,4 +1,4 @@ -CRYST1 40.299 40.299 40.299 90.00 90.00 90.00 0 +CRYST1 40.299 40.299 40.299 90.00 90.00 90.00 0 ATOM 1 O REX 1 5.690 12.751 11.651 1.00 0.00 0 O ATOM 2 H REX 1 4.760 12.681 11.281 1.00 0.00 0 H ATOM 3 H REX 1 5.800 13.641 12.091 1.00 0.00 0 H diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..7eedf6bbb79fdde0d84e39838d3cbf1633dba85c --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,16 @@ +docs = README.txt + +AM_V_PDFLATEX = $(AM_V_PDFLATEX_@AM_V@) +AM_V_PDFLATEX_ = $(AM_V_PDFLATEX_@AM_DEFAULT_V@) +AM_V_PDFLATEX_0 = @echo " PDFLATEX" $@; + +if BUILD_DOC +doc_DATA = manual.pdf + +manual.pdf: src/manual.tex + $(AM_V_PDFLATEX)$(PDFLATEX) $< + +CLEANFILES = manual.pdf manual.log manual.out manual.aux +endif + +dist_doc_DATA = ${docs} diff --git a/doc/manual.pdf b/doc/manual.pdf deleted file mode 100644 index ccd26a86b2c5faa2036da965cba886223a25103c..0000000000000000000000000000000000000000 Binary files a/doc/manual.pdf and /dev/null differ diff --git a/doc/src/manual.tex b/doc/src/manual.tex new file mode 100644 index 0000000000000000000000000000000000000000..257aaee3af94222f0593ab8989a504afcd69681e --- /dev/null +++ b/doc/src/manual.tex @@ -0,0 +1,651 @@ +%% +%% This is the PuReMD manual. +%% +\documentclass{article} + +\usepackage{hyperref} + + +\title{PuReMD Manual \\ + (Purdue Reactive Molecular Dynamics Program)} + +\author{Hasan Metin Aktulga} + +\begin{document} + +\maketitle + +This manual is for the two simulation programs which have +come to existence as a result of our ReaxFF realization efforts. +Our initial efforts have led to the SerialReax program, which is a +sequential implementation for ReaxFF. SerialReax has helped us in verifying +the accuracy of our implementation in C against the original ReaxFF code +which was developed in Fortran, such a task would be cumbersome in a parallel +context. It has also served as a test bed for quickly implementing new +algorithms and numerical techniques and benchmarking their effectiveness +before we incorporated them into the parallel version. + +PuReMD (Purdue Reactive Molecular Dynamics) program is based on the +sequential implemention, SerialReax, and highly efficient and scalable +parallelization techniques. It inherits the excellent +performance and small memory foot-print features of SerialReax and +extends these desirable capabilities to systems of sizes that are of +interest to computational scientists. + +For reasons described above, setting up a simulation and running it using +PuReMD or SerialReax is quite similar to each other. Therefore in this +manual, we take PuReMD as our basis and describe it first. In a following +section, we describe the extras that come with SerialReax which we hope +to incorporate into PuReMD in the near future. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Input Files} +\label{sec:puremd_inp} + +PuReMD expects 3 input files: a geometry file describing the system to be +simulated, a force field file containing ReaxFF parameters and a control +file to manage simulation variables. + +\subsection{Geometry File} +\label{sec:puremd_geo} + +Geometry file tells about the types and initial positions of the atoms +in the system. PuReMD supports geometry files in two formats: +the PDB format and our custom input format. It is also possible to +restart from an earlier simulation check-point using a restart file +(which can be either in ASCII or binary format as explained below). + +\subsubsection{PDB format} +\label{sec:puremd_pdb} + +For detailed and up-to-date information on the PDB format, please visit +\url{http://www.wwpdb.org/docs.html}. Input files of various other formats +can easily be converted to the pdb format using the freely available +OpenBabel software: (\url{http://openbabel.sourceforge.net/wiki/Main_Page}). + +In the geometry file, each atom is assigned a unique serial id to be +able to identify atoms easily during the simulation. PDB format limits the +number of digits in the atom serial field to only 5 digits, therefore the +maximum number of atoms that can be input using the PDB format is only +$100000$, way behind what is generally used for parallel MD simulations. + +\subsubsection{custom format} +\label{sec:puremd_custom} + +PuReMD features a very simple custom geometry format to alleviate the +maximum number of atoms limitation of the PDB format and to ease the task of +preparing a geometry file. The general layout of our custom GEO format is as +follows: The first line describes the simulation box and the second line +gives the total number of atoms in the system. These initial two lines need +to be followed by a single line for each atom describing it in detail. +Here is what a custom geo file looks like: +\begin{verbatim} +BOXGEO x_len y_len z_len alpha beta gamma +N +1 ele1 name1 x1 y1 z1 +2 ele2 name2 x2 y2 z2 +. +. +. +N eleN nameN xN yN zN +\end{verbatim} + +First three floating point numbers on the first line give the length of +the simulation box in x, y, z dimensions, the remaining ones are for the +angles between each box dimension. Currently, PuReMD works only with an +orthogonal box meaning all angles need to be $90.0$ degrees. + +There is no limit by the format on the number of atoms that can be input, +so N shown on the second line can be as large as allowed by the memory +limitations. Each atom line starting from line 3 and until line $N+2$ consists +of 6 fields: +\begin{itemize} + \item an integer denoting the atom's serial id + \item a string for the chemical symbol of the element + (2 characters max, case insensitive) + \item a string for the atom name (7 characters max) + \item 3 floating point numbers describing the position in cartesian + coordinates +\end{itemize} + + +\subsection{Force Field File} +\label{sec:puremd_ffield} + +Force field file contains the ReaxFF parameters to be used during +the simulation. Adri van Duin is the main developer and distributor +for Reax force fields, you can see his contact info at +\url{http://www.mne.psu.edu/vanduin/}. + +\subsection{Control File} +\label{sec:puremd_control} + +Parameters in the control file allow the user to tune various simulation +options. Parameter names are case-sensitive but their order is not important +(except that {\tt ensemble\_type} needs to precede both {\tt p\_mass} and +{\tt pressure}). Described below are the fields that you might use in a +control file. If a parameter is missing from the control file, its default +value (as given in each parameter's description below) will be assumed. +Each parameter must be specified in a single line, first token should be +the parameter and the second token should be an appropriate value. +Comments regarding a parameter can be included after the value field +on the same line. + +\begin{verbatim} + simulation_name test_puremd +\end{verbatim} +Output files produced by PuReMD will be in +{\tt simulation\_name.some\_extension} format. Output files will be +discussed in more detail in Section~\ref{sec:puremd_output}. Default value +is {\tt simulate}. + +\begin{verbatim} + ensemble_type 1 +\end{verbatim} +{\tt ensemble\_type} denotes the type of the ensemble to be produced by +PuReMD. Supported ensembles are as follows: +\begin{itemize} + \item 0: NVE + \item 1: bNVT: NVT with Berendsen thermostat + \item 2: nhNVT: NVT with Nose-Hoover thermostat (under testing) + \item 3: sNPT: semiisotropic NPT with Berendsen's coupling + \item 4: iNPT: isotropic NPT with Berendsen's coupling + \item 5: NPT: anisotropic NPT with Parrinello-Rehman coupling + (under development) +\end{itemize} +{\tt ensemble\_type} is NVE by default. + +\begin{verbatim} + nsteps 1000 + dt 0.25 +\end{verbatim} +{\tt nsteps} controls the total number of steps for the simulation and +{\tt dt} controls the length of each time step (measured in femtoseconds). +Number of steps is 0 by default and time step length is 0.25~fs. + +\begin{verbatim} + proc_by_dim 1 1 3 +\end{verbatim} +PuReMD uses the domain decomposition technique to distribute the load +among processors, it currently does not have dynamic load balancing. +{\tt proc\_by\_dim} denotes the desired decomposition of the simulation +box into subdomains (first integer is the number of equal-length +partitions in x dimension, second integer is for y dimension and +the last one is for z dimension). Each subdomain is subsequently assigned +to a single processor. PuReMD constructs a 3D torus based on the +{\tt proc\_by\_dim} parameter. The default is to use a single processor. +SerialReax does not accept the {\tt proc\_by\_dim} parameter. + +\begin{verbatim} + geo_format 0 +\end{verbatim} +{\tt geo\_format} parameter informs PuReMD about the format of the +geometry file to be read. Options are: +\begin{itemize} + \item 0: custom GEO format + \item 1: PDB format + \item 2: ASCII restart file + \item 3: binary restart file +\end{itemize} +PDB and custom formats were already discussed in Section~\ref{sec:puremd_geo}. +Another option is to resume from an older run by setting {\tt geo\_format} +to 2 (for ASCII restarts) or 3 (for binary restarts) and providing the name +of the restart file as an argument to PuReMD (instead of the GEO file name). +Then PuReMD will read the box geometry, positions and velocities for all +atoms in the system from the restart file and continue execution from thereon. +Default is the custom geometry format. + +\begin{verbatim} + restart_format 1 + restart_freq 0 +\end{verbatim} +PuReMD can output restart files in an ASCII format (when +{\tt restart\_format = 0}) or in a binary format (when {\tt restart\_format = 1}). +While ASCII restarts are good for portability, binary restart files are +much more compact and does not cause any loss of information due to +truncation of floating point numbers. Binary restart is the default. + +There will not be any restart files output unless {\tt restart\_freq} +parameter is set to a positive integer. A restart file is named as follows: +{\tt simulation\_name.resS} where {\tt S} denotes the step that the restart +file is written. + +\begin{verbatim} + tabulate_long_range 10000 +\end{verbatim} +When set to $m$ (must be a positive integer), {\tt tabulate\_long\_range} +option turns on the tabulation optimization for computing electrostatics and +van der Waals interactions. The range [0, cutoff] is sampled at $m$ equally +spaced points; energy and forces due to long range interactions between each +atom type in the system are computed at each of these sample points and +are stored in a table. Then for each interval, coefficients of a fitted +cubic spline interpolation function are computed. During the simulation +while computing the long range interactions between any two atoms, +the appropriate interpolation function is located and energy and forces +between the atom pair is approximated by means of cubic spline interpolation. +This method gives significant speed-up compared to computing everything from +scratch each time and with only 10000 sample points it is able to provide +results with an accuracy at machine precision level. Default is no tabulation. + +\begin{verbatim} + energy_update_freq 10 +\end{verbatim} +This option controls the frequency of writes into output files described +in detail in Section~\ref{sec:puremd_output} (except for the trajectory +and restart files which are controlled by other parameters explained +separately). The default value for this parameter is 0, meaning there will +not be any energies and performance logs output. + +\begin{verbatim} + remove_CoM_vel 500 +\end{verbatim} +Removal of translational and rotational velocities around the center of +mass needs to be done for NVT and NPT type ensembles to remove the +nonphysical effects of scaling velocities. In case of NVE, this is +unnecessary and is not done regardless of the value of {\tt remove\_CoM\_vel}. +The default is to remove translational and rotational velocities at +every 250 steps. + +\begin{verbatim} + nbrhood_cutoff 5.0 + thb_cutoff 0.001 + hbond_cutoff 7.50 +\end{verbatim} +These cutoff parameters are crucial for the correctness and efficiency +of PuReMD. Normally, bonded interactions are truncated after 4-5~\AA\ in +ReaxFF and this is controlled by the {\tt nbrhood\_cutoff} parameter +whose default value is 4~\AA. + +{\tt thb\_cutoff} sets the bond strength threshold for valence angle +interactions. Bonds which are weaker than {\tt thb\_cutoff} will not +be included in valence angle interactions. Default for {\tt thb\_cutoff} +is 0.001. + +{\tt hbond\_cutoff} controls the distance between the donor and acceptor +atoms in a hydrogen bond interaction. Its typical value is from 6\AA\ to +7.5~\AA. If {\tt hbond\_cutoff} is set to 0, hydrogen bond interactions +will be turned off completely (could be useful for improved +performance in simulations where it is known apriori that there are no +hydrogen bonding interactions). Default is to set {\tt hbond\_cutoff} to 0. + +\begin{verbatim} + reneighbor 10 + vlist_buffer 2 +\end{verbatim} +PuReMD features delayed neighbor generation by using Verlet lists. +{\tt reneighbor} controls the reneighboring frequency and {\tt vlist\_buffer} +controls the buffer space beyond the maximum ReaxFF interaction cutoff. +By default, {\tt vlist\_buffer} is set to 0 and reneighboring is done at +every step. + +\begin{verbatim} + q_err 1e-6 + qeq_freq 1 +\end{verbatim} +PuReMD uses a preconditioned conjugate gradients (PCG) solver with a +diagonal preconditioner for the QEq problem. {\tt q\_err} denotes the +stopping criteria for the PCG solver, the norm of the relative residual. +A lower threshold would yield more accurate equilibration of charges at +the expense of an increase in computation time. A threshold of $10^{-6}$ +should be good enough for most cases and this is the default value. + +{\tt qeq\_freq} can be used to perform charge equilibration at every +few steps instead of the default behaviour of performing it at every +step. Although doing QEq less frequently would save important +computational time, it is not recommended. Because this might cause wild +fluctuations in energies and forces. + +\begin{verbatim} + temp_init 0.0 + temp_final 300.0 + t_mass 0.1666 +\end{verbatim} +Temperature coupling parameters ({\tt temp\_final} and {\tt t\_mass}) are +effective in all types of ensembles except for NVE. Initial temperature +is controlled via the {\tt temp\_init} parameter including the NVE ensemble. +0~K is the default value for {\tt temp\_init} and 300~K is the default value +for {\tt temp\_final}. PuReMD features both Berendsen~\cite{ref:berendsen} +and Nose-Hoover~\cite{ref:klein} type thermostats as was mentioned while +explaining the {\tt ensemble\_type} parameter. +\emph{Important note: Nose-Hoover thermostat in PuReMD is still under testing.} + +{\tt t\_mass} is the thermal inertia given in femtoseconds. Suggested (and +the default) value of {\tt t\_mass} is 500.0, and 0.166 for the Berendsen +thermostat, and for the Nose-Hoover thermostat, respectively. + +\begin{verbatim} + pressure 0.000101 0.000101 0.000101 + p_mass 5000.0 5000.0 5000.0 +\end{verbatim} +Pressure coupling parameters are needed only when working with NPT-type +ensembles. Currently iNPT (isotropic NPT) and sNPT (semi-isotropic NPT) +are the available pressure coupling ensembles in PuReMD. Berendsen +thermostats and barostats are used in both cases~\cite{ref:berendsen}. +{\tt pressure} is the desired pressure of the system in GPa and {\tt p\_mass} +is the virial inertia in femtoseconds. Suggested (and the default) value of +{\tt p\_mass} is 5000.0 together with a {\tt t\_mass} of 500.0 as NPT methods +use Berendsen-type thermostats only. + +For the iNPT ensemble, {\tt pressure} parameter expects a single +floating number (in case there are more, they will simply be ignored) +to control pressure. For the sNPT ensemble, {\tt pressure} parameter +expects 3 floating point numbers to control pressure on each dimension. +Same things apply for {\tt p\_mass} as well. + +\begin{verbatim} + write_freq 100 + traj_method 1 +\end{verbatim} +Trajectory of the simulation will be output to the trajectory file +(which will automatically be named as {\tt simulation\_name.trj}) at +every {\tt write\_freq} steps. For making analysis easier, the trajectory +file is written as an ASCII file. By default, no trajectory file +is written. + +PuReMD can output trajectories either using simple MPI send/receives +(option 0 which is the default) or using MPI I/O calls (option 1) which +are part of the MPI-2 standard. The latter option is supposed to be more +efficient (not verified by tests though) but may not be available in some +MPI implementations. {\tt traj\_method} option is not applicable to +SerialReax simulations. + +\begin{verbatim} + traj_title TEST + atom_info 1 + atom_forces 1 + atom_velocities 1 + bond_info 0 + angle_info 0 +\end{verbatim} +Currently PuReMD only outputs trajectories in its custom trajectory +format. This custom format starts with a trajectory header detailing +the trajectory title and the values of control parameters used for +the simulation. A brief description of atoms follow the trajectory +header with atom serial ids and what element each atom is. + +Then at each {\tt write\_freq} steps (including step 0), a trajectory +frame is appended to the trajectory file. The frame header which gives +information about various potential energies, temperature, pressure and +box geometry is standard. However, the latter parts of the frame can be +customized using {\tt atom\_info}, {\tt atom\_forces}, {\tt atom\_velocities}, +{\tt bond\_info} and {\tt angle\_info} parameters which are already +self-explanatory. The ordering is atoms section, bonds section and angles +section assuming that they are all present. By default, all atom, bond and +angle information outputting is turned off. + +One nice property of the custom trajectory format is that each part of +the trajectory is prepended by a number that can be used to skip that part. +For example, the trajectory header is prepended by an integer giving the +number of characters to skip the control parameters section. The initial +atom descriptions is prepended by the number of characters to skip the +initial descriptions part and another one that tells the number of atom +description lines. Similar numbers are found at the start of each section +within a trajectory frame as well, making it easy to skip parts which are +not of interest to a particular trajectory analysis procedure. So the +general layout of our custom trajectory format is as follows (assuming +all trajectory options are turned on): +\begin{verbatim} +CHARS_TO_SKIP_SECTION +trajectory header +CHARS_TO_SKIP_ATOM_DESCS NUM_LINES +atom descriptions +CHARS_TO_SKIP_FRAME_HEADER +frame1 header +CHARS_TO_SKIP_ATOM_LINES NUM_ATOM_LINES +frame1 atom info +CHARS_TO_SKIP_BOND_LINES NUM_BOND_LINES +frame1 bond info +CHARS_TO_SKIP_ANGLE_LINES NUM_ANGLE_LINES +frame1 angle info +. +. +. +CHARS_TO_SKIP_FRAME_HEADER +frameN header +CHARS_TO_SKIP_ATOM_LINES NUM_ATOM_LINES +frameN atom info +CHARS_TO_SKIP_BOND_LINES NUM_BOND_LINES +frameN bond info +CHARS_TO_SKIP_ANGLE_LINES NUM_ANGLE_LINES +frameN angle info +\end{verbatim} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{SerialReax Extras} +\label{sec:serialreax_extras} + +In this section, we explain the parameters found in SerialReax but not in +PuReMD. Our work towards adding the same functionalities into PuReMD is +underway. + +In addition to the PCG solver, SerialReax features a preconditioned GMRES +(PGMRES) solver and an incomplete LU factorization (ILU) based +preconditioning scheme. An ILU factorization essentially does the +same thing as an LU factorization but small terms in the matrix are dropped +to expedite the factorization and to prevent a huge number of fill-ins in the +factor matrices. Following are the extra control parameters found in +SerialReax regarding the QEq solver: +\begin{verbatim} + ilu_refactor 100 + ilu_droptol 0.01 +\end{verbatim} +{\tt ilu\_droptol} sets the threshold for dropping small terms in the +resulting ILU factors. Suggested (and the default) value for +{\tt ilu\_droptol} is $10^{-2}$. Despite the drop rules, ILU factorization +is still a costly operation. So a user can choose to perform it at +every {\tt ilu\_refactor} steps. The fact that atoms move very slowly in an +MD simulation allows the use of same ILU factors as preconditioners in the +subsequent steps with little performance loss. For liquids, this frequency +can be on the order of 100-200 steps, for solids it can go up to thousands +of steps depending on how fast atoms are moving. The default for +{\tt ilu\_refactor} is 100. + +\begin{verbatim} + t_mode 0 + t_rate -100.0 + t_freq 2.0 +\end{verbatim} +These options are specifically for being able to change the temperature of +the system during a simulation. {\tt t\_mode} of 1 gives a step-wise +control over temperature, \emph{i.e.} the system maintains its temperature +for a simulation time of {\tt t\_freq} picoseconds. After that, the +target temperature is increased/decreased by {\tt t\_rate}~K and the +thermostat lets the system converge to its new target temperature. + +On the other hand, {\tt t\_mode} of 2 increases/decreases the target +temperature at each time-step by an amount which corresponds to +{\tt t\_rate / t\_freq}~K/ps. The overall effect of such a regime is a +constant slope (instead of the step pattern with a {\tt t\_mode} of 1) +in the target temperature--simulation time graph. + +\begin{verbatim} + molec_anal 1 + freq_molec_anal 1 + bond_graph_cutoff 0.3 + ignore 2 0 3 +\end{verbatim} +Since ReaxFF is a reactive force field, during the simulation molecules +present in the system will change. These changes can be traced by turning +the {\tt molec\_anal} option on by setting it to a non-zero integer. +Molecules are determined based on the {\tt bond\_graph\_cutoff} parameter: +bond orders less than this threshold are not counted as a physical +bond and do not contribute to molecular structures, all others do. +{\tt ignore} allows one to ignore the bondings of specific atom types. +The first number after {\tt ignore} denotes how many atom types will be +listed and the following numbers (which correspond to the order of +elements in the force field file) denote the atom types to be ignored +in molecular analysis. + + +%\begin{verbatim} +% dipole_anal 1 ! 1: calc electric dipole moment +% freq_dipole_anal 1 ! electric dipole calc freq +%\end{verbatim} +%Currently electric dipole moment analysis is available for water molecules +%only but it can be extended to other molecule types upon request. + + +%\begin{verbatim} +% diffusion_coef 1 ! 1: calc diffusion coef +% freq_diffusion_coef 1 ! diffusion coef calc freq +% restrict_type 2 ! -1: all, >=0: only this type +%\end{verbatim} +%Our program allows you to compute the diffusion coefficient of +%the system, too. It can be restricted to certain types of atoms +%if desired. + +%\begin{verbatim} +% reposition_atoms 0 ! 1: CoM-centered, 2: CoM-origin +%\end{verbatim} +%This option lets you position the atoms based on your choice. +%Option $0$ will simply fit the atoms into the periodic box. +%Option $1$ will translate the atoms such that the center of mass +%will be at the center of the box, +%whereas option $2$ positions the center of mass to the origin. +%Options $1$ and $2$ may need further testing, so it is safer to +%use option $0$ for now. + +%\begin{verbatim} +% restrict_bonds 0 ! turns reactions on and off +%\end{verbatim} +%When set to $m$ (must be a positive integer), this option enforces +%bonds given in CONECT lines of geometry file for the first $m$ +%steps of the simulation. +%This is done by including only the atoms given on the CONECT lines +%among the near neighbors of an atom. +%Turning this option would probably produce nonphysical trajectories +%but may be useful for energy minimization purposes. + +% \begin{verbatim} +% periodic_boundaries 1 ! 1: periodic boundaries on +% periodic_images 3 3 3 ! no of images in each direction +% \end{verbatim} +% Above parameters are concerned with the boundary conditions of the +% system. +% Currently only periodic boundaries are supported but non-periodic boundaries +% effect can be accomplished by making the simulation box big enough. +% Note that adding an empty space of \emph{r\_cut} \AA (discussed below) in +% \emph{x,y,z} dimensions will be enough for a completely non-periodic +% simulation box. +% Or if desired, any combination of dimensions might be made non-periodic +% by adding this empty space to them and letting others end where the system +% ends. +% Since currently we are implementing shielded electrostatic interactions, +% \emph{periodic\_images} is not effective yet. It will be required when +% electrostatic interactions across periodic images are implemented. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Compilation and Execution} +\label{sec:puremd_execute} + +PuReMD is distributed in the {\tt tar.gz} compression format which can +be extracted under a Unix system with the following command: +\begin{verbatim} + gtar xvzf PuReMD.tar.gz +\end{verbatim} + +This results in the creation of a new directory, named {\tt PuReMD}, will appear in the working +directory. It contains the source code directory ({\tt src}) +along with a directory for sample systems ({\tt examples}). + +PuReMD can be compiled by switching to the {\tt src} directory and +running {\tt make}. The executable, {\tt puremd}, will be created inside +the source directory. The Makefile that comes in the distribution assumes +OpenMPI as the default MPI implementation and {\tt mpicc} as the default +MPI compiler. In case you have a different MPI implementation, +please set your MPI compiler in the Makefile appropriately. + +PuReMD requires 3 input files as mentioned in section~\ref{sec:puremd_inp}. +For example, the command to run {\tt puremd} with OpenMPI is as follows: +\begin{verbatim} + mpirun -np #p -machinefile m.txt puremd geo ffield control +\end{verbatim} + +SerialReax comes in a similar distribution format and Makefile, +so instructions for compiling and running PuReMD is applicable for +SerialReax as well. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Output} +\label{sec:puremd_output} + +PuReMD writes its output files into the directory where it is run. +There are a number of output files all of which have the +{\tt simulation\_name} as the first part of their names followed +by its unique extension: + +\begin{itemize} + \item{\textbf{.out}} contains a summary of the simulation progress. + Its format is: + \begin{verbatim} + Step Total Energy Potential Kinetic + T (in K) Volume(in A^3) P(in GP) + \end{verbatim} + +\item{\textbf{.pot}} contains detailed information regarding various types of + energies that comprise the total potential energy: + \begin{verbatim} + Step Bonds OverCoor+UnderCoor LonePair + Angle+Penalty 3-body Coalition Hydrogen Bonds + Torsion 4-body Conjugation + vander Waals Coulomb Polarization + \end{verbatim} + +\item{\textbf{.log}} is intended for performance tracking purposes. + It displays the total time per step and what parts of code take + up how much time to compute. + + \item{\textbf{.prs}} is output only when pressure coupling is on. + It displays detailed information regarding the pressure and + box dimensions as simulation progresses. + + \item{\textbf{.trj}} is the trajectory file. Atom positions are written + into this file at every {\tt write\_freq} steps using the desired format + as explained before. Each frame is concatenated one below the other. + + %\item{\textbf{.dpl}} is for dipole moment analysis: + %\begin{verbatim} + %Step Molecule Count Avg Dipole Moment + %\end{verbatim} + % + %\item{\textbf{.drft}} is for diffusion coefficient analysis: + %\begin{verbatim} + %Step Type Count Avg Squared Displacement + %\end{verbatim} +\end{itemize} + +Apart from these, there might be some text printed to \emph{stderr} +for debugging purposes. If you encounter some problems with the code +(like a segmentation fault or unexpected termination of the code), +please contact \href{mailto:haktulga@cs.purdue.edu}{haktulga@cs.purdue.edu} with the error message +printed to \emph{stderr} and your input files. + +In addition to the output files above, SerialReax can output another +file (with extension \textbf{.mol}) which contains the fragmentation +analysis output. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{thebibliography}{99} + \bibitem{ref:klein} + Glenn J. Martyna, Douglas J. Tobias, and Michael L. Klein. + ``Constant pressure molecular dynamics algorithms.'' + The Journal of Chemical Physics 101, 4177 (1994). + + \bibitem{ref:berendsen} + H. J. C. Berendsen, J. P. M. Postma, W. F. van Gunsteren, A. DiNola, and + J. R. Haak. + ``Molecular dynamics with coupling to an external bath.'' + The Journal of Chemical Physics 81, 3684-3690 (1984). + +\end{thebibliography} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} diff --git a/tools/geo2lmp_reax.awk b/tools/geo2lmp_reax.awk new file mode 100644 index 0000000000000000000000000000000000000000..9a5823f290875c60d4efe3248f2958c42fb37236 --- /dev/null +++ b/tools/geo2lmp_reax.awk @@ -0,0 +1,38 @@ +BEGIN{ + comment = "water strong scaling test"; + ntypes = 2; + masses[1] = 1.0080; + masses[2] = 15.990; + types["H"] = 1; + types["O"] = 2; + box[0] = box[1] = box[2] = 0; +} +{ + if( $1 == "BOXGEO" ) { + box[0] = $2; + box[1] = $3; + box[2] = $4; + } + else if( NF == 1 ) { + natoms = $1; + # print the header + print "#", comment, "\n"; + + print natoms, "atoms"; + print ntypes, "atom types\n"; + + print "0", box[0], "xlo", "xhi"; + print "0", box[1], "ylo", "yhi"; + print "0", box[2], "zlo", "zhi\n"; + + print "Masses\n"; + for( i = 1; i <= ntypes; ++i ) + print i, masses[i]; + + print "\nAtoms\n"; + } + else{ + # print the atom info + print $1, types[$2], "0", $4, $5, $6; + } +} \ No newline at end of file diff --git a/tools/geo_tool.py b/tools/geo_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..9f6a61ed1ce0b1653e45847ef49731e824be8482 --- /dev/null +++ b/tools/geo_tool.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import argparse +import contextlib +from os import linesep +from re import match, sub +import sys + + +FORMATS=["PDB", "PUREMD", "REAXFF_FORTRAN"] + + +@contextlib.contextmanager +def smart_open(filename=None): + if filename and filename != '-': + fh = open(filename, 'w') + else: + fh = sys.stdout + + try: + yield fh + finally: + if fh is not sys.stdout: + fh.close() + + +def count_atoms(input_file, input_format): + count = 0 + regex = lambda x: False + if input_format == "PDB": + regex = lambda l: match(r'ATOM ', l) != None + elif input_format == "PUREMD": + regex = lambda l: match(r'BOXGEO', l) == None + elif input_format == "REAXFF_FORTRAN": + regex = lambda l: match(r'HETATM', l) != None + + with open(input_file, 'r') as infile: + for line in infile: + if regex(line): + count = count + 1 + + return count + + +def convert(args): + patterns = { \ + ("PDB", "PUREMD"): [ \ + lambda l, **kwargs: (sub(r'^ATOM ([ 0-9]{5}) (.{4}).{14}([. 0-9]{8})([. 0-9]{8})([. 0-9]{8}).{22}([ a-zA-z]{2}).{2}$', + r'\1 \2 \6 \3 \4 \5', l), True) if not match(r'ATOM ', l) == None else (l, False), + lambda l, **kwargs: (sub(r'^CRYST1([. 0-9]{9})([. 0-9]{9})([. 0-9]{9})([. 0-9]{7})([. 0-9]{7})([. 0-9]{7}) .{15}$', + r'BOXGEO \1 \2 \3 \4 \5 \6', l), False) if not match(r'CRYST1', l) == None else (l, False), + lambda l, **kwargs: (l + str(kwargs['atom_count']) + linesep, True) + if not match(r'BOXGEO', l) == None else (l, False) + ], + ("PDB", "REAXFF_FORTRAN"): [ \ + lambda l, **kwargs: ('HETATM {:5d} {:<5} {:3} {:1} {:5}{:10.5f}{:10.5f}{:10.5f} {:5}{:3d}{:2d} {:8.5f}'.format( + int(l[6:11].strip()), l[76:78].strip(), ' ', ' ', ' ', float(l[30:38].strip()), + float(l[38:46].strip()), float(l[46:54].strip()), l[76:78].strip(), + 1, 1, 0.0) + linesep, True) + if not match(r'ATOM ', l) == None else (l, False), + lambda l, **kwargs: (l, True), + ], + ("PUREMD", "PDB"): [ \ + #TODO + ], + ("PUREMD", "REAXFF_FORTRAN"): [ \ + #TODO + ], + ("REAXFF_FORTRAN", "PDB"): [ \ + #TODO + ], + ("REAXFF_FORTRAN", "PUREMD"): [ \ + #TODO + ], + } + + ac = count_atoms(args.input_file, args.input_format) + + with open(args.input_file, 'r') as infile, smart_open(args.output_file) as outfile: + for line in infile: + for p in patterns[(args.input_format, args.output_format)]: + try: + (line, matched) = p(line, atom_count=ac) + except Exception as exc: + raise RuntimeError('Invalid input format') + if matched: + break + else: + continue + outfile.write(line) + + +def replicate(args): + #TODO + print(args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Munge geometry file(s) for molecular dynamics simulations.') + subparsers = parser.add_subparsers(dest='cmd') + subparsers.required = True + + # convert subcommand + parser_convert = subparsers.add_parser('convert', aliases=['c', 'co'], + help='Convert between geometry formats.') + parser_convert.add_argument('input_format', metavar='in_format', + choices=FORMATS, help='input format') + parser_convert.add_argument('input_file', metavar='in_file', + help='input geometry file') + parser_convert.add_argument('output_format', metavar='out_format', + choices=FORMATS, help='output format') + parser_convert.add_argument('output_file', metavar='out_file', nargs='?', + help='output geometry file') + parser_convert.set_defaults(func=convert) + + # replicate subcommand + parser_replicate = subparsers.add_parser('replicate', aliases=['r', 'rep'], + help='Replicate geometry by prescribed factors in X, Y, and Z dimensions.') + parser_replicate.add_argument('input_format', metavar='in_format', + choices=FORMATS, help='input format') + parser_replicate.add_argument('input_file', metavar='in_file', + help='input geometry file') + parser_replicate.add_argument('X repl', metavar='X', type=int, + help='replication factor in X dimension') + parser_replicate.add_argument('Y repl', metavar='Y', type=int, + help='replication factor in Y dimension') + parser_replicate.add_argument('Z repl', metavar='Z', type=int, + help='replication factor in Z dimension') + parser_replicate.add_argument('output_file', metavar='out_file', nargs='?', + help='output geometry file') + parser_replicate.set_defaults(func=replicate) + + # parse args and take action + args = parser.parse_args() + args.func(args) diff --git a/tools/lmp2pdb.awk b/tools/lmp2pdb.awk new file mode 100644 index 0000000000000000000000000000000000000000..482bd5026f9d7d9e4b450ff04100db2ab61b55a3 --- /dev/null +++ b/tools/lmp2pdb.awk @@ -0,0 +1,106 @@ +BEGIN{ + box_flag = 0; + natoms = -1; + ntypes = -1; + atom_style = "charge"; +} +{ + # num atoms + if( $2 == "atoms" ) + natoms = $1; + # num atom types + else if( $2 == "atom" && $3 == "types" ) + ntypes = $1; + # box geometry + else if( $3 == "xlo" && $4 == "xhi" ) { + box[0] = $2 - $1; + ++box_flag; + } + else if( $3 == "ylo" && $4 == "yhi" ) { + box[1] = $2 - $1; + ++box_flag; + } + else if( $3 == "zlo" && $4 == "zhi" ) { + box[2] = $2 - $1; + ++box_flag; + } + # atom masses + else if( $1 == "Masses" ) { + if( ntypes <= 0 ) { + printf( "number of atom types can not be %d!\n", ntypes ); + exit; + } + + getline; # skip the empty line + for( i = 0; i < ntypes; ++i ) { + getline; + if( NF != 2 ) { # expect one integer, one float + printf( "unexpected mass line format: %s!\n", $0 ); + exit; + } + + # record the atom type + if( $2 == 12.0000 ) + types[$1] = "C"; + else if( $2 == 1.0080 ) + types[$1] = "H"; + else if( $2 == 15.9990 ) + types[$1] = "O"; + else if( $2 == 14.0000 ) + types[$1] = "N"; + else { + printf( "unknown atom type!\n" ); + exit; + } + } + } + # atom info + else if( $1 == "Atoms" ) { + if( natoms <= 0 ) { + printf( "number of atoms can not be %d!\n", natoms ); + exit; + } + + getline; # skip the empty line + for( i = 0; i < natoms; ++i ) { + getline; + if( NF != 6 ) { # expect 3 ints, 3 floats + printf( "unexpected atom line format: %s!\n", $0 ); + exit; + } + + atoms[i,"serial"] = $1; + atoms[i,"type"] = types[$2]; + atoms[i,"q"] = $3; + atoms[i,"x"] = $4; + atoms[i,"y"] = $5; + atoms[i,"z"] = $6; + } + } + else if( $1 == "#" ) + 1; # skip the comment + else if( NF == 0 ) + 1; # skip the empty line + else { + printf( "unexpected line: %s\n", $0 ); + exit; + } +} +END{ + if( box_flag != 3 ) { + printf( "incorrect box geometry!\n" ); + exit; + } + + printf( "%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f%11s%4d\n", + "CRYST1", box[0], box[1], box[2], 90, 90, 90, "P", 1 ); + + for( i = 0; i < natoms; ++i ) { + printf( "%-6s%5d%5s%c%3s %c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f "\ + "%-4s%2s%2s\n", "ATOM", atoms[i,"serial"], atoms[i,"type"], " ", + "REX", " ", 1, " ", atoms[i,"x"], atoms[i,"y"], atoms[i,"z"], + 1.0, 0.0, "0", atoms[i,"type"], " " ); + } + + printf( "END\n" ); +} \ No newline at end of file