From 375524ed2a578a6e8d353373edbd24977c767790 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <>
Date: Wed, 29 Jul 2020 12:51:15 -0400
Subject: [PATCH] Tools: add PuReMD custom geometry replication to the Python
 tool. Remove the older bash/awk replication scripts. Fix issue with silica
 6000 atom PDB file (CRYST1 line not 70 characters).

 data/benchmarks/silica/silica_6000.pdb |  2 +-
 tools/                      | 61 +++++++++++++++++++++--
 tools/replicate.bash                   | 67 --------------------------
 tools/replicate_geo.awk                | 60 -----------------------
 4 files changed, 59 insertions(+), 131 deletions(-)
 delete mode 100644 tools/replicate.bash
 delete mode 100644 tools/replicate_geo.awk

diff --git a/data/benchmarks/silica/silica_6000.pdb b/data/benchmarks/silica/silica_6000.pdb
index e5c58216..ef99b2e4 100644
--- a/data/benchmarks/silica/silica_6000.pdb
+++ b/data/benchmarks/silica/silica_6000.pdb
@@ -1,4 +1,4 @@
-CRYST1   36.477   50.174   52.110  90.00  90.00  90.00              0
+CRYST1   36.477   50.174   52.110  90.00  90.00  90.00              0 
 ATOM      1    O REX     1      56.987  39.868  41.795  1.00  0.00      0    O  
 ATOM      2    O REX     1      32.795  24.104  25.968  1.00  0.00      0    O  
 ATOM      3    O REX     1      26.543  26.261  36.254  1.00  0.00      0    O  
diff --git a/tools/ b/tools/
index c5fc3aa7..c39cf1c1 100644
--- a/tools/
+++ b/tools/
@@ -130,7 +130,7 @@ def replicate(args):
         with open(out_file, 'w') as outfile:
             # box dimensions
-            outfile.write('{:6}{:9.3f}{:9.3f}{:9.3f}{:7}{:7}{:7}{:11}{:4}'.format(
+            outfile.write('{:6}{:9.3f}{:9.3f}{:9.3f}{:7}{:7}{:7} {:11}{:4}'.format(
                 'CRYST1', x_dim * args.X_repl, y_dim * args.Y_repl, z_dim * args.Z_repl,
                 cryst_line[33:40], cryst_line[40:47], cryst_line[47:54],
                 cryst_line[54:65], cryst_line[65:69]) + linesep)
@@ -152,8 +152,63 @@ def replicate(args):
                             count = count + 1
     elif args.input_format == 'PUREMD':
-        #TODO
-        pass
+        atom_count = 0
+        atom_lines = []
+        # simulation box info
+        x_dim = 0.0
+        y_dim = 0.0
+        z_dim = 0.0
+        x_ang = 0.0
+        y_ang = 0.0
+        z_ang = 0.0
+        with open(args.input_file, 'r') as infile:
+            # BOXGEO line
+            line = infile.readline()
+            line = line.split()
+            x_dim = float(line[1])
+            y_dim = float(line[2])
+            z_dim = float(line[3])
+            x_ang = float(line[4])
+            y_ang = float(line[5])
+            z_ang = float(line[6])
+            # atom count line
+            line = infile.readline()
+            line = line.split()
+            atom_count = int(line[0])
+            # atom info lines
+            for line in infile:
+                atom_lines.append(line.split())
+        if args.output_file:
+            out_file = args.output_file
+        else:
+            out_file = 'output.geo'
+        with open(out_file, 'w') as outfile:
+            # BOXGEO line
+            outfile.write('{:6} {:f} {:f} {:f} {:f} {:f} {:f}'.format(
+                'BOXGEO', x_dim * args.X_repl, y_dim * args.Y_repl, z_dim * args.Z_repl,
+                x_ang, y_ang, z_ang) + linesep)
+            # atom count line
+            outfile.write('{:d}'.format(atom_count * args.X_repl * args.Y_repl * args.Z_repl) + linesep)
+            # atom info lines
+            count = 1
+            for x in range(args.X_repl):
+                for y in range(args.Y_repl):
+                    for z in range(args.Z_repl):
+                        for l in atom_lines:
+                            outfile.write(' {:d} {:2} {:2} {:f} {:f} {:f}'.format(
+                                count, l[1], l[2],
+                                float(l[3]) + x * x_dim,
+                                float(l[4]) + y * y_dim,
+                                float(l[5]) + z * z_dim ) + linesep)
+                            count = count + 1
 def noise(args):
diff --git a/tools/replicate.bash b/tools/replicate.bash
deleted file mode 100644
index d5bf82f3..00000000
--- a/tools/replicate.bash
+++ /dev/null
@@ -1,67 +0,0 @@
-function usage
-	echo -e "\tusage: $0 format x_dim y_dim z_dim in_file [out_file]"
-	exit 1
-function repl_geo
-	if [ -z "${5:+x}" ]; then
-		FILE=$(basename "$1")
-		FILE="${FILE%.*}.geo"
-	else
-		FILE="$5"
-	fi
-	# 1. Convert the pdb file to custom format which is an intermediate format
-	"$AWK" '{if($1=="CRYST1") print "BOXGEO", $2, $3, $4, $5, $6, $7; if($1=="ATOM") print $2, $3, $12, $6, $7, $8;}' \
-	       	"$1" >& "$FILE.cus"
-	# 2. Convert the custom format to geo format with the following command
-	#     replicate_geo.awk script can also be used to replicate the box in 
-	#     any of the x, y, z directions. They are set as 1, 1, 1 for now. 
-	#     If you want to replicate it, just change these numbers at the beginning
-	#    of the replicate_geo.awk file.
-	"$SED" -e "s/replicate_x = .*/replicate_x = $2;/" \
-	       -e "s/replicate_y = .*/replicate_y = $3;/" \
-       	       -e "s/replicate_z = .*/replicate_z = $4;/" \
-	       -i "$GEO"
-	"$AWK" -f "$GEO" "$FILE.cus" > "$FILE"
-	rm "$FILE.cus"
-	# 3. Open the geo file and insert a line below the BOXGEO line
-	#     BOXGEO is the very first line which contains the BOX geometry
-	#    The new line to be inserted contains the number of atoms in the
-	#     newly created geo file. This number should be equal to the 
-	#     total number of lines in the geo file - (2). one is the first line
-	#     which is the BOXGEO line and second one is for the newly inserted line.
-	LC=$(($(cat "$FILE" | wc -l)-1))
-	"$SED" -e "s/BOXGEO.*/&\n$LC/" -i "$FILE"
-function repl_pdb
-	exit
-GEO="$(dirname $0)/replicate_geo.awk"
-if [ "$#" -ne 5 -a "$#" -ne 6 ]; then
-	usage
-if [ "$1" == "geo" ]; then
-	repl_geo "$5" "$2" "$3" "$4" "$6"
-elif [ "$1" == "pdb" ]; then
-	repl_pdb "$5" "$2" "$3" "$4" "$6"
diff --git a/tools/replicate_geo.awk b/tools/replicate_geo.awk
deleted file mode 100644
index 2231365a..00000000
--- a/tools/replicate_geo.awk
+++ /dev/null
@@ -1,60 +0,0 @@
-# replicates an orthogonal box as many times as given
-# by replicate_x, replicate_y, replicate_z parameters
-# while reading the pdb file, some problems may arise
-# if the fields are not separated as expected. 
-# make sure to check for those if output is not as expected.
-    replicate_x = 1;
-    replicate_y = 1;
-    replicate_z = 1;
-    num_atoms = 0;
-    # collect atom info
-    if( $1 == "BOXGEO" ) {
-	box[0] = $2;
-	box[1] = $3;
-	box[2] = $4;
-	box_ang[0] = $5;
-	box_ang[1] = $6;
-	box_ang[2] = $7;
-    }
-    else if( NF == 6 ) {
-	atom_id[num_atoms] = $1; 
-	atom_type[num_atoms] = $2;
-	atom_name[num_atoms] = $3;
-	atom_pos[num_atoms, 0] = $4;
-	atom_pos[num_atoms, 1] = $5;
-	atom_pos[num_atoms, 2] = $6;
-	++num_atoms;
-    }
-    new_atoms = 0;
-    printf( "%6s %9.3f %9.3f %9.3f %9.3f %9.3f %9.3f\n",
-	    "BOXGEO", box[0]*replicate_x, box[1]*replicate_y, box[2]*replicate_z, 
-	    box_ang[0], box_ang[1], box_ang[2] );
-    for( i = 0; i < replicate_x; ++i )
-	for( j = 0; j < replicate_y; ++j )
-	    for( k = 0; k < replicate_z; ++k ){
-		base_x = i * box[0];
-		base_y = j * box[1];
-		base_z = k * box[2];
-                for( n = 0; n < num_atoms; ++n ) {
-		    ref = new_atoms % num_atoms;
-		    printf( "%d %s %s %8.3f %8.3f %8.3f\n",\
-			    new_atoms+1, atom_type[ref], atom_name[ref], 
-			    base_x + atom_pos[ref,0], 
-			    base_y + atom_pos[ref,1], 
-			    base_z + atom_pos[ref,2] );
-		    ++new_atoms;
-		}
-	    }