diff --git a/sPuReMD/src/bond_orders.c b/sPuReMD/src/bond_orders.c
index 686cf3b0318f53fcc735d73884f82bfb753dab19..7a0358e5063df9b736ea66754a98065a748675d3 100644
--- a/sPuReMD/src/bond_orders.c
+++ b/sPuReMD/src/bond_orders.c
@@ -26,16 +26,16 @@
 #include "vector.h"
 
 
-inline real Cf45( real p1, real p2 )
+static inline real Cf45( real p1, real p2 )
 {
-    return  -EXP(-p2 / 2) /
-        ( SQR( EXP(-p1 / 2) + EXP(p1 / 2) ) * (EXP(-p2 / 2) + EXP(p2 / 2)) );
+    return  -EXP(-p2 / 2.0) /
+        ( SQR( EXP(-p1 / 2.0) + EXP(p1 / 2.0) ) * (EXP(-p2 / 2.0) + EXP(p2 / 2.0)) );
 }
 
 
 #ifdef TEST_FORCES
 void Get_dBO( reax_system *system, list **lists,
-              int i, int pj, real C, rvec *v )
+        int i, int pj, real C, rvec *v )
 {
     list *bonds = (*lists) + BONDS;
     list *dBOs = (*lists) + DBO;
@@ -46,13 +46,15 @@ void Get_dBO( reax_system *system, list **lists,
     end_pj = End_Index(pj, dBOs);
 
     for ( k = start_pj; k < end_pj; ++k )
+    {
         rvec_Scale( v[dBOs->select.dbo_list[k].wrt],
-                    C, dBOs->select.dbo_list[k].dBO );
+                C, dBOs->select.dbo_list[k].dBO );
+    }
 }
 
 
 void Get_dBOpinpi2( reax_system *system, list **lists,
-                    int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 )
+        int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 )
 {
     list *bonds = (*lists) + BONDS;
     list *dBOs = (*lists) + DBO;
@@ -73,7 +75,7 @@ void Get_dBOpinpi2( reax_system *system, list **lists,
 
 
 void Add_dBO( reax_system *system, list **lists,
-              int i, int pj, real C, rvec *v )
+        int i, int pj, real C, rvec *v )
 {
     list *bonds = (*lists) + BONDS;
     list *dBOs = (*lists) + DBO;
@@ -86,14 +88,15 @@ void Add_dBO( reax_system *system, list **lists,
     //fprintf( stderr, "i=%d j=%d start=%d end=%d\n", i, pj, start_pj, end_pj );
 
     for ( k = start_pj; k < end_pj; ++k )
+    {
         rvec_ScaledAdd( v[dBOs->select.dbo_list[k].wrt],
-                        C, dBOs->select.dbo_list[k].dBO );
-
+                C, dBOs->select.dbo_list[k].dBO );
+    }
 }
 
 
 void Add_dBOpinpi2( reax_system *system, list **lists,
-                    int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 )
+        int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 )
 {
     list *bonds = (*lists) + BONDS;
     list *dBOs = (*lists) + DBO;
@@ -125,13 +128,15 @@ void Add_dBO_to_Forces( reax_system *system, list **lists,
     end_pj = End_Index(pj, dBOs);
 
     for ( k = start_pj; k < end_pj; ++k )
+    {
         rvec_ScaledAdd( system->atoms[dBOs->select.dbo_list[k].wrt].f,
-                        C, dBOs->select.dbo_list[k].dBO );
+                C, dBOs->select.dbo_list[k].dBO );
+    }
 }
 
 
 void Add_dBOpinpi2_to_Forces( reax_system *system, list **lists,
-                              int i, int pj, real Cpi, real Cpi2 )
+        int i, int pj, real Cpi, real Cpi2 )
 {
     list *bonds = (*lists) + BONDS;
     list *dBOs = (*lists) + DBO;
@@ -679,7 +684,7 @@ int Locate_Symmetric_Bond( list *bonds, int i, int j )
 }
 
 
-inline void Copy_Neighbor_Data( bond_data *dest, near_neighbor_data *src )
+static inline void Copy_Neighbor_Data( bond_data *dest, near_neighbor_data *src )
 {
     dest->nbr = src->nbr;
     dest->d = src->d;
@@ -689,7 +694,7 @@ inline void Copy_Neighbor_Data( bond_data *dest, near_neighbor_data *src )
 }
 
 
-inline void Copy_Bond_Order_Data( bond_order_data *dest, bond_order_data *src )
+static inline void Copy_Bond_Order_Data( bond_order_data *dest, bond_order_data *src )
 {
     dest->BO = src->BO;
     dest->BO_s = src->BO_s;
diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c
index ee07a2ce0f7af37575fa519aa6ad1f13134eecd1..fc7a36a0cc2a637f4651c7dcc51849f900423cfe 100644
--- a/sPuReMD/src/control.c
+++ b/sPuReMD/src/control.c
@@ -63,7 +63,6 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
     control->nbr_cut = 4.;
     control->r_cut = 10.;
     control->r_sp_cut = 10.;
-    control->max_far_nbrs = 1000;
     control->bo_cut = 0.01;
     control->thb_cut = 0.001;
     control->hb_cut = 0.0;
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 09b92b346743180ebee0dd8a2b28d0d5f5ef8060..0d9ae4000d0246be0e1485ad3c022ab60d028d8f 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -389,7 +389,7 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system,
         control_params *control, int i, int j,
         real r_ij, MATRIX_ENTRY_POSITION pos )
 {
-    real Tap, gamij, dr3gamij_1, dr3gamij_3, ret;
+    real Tap, dr3gamij_1, dr3gamij_3, ret;
 
     ret = 0.0;
 
diff --git a/sPuReMD/src/list.c b/sPuReMD/src/list.c
index d31077c4e99ea5c31a984d6d76b7388804e5bcd7..de5b0438610721380039d412d1f9b927260dec15 100644
--- a/sPuReMD/src/list.c
+++ b/sPuReMD/src/list.c
@@ -46,55 +46,82 @@ char Make_List( int n, int num_intrs, int type, list* l )
     switch ( type )
     {
     case TYP_VOID:
-        l->select.v = (void *) malloc(l->num_intrs * sizeof(void));
-        if (l->select.v == NULL) success = 0;
+        l->select.v = (void *) malloc( l->num_intrs * sizeof(void) );
+        if ( l->select.v == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_THREE_BODY:
         l->select.three_body_list = (three_body_interaction_data*)
-                                    malloc(l->num_intrs * sizeof(three_body_interaction_data));
-        if (l->select.three_body_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(three_body_interaction_data) );
+        if ( l->select.three_body_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_BOND:
         l->select.bond_list = (bond_data*)
                               malloc(l->num_intrs * sizeof(bond_data));
-        if (l->select.bond_list == NULL) success = 0;
+        if ( l->select.bond_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_DBO:
         l->select.dbo_list = (dbond_data*)
-                             malloc(l->num_intrs * sizeof(dbond_data));
-        if (l->select.dbo_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(dbond_data) );
+        if ( l->select.dbo_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_DDELTA:
         l->select.dDelta_list = (dDelta_data*)
-                                malloc(l->num_intrs * sizeof(dDelta_data));
-        if (l->select.dDelta_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(dDelta_data) );
+        if ( l->select.dDelta_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_FAR_NEIGHBOR:
         l->select.far_nbr_list = (far_neighbor_data*)
-                                 malloc(l->num_intrs * sizeof(far_neighbor_data));
-        if (l->select.far_nbr_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(far_neighbor_data) );
+        if (l->select.far_nbr_list == NULL)
+        {
+            success = 0;
+        }
         break;
 
     case TYP_NEAR_NEIGHBOR:
         l->select.near_nbr_list = (near_neighbor_data*)
-                                  malloc(l->num_intrs * sizeof(near_neighbor_data));
-        if (l->select.near_nbr_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(near_neighbor_data) );
+        if ( l->select.near_nbr_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     case TYP_HBOND:
         l->select.hbond_list = (hbond_data*)
-                               malloc( l->num_intrs * sizeof(hbond_data) );
-        if (l->select.hbond_list == NULL) success = 0;
+            malloc( l->num_intrs * sizeof(hbond_data) );
+        if ( l->select.hbond_list == NULL )
+        {
+            success = 0;
+        }
         break;
 
     default:
-        l->select.v = (void *) malloc(l->num_intrs * sizeof(void));
-        if (l->select.v == NULL) success = 0;
+        l->select.v = (void *) malloc( l->num_intrs * sizeof(void) );
+        if ( l->select.v == NULL )
+        {
+            success = 0;
+        }
         break;
     }
 
@@ -171,33 +198,3 @@ void Delete_List( int type, list* l )
     }
 
 }
-
-
-inline int Num_Entries( int i, list* l )
-{
-    return l->end_index[i] - l->index[i];
-}
-
-
-inline int Start_Index( int i, list *l )
-{
-    return l->index[i];
-}
-
-
-inline int End_Index( int i, list *l )
-{
-    return l->end_index[i];
-}
-
-
-inline void Set_Start_Index( int i, int val, list *l )
-{
-    l->index[i] = val;
-}
-
-
-inline void Set_End_Index( int i, int val, list *l )
-{
-    l->end_index[i] = val;
-}
diff --git a/sPuReMD/src/list.h b/sPuReMD/src/list.h
index 35a4b1debf03a02e3c6812320814a61a7d6db3cc..d57ad6f39ea088fbf30ee321e4fdcd27de0a54e4 100644
--- a/sPuReMD/src/list.h
+++ b/sPuReMD/src/list.h
@@ -29,15 +29,35 @@ char Make_List( int, int, int, list* );
 
 void Delete_List( int, list* );
 
-int Num_Entries( int, list* );
 
-int Start_Index( int, list* );
+static inline int Num_Entries( int i, list* l )
+{
+    return l->end_index[i] - l->index[i];
+}
 
-int End_Index( int, list* );
 
-void Set_Start_Index( int, int, list* );
+static inline int Start_Index( int i, list *l )
+{
+    return l->index[i];
+}
 
-void Set_End_Index( int, int, list* );
+
+static inline int End_Index( int i, list *l )
+{
+    return l->end_index[i];
+}
+
+
+static inline void Set_Start_Index( int i, int val, list *l )
+{
+    l->index[i] = val;
+}
+
+
+static inline void Set_End_Index( int i, int val, list *l )
+{
+    l->end_index[i] = val;
+}
 
 
 #endif
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index a350a654a9f5b3b38282bc3e40338c73ee8a23b5..7144082d2209062cf9e55e3dda244c5751e48107 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -566,7 +566,6 @@ typedef struct
     real Tap2;
     real Tap1;
     real Tap0;
-    int max_far_nbrs;
 
     real T_init;
     real T_final;
diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c
index 97280473f75d494f9ad34e4f9ab80d62bcfff33e..e3a19fe29bb354835594ab863c7b8e5443d11982 100644
--- a/sPuReMD/src/neighbors.c
+++ b/sPuReMD/src/neighbors.c
@@ -26,6 +26,7 @@
 #include "list.h"
 #include "reset_utils.h"
 #include "system_props.h"
+#include "tool_box.h"
 #include "vector.h"
 
 
@@ -292,8 +293,8 @@ int compare_far_nbrs(const void *v1, const void *v2)
 }
 
 
-inline void Set_Far_Neighbor( far_neighbor_data *dest, int nbr, real d, real C,
-                              rvec dvec, ivec rel_box/*, rvec ext_factor*/ )
+static inline void Set_Far_Neighbor( far_neighbor_data *dest, int nbr, real d, real C,
+        rvec dvec, ivec rel_box/*, rvec ext_factor*/ )
 {
     dest->nbr = nbr;
     dest->d = d;
@@ -303,8 +304,8 @@ inline void Set_Far_Neighbor( far_neighbor_data *dest, int nbr, real d, real C,
 }
 
 
-inline void Set_Near_Neighbor(near_neighbor_data *dest, int nbr, real d, real C,
-                              rvec dvec, ivec rel_box/*, rvec ext_factor*/)
+static inline void Set_Near_Neighbor(near_neighbor_data *dest, int nbr, real d, real C,
+        rvec dvec, ivec rel_box/*, rvec ext_factor*/)
 {
     dest->nbr = nbr;
     dest->d = d;
@@ -316,7 +317,7 @@ inline void Set_Near_Neighbor(near_neighbor_data *dest, int nbr, real d, real C,
 
 /* In case bond restrictions are applied, this method checks if
    atom1 and atom2 are allowed to bond with each other */
-inline int can_Bond( static_storage *workspace, int atom1, int atom2 )
+static inline int can_Bond( static_storage *workspace, int atom1, int atom2 )
 {
     int i;
 
@@ -348,7 +349,7 @@ inline int can_Bond( static_storage *workspace, int atom1, int atom2 )
 
 
 /* check if atom2 is on atom1's near neighbor list */
-inline int is_Near_Neighbor( list *near_nbrs, int atom1, int atom2 )
+static inline int is_Near_Neighbor( list *near_nbrs, int atom1, int atom2 )
 {
     int i;
 
@@ -602,8 +603,6 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control,
 
     fprintf( stderr, "Near neighbors/atom: %d (compare to 150)\n",
              num_near / system->N );
-    fprintf( stderr, "Far neighbors per atom: %d (compare to %d)\n",
-             num_far / system->N, control->max_far_nbrs );
 #endif
 
     //fprintf( stderr, "step%d: num of nearnbrs = %6d   num of farnbrs: %6d\n",
@@ -617,8 +616,8 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control,
 
 
 void Generate_Neighbor_Lists( reax_system *system, control_params *control,
-                              simulation_data *data, static_storage *workspace,
-                              list **lists, output_controls *out_control )
+        simulation_data *data, static_storage *workspace,
+        list **lists, output_controls *out_control )
 {
     int  i, j, k, l, m, itr;
     int  x, y, z;
diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c
index 2f14e38ff616e6bcbf40f0de61dab90712cda7a8..684c2cba0b6b4763abcd9b51ea548eed5d08f989 100644
--- a/sPuReMD/src/print_utils.c
+++ b/sPuReMD/src/print_utils.c
@@ -24,6 +24,7 @@
 #include "list.h"
 #include "geo_tools.h"
 #include "system_props.h"
+#include "tool_box.h"
 #include "vector.h"
 
 
diff --git a/sPuReMD/src/tool_box.c b/sPuReMD/src/tool_box.c
index 87ef382a1d02dcbb5289b4b06e1e7c834b08cc73..61ab5e7fcb5f36a5dab7c8121246dd82af50ecbf 100644
--- a/sPuReMD/src/tool_box.c
+++ b/sPuReMD/src/tool_box.c
@@ -106,7 +106,7 @@ void Fit_to_Periodic_Box( simulation_box *box, rvec *p )
 /* determine the touch point, tp, of a box to
    its neighbor denoted by the relative coordinate rl */
 /*
-inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp )
+static inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp )
 {
     int d;
 
@@ -124,7 +124,7 @@ inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp )
 /* determine whether point p is inside the box */
 /* assumes orthogonal box */
 /*
-inline int is_Inside_Box( simulation_box *box, rvec p )
+static inline int is_Inside_Box( simulation_box *box, rvec p )
 {
     if ( p[0] < box->min[0] || p[0] >= box->max[0] ||
             p[1] < box->min[1] || p[1] >= box->max[1] ||
@@ -137,7 +137,7 @@ inline int is_Inside_Box( simulation_box *box, rvec p )
 
 
 /*
-inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 )
+static inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 )
 {
     rvec midp;
 
@@ -160,7 +160,7 @@ inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 )
    no need to consider periodic boundary conditions as in the serial case
    because the box of a process is not periodic in itself */
 /*
-inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj,
+static inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj,
         ivec ci, ivec cj, rvec cp )
 {
     int  d;
@@ -175,7 +175,7 @@ inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj,
 }
 
 
-inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp )
+static inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp )
 {
     int d;
 
@@ -197,7 +197,7 @@ inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp )
 }
 
 
-inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 )
+static inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 )
 {
     int  i;
     real d_sqr = 0;
@@ -214,7 +214,7 @@ inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 )
 }
 
 
-inline real DistSqr_to_Special_Point( rvec cp, rvec x )
+static inline real DistSqr_to_Special_Point( rvec cp, rvec x )
 {
     int  i;
     real d_sqr = 0;
@@ -231,7 +231,7 @@ inline real DistSqr_to_Special_Point( rvec cp, rvec x )
 }
 
 
-inline int Relative_Coord_Encoding( ivec c )
+static inline int Relative_Coord_Encoding( ivec c )
 {
     return 9 * (c[0] + 1) + 3 * (c[1] + 1) + (c[2] + 1);
 }
diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c
index f320d857bc42c3731204c3b41639d6460d398526..98d697aad7354e38fc68ca2ff8044b74ee2326c6 100644
--- a/sPuReMD/src/vector.c
+++ b/sPuReMD/src/vector.c
@@ -22,115 +22,6 @@
 #include "vector.h"
 
 
-/* global to make OpenMP shared (Vector_isZero) */
-unsigned int ret;
-/* global to make OpenMP shared (Dot, Norm) */
-real ret2;
-
-
-inline int Vector_isZero( const real * const v, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp single
-#endif
-    {
-        ret = TRUE;
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-
-    #pragma omp for reduction(&&: ret) schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        if ( FABS( v[i] ) > ALMOST_ZERO )
-        {
-            ret = FALSE;
-        }
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
-    return ret;
-}
-
-
-inline void Vector_MakeZero( real * const v, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp for schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        v[i] = ZERO;
-    }
-}
-
-
-inline void Vector_Copy( real * const dest, const real * const v, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp for schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        dest[i] = v[i];
-    }
-}
-
-
-inline void Vector_Scale( real * const dest, const real c, const real * const v, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp for schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        dest[i] = c * v[i];
-    }
-}
-
-
-inline void Vector_Sum( real * const dest, const real c, const real * const v, const real d,
-                        const real * const y, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp for schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        dest[i] = c * v[i] + d * y[i];
-    }
-}
-
-
-inline void Vector_Add( real * const dest, const real c, const real * const v, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp for schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        dest[i] += c * v[i];
-    }
-}
-
-
 void Vector_Print( FILE * const fout, const char * const vname, const real * const v,
         const unsigned int k )
 {
@@ -148,432 +39,7 @@ void Vector_Print( FILE * const fout, const char * const vname, const real * con
 }
 
 
-inline real Dot( const real * const v1, const real * const v2, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp single
-#endif
-    {
-        ret2 = ZERO;
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-
-    #pragma omp for reduction(+: ret2) schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        ret2 += v1[i] * v2[i];
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
-    return ret2;
-}
-
-
-inline real Norm( const real * const v1, const unsigned int k )
-{
-    unsigned int i;
-
-#ifdef _OPENMP
-    #pragma omp single
-#endif
-    {
-        ret2 = ZERO;
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-
-    #pragma omp for reduction(+: ret2) schedule(static)
-#endif
-    for ( i = 0; i < k; ++i )
-    {
-        ret2 += SQR( v1[i] );
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-
-    #pragma omp single
-#endif
-    {
-        ret2 = SQRT( ret2 );
-    }
-
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
-    return ret2;
-}
-
-
-inline void rvec_Copy( rvec dest, const rvec src )
-{
-    dest[0] = src[0];
-    dest[1] = src[1];
-    dest[2] = src[2];
-}
-
-inline void rvec_Scale( rvec ret, const real c, const rvec v )
-{
-    ret[0] = c * v[0];
-    ret[1] = c * v[1];
-    ret[2] = c * v[2];
-}
-
-
-inline void rvec_Add( rvec ret, const rvec v )
-{
-    ret[0] += v[0];
-    ret[1] += v[1];
-    ret[2] += v[2];
-}
-
-
-inline void rvec_ScaledAdd( rvec ret, const real c, const rvec v )
-{
-    ret[0] += c * v[0];
-    ret[1] += c * v[1];
-    ret[2] += c * v[2];
-}
-
-
-inline void rvec_Sum( rvec ret, const rvec v1 , const rvec v2 )
-{
-    ret[0] = v1[0] + v2[0];
-    ret[1] = v1[1] + v2[1];
-    ret[2] = v1[2] + v2[2];
-}
-
-
-inline void rvec_ScaledSum( rvec ret, const real c1, const rvec v1,
-                            const real c2, const rvec v2 )
-{
-    ret[0] = c1 * v1[0] + c2 * v2[0];
-    ret[1] = c1 * v1[1] + c2 * v2[1];
-    ret[2] = c1 * v1[2] + c2 * v2[2];
-}
-
-
-inline real rvec_Dot( const rvec v1, const rvec v2 )
-{
-    return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2];
-}
-
-
-inline real rvec_ScaledDot( const real c1, const rvec v1,
-                            const real c2, const rvec v2 )
-{
-    return (c1 * c2) * (v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]);
-}
-
-
-inline void rvec_Multiply( rvec r, const rvec v1, const rvec v2 )
-{
-    r[0] = v1[0] * v2[0];
-    r[1] = v1[1] * v2[1];
-    r[2] = v1[2] * v2[2];
-}
-
-
-inline void rvec_iMultiply( rvec r, const ivec v1, const rvec v2 )
-{
-    r[0] = v1[0] * v2[0];
-    r[1] = v1[1] * v2[1];
-    r[2] = v1[2] * v2[2];
-}
-
-
-inline void rvec_Divide( rvec r, const rvec v1, const rvec v2 )
-{
-    r[0] = v1[0] / v2[0];
-    r[1] = v1[1] / v2[1];
-    r[2] = v1[2] / v2[2];
-}
-
-
-inline void rvec_iDivide( rvec r, const rvec v1, const ivec v2 )
-{
-    r[0] = v1[0] / v2[0];
-    r[1] = v1[1] / v2[1];
-    r[2] = v1[2] / v2[2];
-}
-
-
-inline void rvec_Invert( rvec r, const rvec v )
-{
-    r[0] = 1. / v[0];
-    r[1] = 1. / v[1];
-    r[2] = 1. / v[2];
-}
-
-
-inline void rvec_Cross( rvec ret, const rvec v1, const rvec v2 )
-{
-    ret[0] = v1[1] * v2[2] - v1[2] * v2[1];
-    ret[1] = v1[2] * v2[0] - v1[0] * v2[2];
-    ret[2] = v1[0] * v2[1] - v1[1] * v2[0];
-}
-
-
-inline void rvec_OuterProduct( rtensor r, const rvec v1, const rvec v2 )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            r[i][j] = v1[i] * v2[j];
-        }
-    }
-}
-
-
-inline real rvec_Norm_Sqr( const rvec v )
-{
-    return SQR(v[0]) + SQR(v[1]) + SQR(v[2]);
-}
-
-
-inline real rvec_Norm( const rvec v )
-{
-    return SQRT( SQR(v[0]) + SQR(v[1]) + SQR(v[2]) );
-}
-
-
-inline int rvec_isZero( const rvec v )
-{
-    if ( FABS(v[0]) > ALMOST_ZERO ||
-            FABS(v[1]) > ALMOST_ZERO ||
-            FABS(v[2]) > ALMOST_ZERO )
-    {
-        return FALSE;
-    }
-    return TRUE;
-}
-
-
-inline void rvec_MakeZero( rvec v )
-{
-    v[0] = ZERO;
-    v[1] = ZERO;
-    v[2] = ZERO;
-}
-
-
-inline void rvec_Random( rvec v )
-{
-    v[0] = Random(2.0) - 1.0;
-    v[1] = Random(2.0) - 1.0;
-    v[2] = Random(2.0) - 1.0;
-}
-
-
-inline void rtensor_Multiply( rtensor ret, rtensor m1, rtensor m2 )
-{
-    unsigned int i, j, k;
-    rtensor temp;
-
-    // check if the result matrix is the same as one of m1, m2.
-    // if so, we cannot modify the contents of m1 or m2, so
-    // we have to use a temp matrix.
-    if ( ret == m1 || ret == m2 )
-    {
-        for ( i = 0; i < 3; ++i )
-            for ( j = 0; j < 3; ++j )
-            {
-                temp[i][j] = 0;
-                for ( k = 0; k < 3; ++k )
-                    temp[i][j] += m1[i][k] * m2[k][j];
-            }
-
-        for ( i = 0; i < 3; ++i )
-            for ( j = 0; j < 3; ++j )
-                ret[i][j] = temp[i][j];
-    }
-    else
-    {
-        for ( i = 0; i < 3; ++i )
-            for ( j = 0; j < 3; ++j )
-            {
-                ret[i][j] = 0;
-                for ( k = 0; k < 3; ++k )
-                    ret[i][j] += m1[i][k] * m2[k][j];
-            }
-    }
-}
-
-
-inline void rtensor_MatVec( rvec ret, rtensor m, const rvec v )
-{
-    unsigned int i;
-    rvec temp;
-
-    // if ret is the same vector as v, we cannot modify the
-    // contents of v until all computation is finished.
-    if ( ret == v )
-    {
-        for ( i = 0; i < 3; ++i )
-        {
-            temp[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2];
-        }
-
-        for ( i = 0; i < 3; ++i )
-        {
-            ret[i] = temp[i];
-        }
-    }
-    else
-    {
-        for ( i = 0; i < 3; ++i )
-        {
-            ret[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2];
-        }
-    }
-}
-
-
-inline void rtensor_Scale( rtensor ret, const real c, rtensor m )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] = c * m[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_Add( rtensor ret, rtensor t )
-{
-    int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] += t[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_ScaledAdd( rtensor ret, const real c, rtensor t )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] += c * t[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_Sum( rtensor ret, rtensor t1, rtensor t2 )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] = t1[i][j] + t2[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_ScaledSum( rtensor ret, const real c1, rtensor t1,
-                               const real c2, rtensor t2 )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] = c1 * t1[i][j] + c2 * t2[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_Copy( rtensor ret, rtensor t )
-{
-    unsigned int i, j;
-
-    for ( i = 0; i < 3; ++i )
-    {
-        for ( j = 0; j < 3; ++j )
-        {
-            ret[i][j] = t[i][j];
-        }
-    }
-}
-
-
-inline void rtensor_Identity( rtensor t )
-{
-    t[0][0] = t[1][1] = t[2][2] = 1;
-    t[0][1] = t[0][2] = t[1][0] = t[1][2] = t[2][0] = t[2][1] = ZERO;
-}
-
-
-inline void rtensor_MakeZero( rtensor t )
-{
-    t[0][0] = t[0][1] = t[0][2] = ZERO;
-    t[1][0] = t[1][1] = t[1][2] = ZERO;
-    t[2][0] = t[2][1] = t[2][2] = ZERO;
-}
-
-
-inline void rtensor_Transpose( rtensor ret, rtensor t )
-{
-    ret[0][0] = t[0][0];
-    ret[1][1] = t[1][1];
-    ret[2][2] = t[2][2];
-
-    ret[0][1] = t[1][0];
-    ret[0][2] = t[2][0];
-
-    ret[1][0] = t[0][1];
-    ret[1][2] = t[2][1];
-
-    ret[2][0] = t[0][2];
-    ret[2][1] = t[1][2];
-}
-
-
-inline real rtensor_Det( rtensor t )
-{
-    return ( t[0][0] * (t[1][1] * t[2][2] - t[1][2] * t[2][1] ) +
-             t[0][1] * (t[1][2] * t[2][0] - t[1][0] * t[2][2] ) +
-             t[0][2] * (t[1][0] * t[2][1] - t[1][1] * t[2][0] ) );
-}
-
-
-inline real rtensor_Trace( rtensor t )
-{
-    return (t[0][0] + t[1][1] + t[2][2]);
-}
-
-
-void Print_rTensor(FILE * const fp, rtensor t)
+void Print_rTensor( FILE * const fp, rtensor t )
 {
     unsigned int i, j;
 
@@ -581,66 +47,9 @@ void Print_rTensor(FILE * const fp, rtensor t)
     {
         fprintf(fp, "[");
         for (j = 0; j < 3; j++)
+        {
             fprintf(fp, "%8.3f,\t", t[i][j]);
+        }
         fprintf(fp, "]\n");
     }
 }
-
-
-inline void ivec_MakeZero( ivec v )
-{
-    v[0] = v[1] = v[2] = 0;
-}
-
-
-inline void ivec_Copy( ivec dest, const ivec src )
-{
-    dest[0] = src[0];
-    dest[1] = src[1];
-    dest[2] = src[2];
-}
-
-
-inline void ivec_Scale( ivec dest, const real C, const ivec src )
-{
-    dest[0] = C * src[0];
-    dest[1] = C * src[1];
-    dest[2] = C * src[2];
-}
-
-
-inline void ivec_rScale( ivec dest, const real C, const rvec src )
-{
-    dest[0] = (int)(C * src[0]);
-    dest[1] = (int)(C * src[1]);
-    dest[2] = (int)(C * src[2]);
-}
-
-
-inline int ivec_isZero( const ivec v )
-{
-    if ( v[0] == 0 && v[1] == 0 && v[2] == 0 )
-    {
-        return TRUE;
-    }
-    return FALSE;
-}
-
-
-inline int ivec_isEqual( const ivec v1, const ivec v2 )
-{
-    if ( v1[0] == v2[0] && v1[1] == v2[1] && v1[2] == v2[2] )
-    {
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
-
-inline void ivec_Sum( ivec dest, const ivec v1, const ivec v2 )
-{
-    dest[0] = v1[0] + v2[0];
-    dest[1] = v1[1] + v2[1];
-    dest[2] = v1[2] + v2[2];
-}
diff --git a/sPuReMD/src/vector.h b/sPuReMD/src/vector.h
index 27ceb241150bc98c7dcf65ef37fc8262d77303dc..557057489b3a52a407836360046b6a9e80d9c8df 100644
--- a/sPuReMD/src/vector.h
+++ b/sPuReMD/src/vector.h
@@ -25,61 +25,914 @@
 #include "mytypes.h"
 
 
-int Vector_isZero( const real * const, const unsigned int );
-void Vector_MakeZero( real * const, const unsigned int );
-void Vector_Copy( real * const, const real * const, const unsigned int );
-void Vector_Scale( real * const, const real, const real * const, const unsigned int );
-void Vector_Sum( real * const, const real, const real * const, const real, const real * const, const unsigned int );
-void Vector_Add( real * const, const real, const real * const, const unsigned int );
+/* file scope to make OpenMP shared (Vector_isZero) */
+static unsigned int ret_omp;
+/* file scope to make OpenMP shared (Dot, Norm) */
+static real ret2_omp;
+
+
 void Vector_Print( FILE * const, const char * const, const real * const, const unsigned int );
-real Dot( const real * const, const real * const, const unsigned int );
-real Norm( const real * const, const unsigned int );
-
-void rvec_Copy( rvec, const rvec );
-void rvec_Scale( rvec, const real, const rvec );
-void rvec_Add( rvec, const rvec );
-void rvec_ScaledAdd( rvec, const real, const rvec );
-void rvec_Sum( rvec, const rvec, const rvec );
-void rvec_ScaledSum( rvec, const real, const rvec, const real, const rvec );
-real rvec_Dot( const rvec, const rvec );
-real rvec_ScaledDot( const real, const rvec, const real, const rvec );
-void rvec_Multiply( rvec, const rvec, const rvec );
-void rvec_iMultiply( rvec, const ivec, const rvec );
-void rvec_Divide( rvec, const rvec, const rvec );
-void rvec_iDivide( rvec, const rvec, const ivec );
-void rvec_Invert( rvec, const rvec );
-void rvec_Cross( rvec, const rvec, const rvec );
-void rvec_OuterProduct( rtensor, const rvec, const rvec );
-real rvec_Norm_Sqr( const rvec );
-real rvec_Norm( const rvec );
-int rvec_isZero( const rvec );
-void rvec_MakeZero( rvec );
-void rvec_Random( rvec );
-
-void rtensor_MakeZero( rtensor );
-void rtensor_Multiply( rtensor, rtensor, rtensor );
-void rtensor_MatVec( rvec, rtensor, const rvec );
-void rtensor_Scale( rtensor, const real, rtensor );
-void rtensor_Add( rtensor, rtensor );
-void rtensor_ScaledAdd( rtensor, const real, rtensor );
-void rtensor_Sum( rtensor, rtensor, rtensor );
-void rtensor_ScaledSum( rtensor, const real, rtensor, const real, rtensor );
-void rtensor_Scale( rtensor, const real, rtensor );
-void rtensor_Copy( rtensor, rtensor );
-void rtensor_Identity( rtensor );
-void rtensor_Transpose( rtensor, rtensor );
-real rtensor_Det( rtensor );
-real rtensor_Trace( rtensor );
-
-void Print_rTensor(FILE * const, rtensor);
-
-int ivec_isZero( const ivec );
-int ivec_isEqual( const ivec, const ivec );
-void ivec_MakeZero( ivec );
-void ivec_Copy( ivec, const ivec );
-void ivec_Scale( ivec, const real, const ivec );
-void ivec_rScale( ivec, const real, const rvec );
-void ivec_Sum( ivec, const ivec, const ivec );
+
+void Print_rTensor( FILE * const, rtensor );
+
+
+static inline int Vector_isZero( const real * const v, const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp single
+#endif
+    {
+        ret_omp = TRUE;
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+
+    #pragma omp for simd reduction(&&: ret_omp) schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        if ( FABS( v[i] ) > ALMOST_ZERO )
+        {
+            ret_omp = FALSE;
+        }
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+#endif
+
+    return ret_omp;
+}
+
+
+static inline void Vector_MakeZero( real * const v, const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for simd schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        v[i] = ZERO;
+    }
+}
+
+
+static inline void Vector_Copy( real * const dest, const real * const v, const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for simd schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i] = v[i];
+    }
+}
+
+
+static inline void Vector_Scale( real * const dest, const real c, const real * const v,
+        const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for simd schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i] = c * v[i];
+    }
+}
+
+
+static inline void Vector_Sum( real * const dest, const real c, const real * const v,
+        const real d, const real * const y, const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for simd schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i] = c * v[i] + d * y[i];
+    }
+}
+
+
+static inline void Vector_Add( real * const dest, const real c, const real * const v,
+        const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for simd schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i] += c * v[i];
+    }
+}
+
+
+static inline real Dot( const real * const v1, const real * const v2,
+        const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp single
+#endif
+    {
+        ret2_omp = ZERO;
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+
+    #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        ret2_omp += v1[i] * v2[i];
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+#endif
+
+    return ret2_omp;
+}
+
+
+static inline real Norm( const real * const v1, const unsigned int k )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp single
+#endif
+    {
+        ret2_omp = ZERO;
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+
+    #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static)
+#endif
+    for ( i = 0; i < k; ++i )
+    {
+        ret2_omp += SQR( v1[i] );
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+
+    #pragma omp single
+#endif
+    {
+        ret2_omp = SQRT( ret2_omp );
+    }
+
+#ifdef _OPENMP
+    #pragma omp barrier
+#endif
+
+    return ret2_omp;
+}
+
+
+static inline void rvec_Copy( rvec dest, const rvec src )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        dest[i] = src[i];
+    }
+
+//    dest[0] = src[0];
+//    dest[1] = src[1];
+//    dest[2] = src[2];
+}
+
+static inline void rvec_Scale( rvec ret, const real c, const rvec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] = c * v[i];
+    }
+
+//    ret[0] = c * v[0];
+//    ret[1] = c * v[1];
+//    ret[2] = c * v[2];
+}
+
+
+static inline void rvec_Add( rvec ret, const rvec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] += v[i];
+    }
+
+//    ret[0] += v[0];
+//    ret[1] += v[1];
+//    ret[2] += v[2];
+}
+
+
+static inline void rvec_ScaledAdd( rvec ret, const real c, const rvec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] += c * v[i];
+    }
+
+//    ret[0] += c * v[0];
+//    ret[1] += c * v[1];
+//    ret[2] += c * v[2];
+}
+
+
+static inline void rvec_Sum( rvec ret, const rvec v1 , const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] = v1[i] + v2[i];
+    }
+
+//    ret[0] = v1[0] + v2[0];
+//    ret[1] = v1[1] + v2[1];
+//    ret[2] = v1[2] + v2[2];
+}
+
+
+static inline void rvec_ScaledSum( rvec ret, const real c1, const rvec v1,
+        const real c2, const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] = c1 * v1[i] + c2 * v2[i];
+    }
+
+//    ret[0] = c1 * v1[0] + c2 * v2[0];
+//    ret[1] = c1 * v1[1] + c2 * v2[1];
+//    ret[2] = c1 * v1[2] + c2 * v2[2];
+}
+
+
+static inline real rvec_Dot( const rvec v1, const rvec v2 )
+{
+    int i;
+    real ret;
+
+    ret = 0.0;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(+: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret += v1[i] * v2[i];
+    }
+
+    return ret;
+
+//    return (v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]);
+}
+
+
+static inline real rvec_ScaledDot( const real c1, const rvec v1,
+        const real c2, const rvec v2 )
+{
+    int i;
+    real ret;
+
+    ret = 0.0;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(+: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret += v1[i] * v2[i];
+    }
+
+    return c1 * c2 * ret;
+
+//    return (c1 * c2) * (v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]);
+}
+
+
+static inline void rvec_Multiply( rvec r, const rvec v1, const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        r[i] = v1[i] * v2[i];
+    }
+
+//    r[0] = v1[0] * v2[0];
+//    r[1] = v1[1] * v2[1];
+//    r[2] = v1[2] * v2[2];
+}
+
+
+static inline void rvec_iMultiply( rvec r, const ivec v1, const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        r[i] = v1[i] * v2[i];
+    }
+
+//    r[0] = v1[0] * v2[0];
+//    r[1] = v1[1] * v2[1];
+//    r[2] = v1[2] * v2[2];
+}
+
+
+static inline void rvec_Divide( rvec r, const rvec v1, const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        r[i] = v1[i] / v2[i];
+    }
+
+//    r[0] = v1[0] / v2[0];
+//    r[1] = v1[1] / v2[1];
+//    r[2] = v1[2] / v2[2];
+}
+
+
+static inline void rvec_iDivide( rvec r, const rvec v1, const ivec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        r[i] = v1[i] / v2[i];
+    }
+
+//    r[0] = v1[0] / v2[0];
+//    r[1] = v1[1] / v2[1];
+//    r[2] = v1[2] / v2[2];
+}
+
+
+static inline void rvec_Invert( rvec r, const rvec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        r[i] = 1.0 / v[i];
+    }
+
+//    r[0] = 1.0 / v[0];
+//    r[1] = 1.0 / v[1];
+//    r[2] = 1.0 / v[2];
+}
+
+
+static inline void rvec_Cross( rvec ret, const rvec v1, const rvec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret[i] = v1[(i + 1) % 3] * v2[(i + 2) % 3] - v1[(i + 2) % 3] * v2[(i + 1) % 3];
+    }
+
+//    ret[0] = v1[1] * v2[2] - v1[2] * v2[1];
+//    ret[1] = v1[2] * v2[0] - v1[0] * v2[2];
+//    ret[2] = v1[0] * v2[1] - v1[1] * v2[0];
+}
+
+
+static inline void rvec_OuterProduct( rtensor r, const rvec v1, const rvec v2 )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            r[i][j] = v1[i] * v2[j];
+        }
+    }
+}
+
+
+static inline real rvec_Norm_Sqr( const rvec v )
+{
+    int i;
+    real ret;
+
+    ret = 0.0;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(+: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret += SQR( v[i] );
+    }
+
+    return ret;
+
+//    return SQR(v[0]) + SQR(v[1]) + SQR(v[2]);
+}
+
+
+static inline real rvec_Norm( const rvec v )
+{
+    int i;
+    real ret;
+
+    ret = 0.0;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(+: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret += SQR( v[i] );
+    }
+
+    return SQRT( ret );
+
+//    return SQRT( SQR(v[0]) + SQR(v[1]) + SQR(v[2]) );
+}
+
+
+static inline int rvec_isZero( const rvec v )
+{
+    int i, ret;
+
+    ret = TRUE;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(&&: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret = (FABS( v[i] ) > ALMOST_ZERO) && ret;
+    }
+
+    return ret;
+
+//    if ( FABS(v[0]) > ALMOST_ZERO ||
+//            FABS(v[1]) > ALMOST_ZERO ||
+//            FABS(v[2]) > ALMOST_ZERO )
+//    {
+//        return FALSE;
+//    }
+//    return TRUE;
+}
+
+
+static inline void rvec_MakeZero( rvec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        v[i] = ZERO;
+    }
+
+//    v[0] = ZERO;
+//    v[1] = ZERO;
+//    v[2] = ZERO;
+}
+
+
+static inline void rvec_Random( rvec v )
+{
+    v[0] = Random(2.0) - 1.0;
+    v[1] = Random(2.0) - 1.0;
+    v[2] = Random(2.0) - 1.0;
+}
+
+
+static inline void rtensor_Multiply( rtensor ret, rtensor m1, rtensor m2 )
+{
+    unsigned int i, j, k;
+    rtensor temp;
+
+    // check if the result matrix is the same as one of m1, m2.
+    // if so, we cannot modify the contents of m1 or m2, so
+    // we have to use a temp matrix.
+    if ( ret == m1 || ret == m2 )
+    {
+        for ( i = 0; i < 3; ++i )
+        {
+            for ( j = 0; j < 3; ++j )
+            {
+                temp[i][j] = 0;
+                for ( k = 0; k < 3; ++k )
+                {
+                    temp[i][j] += m1[i][k] * m2[k][j];
+                }
+            }
+        }
+
+        for ( i = 0; i < 3; ++i )
+        {
+            for ( j = 0; j < 3; ++j )
+            {
+                ret[i][j] = temp[i][j];
+            }
+        }
+    }
+    else
+    {
+        for ( i = 0; i < 3; ++i )
+        {
+            for ( j = 0; j < 3; ++j )
+            {
+                ret[i][j] = 0;
+                for ( k = 0; k < 3; ++k )
+                {
+                    ret[i][j] += m1[i][k] * m2[k][j];
+                }
+            }
+        }
+    }
+}
+
+
+static inline void rtensor_MatVec( rvec ret, rtensor m, const rvec v )
+{
+    unsigned int i;
+    rvec temp;
+
+    // if ret is the same vector as v, we cannot modify the
+    // contents of v until all computation is finished.
+    if ( ret == v )
+    {
+        for ( i = 0; i < 3; ++i )
+        {
+            temp[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2];
+        }
+
+        for ( i = 0; i < 3; ++i )
+        {
+            ret[i] = temp[i];
+        }
+    }
+    else
+    {
+        for ( i = 0; i < 3; ++i )
+        {
+            ret[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2];
+        }
+    }
+}
+
+
+static inline void rtensor_Scale( rtensor ret, const real c, rtensor m )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] = c * m[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_Add( rtensor ret, rtensor t )
+{
+    int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] += t[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_ScaledAdd( rtensor ret, const real c, rtensor t )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] += c * t[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_Sum( rtensor ret, rtensor t1, rtensor t2 )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] = t1[i][j] + t2[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_ScaledSum( rtensor ret, const real c1, rtensor t1,
+        const real c2, rtensor t2 )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] = c1 * t1[i][j] + c2 * t2[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_Copy( rtensor ret, rtensor t )
+{
+    unsigned int i, j;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        for ( j = 0; j < 3; ++j )
+        {
+            ret[i][j] = t[i][j];
+        }
+    }
+}
+
+
+static inline void rtensor_Identity( rtensor t )
+{
+    t[0][0] = 1.0;
+    t[1][1] = 1.0;
+    t[2][2] = 1.0;
+    t[0][1] = ZERO;
+    t[0][2] = ZERO;
+    t[1][0] = ZERO;
+    t[1][2] = ZERO;
+    t[2][0] = ZERO;
+    t[2][1] = ZERO;
+}
+
+
+static inline void rtensor_MakeZero( rtensor t )
+{
+    t[0][0] = ZERO;
+    t[0][1] = ZERO;
+    t[0][2] = ZERO;
+    t[1][0] = ZERO;
+    t[1][1] = ZERO;
+    t[1][2] = ZERO;
+    t[2][0] = ZERO;
+    t[2][1] = ZERO;
+    t[2][2] = ZERO;
+}
+
+
+static inline void rtensor_Transpose( rtensor ret, rtensor t )
+{
+    ret[0][0] = t[0][0];
+    ret[1][1] = t[1][1];
+    ret[2][2] = t[2][2];
+
+    ret[0][1] = t[1][0];
+    ret[0][2] = t[2][0];
+
+    ret[1][0] = t[0][1];
+    ret[1][2] = t[2][1];
+
+    ret[2][0] = t[0][2];
+    ret[2][1] = t[1][2];
+}
+
+
+static inline real rtensor_Det( rtensor t )
+{
+    return ( t[0][0] * (t[1][1] * t[2][2] - t[1][2] * t[2][1] ) +
+            t[0][1] * (t[1][2] * t[2][0] - t[1][0] * t[2][2] ) +
+            t[0][2] * (t[1][0] * t[2][1] - t[1][1] * t[2][0] ) );
+}
+
+
+static inline real rtensor_Trace( rtensor t )
+{
+    return (t[0][0] + t[1][1] + t[2][2]);
+}
+
+
+static inline void ivec_MakeZero( ivec v )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        v[i] = 0;
+    }
+
+//    v[0] = 0;
+//    v[1] = 0;
+//    v[2] = 0;
+}
+
+
+static inline void ivec_Copy( ivec dest, const ivec src )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        dest[i] = src[i];
+    }
+
+//    dest[0] = src[0];
+//    dest[1] = src[1];
+//    dest[2] = src[2];
+}
+
+
+static inline void ivec_Scale( ivec dest, const real C, const ivec src )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        dest[i] = C * src[i];
+    }
+
+//    dest[0] = C * src[0];
+//    dest[1] = C * src[1];
+//    dest[2] = C * src[2];
+}
+
+
+static inline void ivec_rScale( ivec dest, const real C, const rvec src )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        dest[i] = (int)(C * src[i]);
+    }
+
+//    dest[0] = (int)(C * src[0]);
+//    dest[1] = (int)(C * src[1]);
+//    dest[2] = (int)(C * src[2]);
+}
+
+
+static inline int ivec_isZero( const ivec v )
+{
+    int i, ret;
+
+    ret = TRUE;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(&&: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret = (v[i] == 0) && ret;
+    }
+
+    return ret;
+
+//    if ( v[0] == 0 && v[1] == 0 && v[2] == 0 )
+//    {
+//        return TRUE;
+//    }
+//    return FALSE;
+}
+
+
+static inline int ivec_isEqual( const ivec v1, const ivec v2 )
+{
+    int i, ret;
+
+    ret = TRUE;
+
+#ifdef _OPENMP
+    #pragma omp simd reduction(&&: ret)
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        ret = (v1[i] == v2[i]) && ret;
+    }
+
+    return ret;
+
+//    if ( v1[0] == v2[0] && v1[1] == v2[1] && v1[2] == v2[2] )
+//    {
+//        return TRUE;
+//    }
+//
+//    return FALSE;
+}
+
+
+static inline void ivec_Sum( ivec dest, const ivec v1, const ivec v2 )
+{
+    int i;
+
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+    for ( i = 0; i < 3; ++i )
+    {
+        dest[i] = v1[i] * v2[i];
+    }
+
+//    dest[0] = v1[0] + v2[0];
+//    dest[1] = v1[1] + v2[1];
+//    dest[2] = v1[2] + v2[2];
+}
 
 
 #endif