
/*------------------------------------------------------------------------------

   icm.cpp
   computes the Coulomb integral within an image charge model
   written by G. Samsonidze (January 2010)

   Based on surface.cpp for generating an isosurface of the scalar field.

   The following common objects are used from Common/wfn_utils.cpp
   macros: MAX
   structures: CARTESIAN, ELEMENT
   constants: MAXCHAR, EPS9, INF9, BOHR, vertex, periodic_table
   functions: lowercase, erasechar, parse, cub_read, xsf_read, cell_set,
   normal_make, box_check, scalar_clone, inversion, isovalue_scale

   The Coulomb integral is from J. B. Neaton, M. S. Hybertsen, and S. G. Louie,
   Renormalization of Molecular Electronic Levels at Metal-Molecule Interfaces,
   Phys. Rev. Lett. 97, 216405 (2006).

   If the wavefunction overlaps with the image plane, 1 / | r - r' | is
   averaged over r' in the grid cell by a Monte Carlo algorithm. Make sure two
   of the lattice vectors lie in the image plane and the third one is normal
   to it, otherwise the number of (r,r') pairs to be averaged increases
   significantly and the code will refuse to run.

--------------------------------------------------------------------------------

   Input is read from file icm.inp   |   example (HOMO of benzene)

   inputfilename file                |   inputfilename C6H6.b_15.cube
   inputfileformat cube|xsf          |   inputfileformat cube
   threshold [0.0,1.0)               |   threshold 0.99
   threshold_power 0|1|2             |   threshold_power 1
   coulomb_power 1|2                 |   coulomb_power 1
   mirrorplaneorigin                 |   mirrorplaneorigin
   <mpox> <mpoy> <mpoz>              |   0.0 0.0 -2.0
   mirrorplanenormal                 |   mirrorplanenormal
   <mpnx> <mpny> <mpnz>              |   0.0 0.0 1.0
   mirrorplaneunit bohr|angstrom     |   mirrorplaneunit angstrom
   uc T|F                            |   uc F
   uco                               |   uco
   <ucox> <ucoy> <ucoz>              |   0.0 0.0 0.0
   ucv                               |   ucv
   <ucv1x> <ucv1y> <ucv1z>           |   1.0 0.0 0.0
   <ucv2x> <ucv2y> <ucv2z>           |   0.0 1.0 0.0
   <ucv3x> <ucv3y> <ucv3z>           |   0.0 0.0 1.0
   ucu bohr|angstrom|latvec          |   ucu latvec
   sc T|F                            |   sc T
   sco                               |   sco
   <scox> <scoy> <scoz>              |   -0.5 -0.5 -0.5
   scv                               |   scv
   <scv1x> <scv1y> <scv1z>           |   1.0 0.0 0.0
   <scv2x> <scv2y> <scv2z>           |   0.0 1.0 0.0
   <scv3x> <scv3y> <scv3z>           |   0.0 0.0 1.0
   scu bohr|angstrom|latvec          |   scu latvec

--------------------------------------------------------------------------------

In the above example, the HOMO wavefunction of benzene is read from a
Gaussian Cube file. The wavefunction is placed in the center of the 
supercell (see the meaning of uc, uco, ucv, ucu, sc, sco, scv, scu
parameters in Visual/surface.cpp). The parts of the wavefunction 
outside an isosurface that contains 99% of the charge density are 
dropped (parameters threshold and threshold_power have the same 
meaning as isovalue and power in Visual/surface.cpp). Parameter 
coulomb_power tells the code whether the wavefunction in the Coulomb 
integral needs to be squared. Set both powers to 1 if the wavefunction 
file contains the squared amplitude as produced by ESPRESSO, and to 2 
for the linear amplitude as in PARATEC or SIESTA. The mirror plane 
is defined by parameters mirrorplaneorigin and mirrorplanenormal, 
in the above example it is parallel to the xy plane crossing the 
z axis at -2 Angstrom.

------------------------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
//#include <time.h>
#include "wfn_utils.h"

#ifdef PARA
#include "mpi.h"
#endif

// Half-width of the ravg table: averaged 1 / | r - r' | values are stored for
// cell offsets from -NCELL to +NCELL along each of the three grid step vectors.
// set_cutoff also uses it to scale the cutoff radius.
const int NCELL = 2;
// Number of random points generated in rand_init and consumed by mc_average.
const int NRAND = 2500000;
// Factor applied to the Coulomb integral, which is then printed in eV
// (presumably the Hartree energy in eV).
const double HARTREE = 27.21138505;

// File-scope state shared between rand_init, mc_average, tab_average,
// terminate and main.
// crand: random displacement vectors, allocated in rand_init and released in
// terminate.
CARTESIAN *crand = NULL;
// ravg: table of averaged 1 / | r - r' | values indexed by cell offset + NCELL;
// filled by mc_average (or with -1.0 by main) and read by tab_average.
double ravg[2*NCELL+1][2*NCELL+1][2*NCELL+1];

int par_read(const char *pfn, char *ifn, char *iff, double *threshold, int *threshold_power, int *coulomb_power, CARTESIAN *mpo, CARTESIAN *mpn, int *mpf, bool *uc, CARTESIAN *uco, CARTESIAN *ucv, int *ucf, bool *sc, CARTESIAN *sco, CARTESIAN *scv, int *scf)
{
   int i, ierr, icount = 0, icheck = 0;
   char s1[MAXCHAR], s2[MAXCHAR], s3[MAXCHAR];
   char* trash;
   FILE *h;

   h = fopen(pfn, "r");
   if (h == NULL)
      return -1;

   while (!feof(h))
   {
      strncpy(s1, "\0", 1);
      trash = fgets(s1, MAXCHAR, h);
      parse(s1, s2, s3);
      lowercase(s2);
      if (strcmp(s2, "inputfilename") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         strncpy(ifn, s3, MAXCHAR);
         ifn[MAXCHAR - 1] = '\0';
      }
      else if (strcmp(s2, "inputfileformat") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         strncpy(iff, s3, MAXCHAR);
         iff[MAXCHAR - 1] = '\0';
      }
      else if (strcmp(s2, "threshold") == 0)
      {
         icount++;
         *threshold = atof(s3);
         if (*threshold < -EPS9 || *threshold > 1.0 - EPS9)
            icheck--;
      }
      else if (strcmp(s2, "threshold_power") == 0)
      {
         icount++;
         *threshold_power = atoi(s3);
         if (*threshold_power < 0 || *threshold_power > 2)
            icheck--;
      }
      else if (strcmp(s2, "coulomb_power") == 0)
      {
         icount++;
         *coulomb_power = atoi(s3);
         if (*coulomb_power < 1 || *coulomb_power > 2)
            icheck--;
      }
      else if (strcmp(s2, "mirrorplaneorigin") == 0)
      {
         icount++;
         ierr = fscanf(h, "%le%le%le\n", &mpo->x, &mpo->y, &mpo->z);
      }
      else if (strcmp(s2, "mirrorplanenormal") == 0)
      {
         icount++;
         ierr = fscanf(h, "%le%le%le\n", &mpn->x, &mpn->y, &mpn->z);
      }
      else if (strcmp(s2, "mirrorplaneunit") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         lowercase(s3);
         if (strcmp(s3, "bohr") == 0)
            *mpf = 0;
         else if (strcmp(s3, "angstrom") == 0)
            *mpf = 1;
         else
            icheck--;
      }
      else if (strcmp(s2, "uc") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         lowercase(s3);
         if (strcmp(s3, "f") == 0 || strcmp(s3, "false") == 0 || strcmp(s3, "n") == 0 || strcmp(s3, "no") == 0)
            *uc = false;
         else if (strcmp(s3, "t") == 0 || strcmp(s3, "true") == 0 || strcmp(s3, "y") == 0 || strcmp(s3, "yes") == 0)
            *uc = true;
         else
            icheck--;
      }
      else if (strcmp(s2, "uco") == 0)
      {
         icount++;
         ierr = fscanf(h, "%le%le%le\n", &uco->x, &uco->y, &uco->z);
      }
      else if (strcmp(s2, "ucv") == 0)
      {
         icount++;
         for (i = 0; i < 3; i++)
            ierr = fscanf(h, "%le%le%le\n", &ucv[i].x, &ucv[i].y, &ucv[i].z);
      }
      else if (strcmp(s2, "ucu") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         lowercase(s3);
         if (strcmp(s3, "bohr") == 0)
            *ucf = 0;
         else if (strcmp(s3, "angstrom") == 0)
            *ucf = 1;
         else if (strcmp(s3, "latvec") == 0)
            *ucf = 2;
         else
            icheck--;
      }
      else if (strcmp(s2, "sc") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         lowercase(s3);
         if (strcmp(s3, "f") == 0 || strcmp(s3, "false") == 0 || strcmp(s3, "n") == 0 || strcmp(s3, "no") == 0)
            *sc = false;
         else if (strcmp(s3, "t") == 0 || strcmp(s3, "true") == 0 || strcmp(s3, "y") == 0 || strcmp(s3, "yes") == 0)
            *sc = true;
         else
            icheck--;
      }
      else if (strcmp(s2, "sco") == 0)
      {
         icount++;
         ierr = fscanf(h, "%le%le%le\n", &sco->x, &sco->y, &sco->z);
      }
      else if (strcmp(s2, "scv") == 0)
      {
         icount++;
         for (i = 0; i < 3; i++)
            ierr = fscanf(h, "%le%le%le\n", &scv[i].x, &scv[i].y, &scv[i].z);
      }
      else if (strcmp(s2, "scu") == 0)
      {
         icount++;
         erasechar(s3, ' ');
         erasechar(s3, '\t');
         lowercase(s3);
         if (strcmp(s3, "bohr") == 0)
            *scf = 0;
         else if (strcmp(s3, "angstrom") == 0)
            *scf = 1;
         else if (strcmp(s3, "latvec") == 0)
            *scf = 2;
         else
            icheck--;
      }
   }

   ierr = fclose(h);
   if (ierr != 0)
      return -1;

   if (icount != 16 || icheck < 0)
      return -1;

   return 0;
}

int scalar_trunc(double threshold, int *ni, int *nj, int *nk, CARTESIAN *sco, const CARTESIAN *sfs)
{
   int i, j, k, imin, imax, jmin, jmax, kmin, kmax, ni0, nj0, nk0, ni1, nj1, nk1;
   double ***scalar0 = NULL;

   ni0 = *ni;
   nj0 = *nj;
   nk0 = *nk;

   imin = ni0;
   imax = -1;
   jmin = nj0;
   jmax = -1;
   kmin = nk0;
   kmax = -1;

   for (i = 0; i < ni0; i++)
   for (j = 0; j < nj0; j++)
   for (k = 0; k < nk0; k++)
      if (fabs(scalar[i][j][k]) > threshold)
      {
         if (i < imin)
            imin = i;
         if (i > imax)
            imax = i;
         if (j < jmin)
            jmin = j;
         if (j > jmax)
            jmax = j;
         if (k < kmin)
            kmin = k;
         if (k > kmax)
            kmax = k;
      }

   ni1 = imax - imin + 1;
   nj1 = jmax - jmin + 1;
   nk1 = kmax - kmin + 1;

   if (ni1 < 1 || nj1 < 1 || nk1 < 1)
      return -1;

   if (ni1 != ni0 || nj1 != nj0 || nk1 != nk0)
   {
      *ni = ni1;
      *nj = nj1;
      *nk = nk1;

      scalar0 = scalar;

      scalar = new double**[ni1];
      for (i = 0; i < ni1; i++)
         scalar[i] = new double*[nj1];
      for (i = 0; i < ni1; i++)
      for (j = 0; j < nj1; j++)
         scalar[i][j] = new double[nk1];

      for (i = 0; i < ni1; i++)
      for (j = 0; j < nj1; j++)
      for (k = 0; k < nk1; k++)
         scalar[i][j][k] = scalar0[i + imin][j + jmin][k + kmin];

      for (i = 0; i < ni0; i++)
      for (j = 0; j < nj0; j++)
         delete [] scalar0[i][j];
      for (i = 0; i < ni0; i++)
         delete [] scalar0[i];
      delete [] scalar0;

      sco->x += sfs[0].x * double(imin) + sfs[1].x * double(jmin) + sfs[2].x * double(kmin);
      sco->y += sfs[0].y * double(imin) + sfs[1].y * double(jmin) + sfs[2].y * double(kmin);
      sco->z += sfs[0].z * double(imin) + sfs[1].z * double(jmin) + sfs[2].z * double(kmin);
   }

   return 0;
}

/* Converts the global scalar grid to a non-negative density in place
   (absolute value when coulomb_power == 1, square otherwise) and integrates
   it over the ni x nj x nk grid.

   The volume element is the triple product of the step vectors sfs[0..2]
   divided by BOHR^3. *wfnorm receives (sum of grid values) * (volume element).

   Returns -1 when the volume element or the plain sum of grid values is
   below EPS9, 0 otherwise; *wfnorm is written in either case. */
int scalar_norm(int coulomb_power, int ni, int nj, int nk, const CARTESIAN *sfs, double *wfnorm)
{
   const bool take_abs = (coulomb_power == 1);
   int a, b, c, status = 0;
   double total = 0.0;

   for (a = 0; a < ni; a++)
   for (b = 0; b < nj; b++)
   for (c = 0; c < nk; c++)
   {
      const double raw = scalar[a][b][c];
      const double dens = take_abs ? fabs(raw) : raw * raw;
      scalar[a][b][c] = dens;
      total += dens;
   }

   // signed volume of the grid cell spanned by the step vectors
   const double minor_x = sfs[1].y * sfs[2].z - sfs[2].y * sfs[1].z;
   const double minor_y = sfs[1].x * sfs[2].z - sfs[2].x * sfs[1].z;
   const double minor_z = sfs[1].x * sfs[2].y - sfs[2].x * sfs[1].y;
   const double cell = (sfs[0].x * minor_x -
                        sfs[0].y * minor_y +
                        sfs[0].z * minor_z)
                     / (BOHR * BOHR * BOHR);

   if (cell < EPS9)
      status = -1;
   if (total < EPS9)
      status = -1;

   *wfnorm = total * cell;

   return status;
}

/* Reflects the vector *in through the plane that passes through the origin
   and has normal *mpn: result = in - 2 (in . mpn) mpn.
   The formula is a true reflection only when mpn has unit length. */
CARTESIAN mirror_transform(const CARTESIAN *in, const CARTESIAN *mpn)
{
  const double along = in->x * mpn->x + in->y * mpn->y + in->z * mpn->z;
  const double twice = 2 * along;
  CARTESIAN out;

  out.x = in->x - twice * mpn->x;
  out.y = in->y - twice * mpn->y;
  out.z = in->z - twice * mpn->z;

  return out;
}

/* Builds the grid for r (o1, v1) and its mirror image for r' (o2, v2).

   mpn is normalized in place; mpo is divided by BOHR in place when mpf == 1.
   o1 and v1[0..2] are sco and sfs[0..2] divided by BOHR. o2 is o1 reflected
   through the plane defined by mpo and mpn, and v2[0..2] are the reflected
   step vectors.

   Returns -1 when the squared length of mpn is below EPS9, 0 otherwise. */
int mirror_plane(CARTESIAN *mpo, CARTESIAN *mpn, int mpf, const CARTESIAN *sco, const CARTESIAN *sfs, CARTESIAN *o1, CARTESIAN *v1, CARTESIAN *o2, CARTESIAN *v2)
{
   int axis;
   double len;
   CARTESIAN rel, img;

   // normalize the plane normal
   len = mpn->x * mpn->x + mpn->y * mpn->y + mpn->z * mpn->z;
   if (len < EPS9)
      return -1;
   len = sqrt(len);
   if (fabs(1.0 - len) > EPS9)
   {
      mpn->x /= len;
      mpn->y /= len;
      mpn->z /= len;
   }

   // rescale the plane origin when mpf selects the second unit
   if (mpf == 1)
   {
      mpo->x /= BOHR;
      mpo->y /= BOHR;
      mpo->z /= BOHR;
   }

   // grid for r
   o1->x = sco->x / BOHR;
   o1->y = sco->y / BOHR;
   o1->z = sco->z / BOHR;
   for (axis = 0; axis < 3; axis++)
   {
      v1[axis].x = sfs[axis].x / BOHR;
      v1[axis].y = sfs[axis].y / BOHR;
      v1[axis].z = sfs[axis].z / BOHR;
   }

   // the origin is reflected relative to the plane origin
   rel.x = o1->x - mpo->x;
   rel.y = o1->y - mpo->y;
   rel.z = o1->z - mpo->z;
   img = mirror_transform(&rel, mpn);
   o2->x = mpo->x + img.x;
   o2->y = mpo->y + img.y;
   o2->z = mpo->z + img.z;

   // step vectors are directions, reflected as they are
   for (axis = 0; axis < 3; axis++)
   {
      rel.x = v1[axis].x;
      rel.y = v1[axis].y;
      rel.z = v1[axis].z;
      img = mirror_transform(&rel, mpn);
      v2[axis].x = img.x;
      v2[axis].y = img.y;
      v2[axis].z = img.z;
   }

   return 0;
}

void set_cutoff(const CARTESIAN *v1, double *rcutoff, double *r2cutoff)
{
   int i;
   double l2, l2min = INF9;

   for (i = 0; i < 3; i++)
   {
      l2 = v1[i].x * v1[i].x + v1[i].y * v1[i].y + v1[i].z * v1[i].z;
      if (l2min > l2)
         l2min = l2;
   }
   *r2cutoff = l2min * double(NCELL * NCELL);
   *rcutoff = sqrt(*r2cutoff);
}

/* Decides whether the grid (o1, v1) comes close enough to its mirror image
   (o2, v2) that 1 / | r - r' | has to be averaged.

   Each step vector v1[i] is compared with its image v2[i]. The check is only
   performed when exactly two of them are parallel and one is antiparallel;
   otherwise a warning is printed by rank 0 and false is returned.

   Along the antiparallel axis the first and last grid points of both grids
   are projected onto v1[iminus]. If the two projected ranges interleave the
   grids overlap; if they are separated, overlap is reported only when some
   end-to-end distance is shorter than rcutoff.

   ni, nj, nk are the grid dimensions; the one matching the antiparallel axis
   gives the extent of the grid along that axis. */
bool check_overlap(int rank, int ni, int nj, int nk, const CARTESIAN *o1, const CARTESIAN *v1, const CARTESIAN *o2, const CARTESIAN *v2, double rcutoff)
{
   bool flag_overlap = false;
   int i, nplus = 0, nminus = 0, iminus = -1, ngrid = 0, dplus = 0, dminus = 0;
   double vl1[3], vl2[3], vdotv, vcosv, vdotr, p[4], d[4];
   CARTESIAN r[4];

   for (i = 0; i < 3; i++)
   {
      vl1[i] = sqrt(v1[i].x * v1[i].x + v1[i].y * v1[i].y + v1[i].z * v1[i].z);
      vl2[i] = sqrt(v2[i].x * v2[i].x + v2[i].y * v2[i].y + v2[i].z * v2[i].z);
      vdotv = v1[i].x * v2[i].x + v1[i].y * v2[i].y + v1[i].z * v2[i].z;
      vcosv = vdotv / (vl1[i] * vl2[i]);
      if (fabs(vcosv - 1.0) < EPS9)
         nplus++;
      if (fabs(vcosv + 1.0) < EPS9)
      {
         nminus++;
         iminus = i;
      }
   }

   if (nplus == 2 && nminus == 1)
   {
      // Number of grid points along the antiparallel axis. Each case needs
      // its own break; without it every axis would end up using nk.
      switch (iminus)
      {
         case 0:
            ngrid = ni;
            break;
         case 1:
            ngrid = nj;
            break;
         default:
            ngrid = nk;
            break;
      }

      // first and last points of both grids along the antiparallel axis
      r[0].x = o1->x;
      r[0].y = o1->y;
      r[0].z = o1->z;
      r[1].x = o1->x + v1[iminus].x * double(ngrid - 1);
      r[1].y = o1->y + v1[iminus].y * double(ngrid - 1);
      r[1].z = o1->z + v1[iminus].z * double(ngrid - 1);
      r[2].x = o2->x;
      r[2].y = o2->y;
      r[2].z = o2->z;
      r[3].x = o2->x + v2[iminus].x * double(ngrid - 1);
      r[3].y = o2->y + v2[iminus].y * double(ngrid - 1);
      r[3].z = o2->z + v2[iminus].z * double(ngrid - 1);

      // project the four points onto the direction of v1[iminus]
      for (i = 0; i < 4; i++)
      {
         vdotr = v1[iminus].x * r[i].x + v1[iminus].y * r[i].y + v1[iminus].z * r[i].z;
         p[i] = vdotr / vl1[iminus];
      }

      // signed distances between every end of grid 1 and every end of grid 2
      d[0] = p[0] - p[2];
      d[1] = p[0] - p[3];
      d[2] = p[1] - p[2];
      d[3] = p[1] - p[3];

      for (i = 0; i < 4; i++)
         if (d[i] > 0.0)
            dplus++;
         else
            dminus++;

      if (dplus == 4 || dminus == 4)
      {
         // the ranges are disjoint: overlap only if they are closer than rcutoff
         for (i = 0; i < 4; i++)
            if (fabs(d[i]) < rcutoff)
               flag_overlap = true;
      }
      else
        flag_overlap = true;
   }
   else
   {
      if (rank == 0)
         printf("    the image plane is not parallel/perpendicular to the lattice vectors\n    skipping wavefunction overlap check and Monte Carlo averaging\n    your job may fail if the wavefunction overlaps with its image\n\n");
   }

   return flag_overlap;
}

/* Returns o1 - o2 with a whole number of grid steps removed: for each step
   vector v1[axis] in turn, the projection of the remaining displacement onto
   that vector (in units of the vector) is converted to an integer with
   double_to_int and that many steps are subtracted. */
CARTESIAN grid_offset(const CARTESIAN *o1, const CARTESIAN *v1, const CARTESIAN *o2)
{
   CARTESIAN rem;

   rem.x = o1->x - o2->x;
   rem.y = o1->y - o2->y;
   rem.z = o1->z - o2->z;

   for (int axis = 0; axis < 3; axis++)
   {
      const CARTESIAN *step = &v1[axis];
      const double len2 = step->x * step->x + step->y * step->y + step->z * step->z;
      const double proj = step->x * rem.x + step->y * rem.y + step->z * rem.z;
      const int nsteps = double_to_int(proj / len2);

      rem.x = rem.x - step->x * double(nsteps);
      rem.y = rem.y - step->y * double(nsteps);
      rem.z = rem.z - step->z * double(nsteps);
   }

   return rem;
}

void rand_init(int rank, const CARTESIAN *v2)
{
   int i, j, seed;
   //time_t seconds;
   double r[3];

#ifdef VERBOSE
   if (rank == 0)
      printf("    generating random numbers for Monte Carlo integration\n\n");
#endif

   crand = new CARTESIAN[NRAND];

   //   seconds = time(NULL);
   //   seed = (int)seconds;
   seed = 5000;
   srand(seed);

   for (i = 0; i < NRAND; i++)
   for (j = 0; j < 3; j++)
      rand();

   for (i = 0; i < NRAND; i++)
   {
      for (j = 0; j < 3; j++)
         r[j] = double(rand()) / double(RAND_MAX) - 0.5;

      crand[i].x = v2[0].x * r[0] + v2[1].x * r[1] + v2[2].x * r[2];
      crand[i].y = v2[0].y * r[0] + v2[1].y * r[1] + v2[2].y * r[2];
      crand[i].z = v2[0].z * r[0] + v2[1].z * r[1] + v2[2].z * r[2];
   }
}

/* Fills the global table ravg with Monte Carlo averages of 1 / | d - r' |.

   For every cell offset (i, j, k) in [-NCELL, NCELL]^3 the displacement
   d = offset + i v1[0] + j v1[1] + k v1[2] is formed and 1 / | d - crand[l] |
   is averaged over the random points generated by rand_init. Points closer
   than sqrt(EPS9) to d are left out of the average.

   The random points are split between the MPI ranks (rank out of size); with
   PARA the partial sums and counts are combined over MPI_COMM_WORLD so every
   rank ends up with the same table.

   A table entry for which no sample was accepted is set to -1.0, the same
   "not available" marker main uses, so that tab_average reports a failure
   instead of returning the result of a division by zero. */
void mc_average(int rank, int size, const CARTESIAN *offset, const CARTESIAN *v1)
{
   int i, j, k, l, lmin, lmax, navg;
   int nlocal[2*NCELL+1][2*NCELL+1][2*NCELL+1];
#ifdef PARA
   int ndummy[2*NCELL+1][2*NCELL+1][2*NCELL+1];
#endif
   double rinv, r2;
   double rlocal[2*NCELL+1][2*NCELL+1][2*NCELL+1];
#ifdef PARA
   double rdummy[2*NCELL+1][2*NCELL+1][2*NCELL+1];
   const int ntable = (2*NCELL+1) * (2*NCELL+1) * (2*NCELL+1);
#endif
   CARTESIAN d;

#ifdef VERBOSE
   if (rank == 0)
      printf("    averaging 1 / | r - r' | over r' in the grid cell\n\n");
#endif

   // slice of the random points handled by this rank
   lmin = double_to_int(double(NRAND) * double(rank) / double(size));
   lmax = double_to_int(double(NRAND) * double(rank + 1) / double(size));

   for (i = -NCELL; i <= NCELL; i++)
   for (j = -NCELL; j <= NCELL; j++)
   for (k = -NCELL; k <= NCELL; k++)
   {
      d.x = offset->x + v1[0].x * double(i) + v1[1].x * double(j) + v1[2].x * double(k);
      d.y = offset->y + v1[0].y * double(i) + v1[1].y * double(j) + v1[2].y * double(k);
      d.z = offset->z + v1[0].z * double(i) + v1[1].z * double(j) + v1[2].z * double(k);

      navg = lmax - lmin;
      rinv = 0.0;
      for (l = lmin; l < lmax; l++)
      {
         r2 = (d.x - crand[l].x) * (d.x - crand[l].x) + (d.y - crand[l].y) * (d.y - crand[l].y) + (d.z - crand[l].z) * (d.z - crand[l].z);
         if (r2 > EPS9)
            rinv += 1.0 / sqrt(r2);
         else
            navg--;
      }
      nlocal[i+NCELL][j+NCELL][k+NCELL] = navg;
      rlocal[i+NCELL][j+NCELL][k+NCELL] = rinv;
   }

#ifdef PARA
   // The tables are contiguous, so each one is reduced in a single call.
   // The buffers hold C int / double, hence the C datatype handles.
   MPI_Allreduce(&nlocal[0][0][0], &ndummy[0][0][0], ntable, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
   MPI_Allreduce(&rlocal[0][0][0], &rdummy[0][0][0], ntable, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

   for (i = 0; i <= 2*NCELL; i++)
   for (j = 0; j <= 2*NCELL; j++)
   for (k = 0; k <= 2*NCELL; k++)
   {
      nlocal[i][j][k] = ndummy[i][j][k];
      rlocal[i][j][k] = rdummy[i][j][k];
   }
#endif

   for (i = 0; i <= 2*NCELL; i++)
   for (j = 0; j <= 2*NCELL; j++)
   for (k = 0; k <= 2*NCELL; k++)
   {
      if (nlocal[i][j][k] > 0)
         ravg[i][j][k] = rlocal[i][j][k] / double(nlocal[i][j][k]);
      else
         ravg[i][j][k] = -1.0;
   }
}

/* Looks up the averaged 1 / | r - r' | for the pair of points p1, p2 in the
   global table ravg.

   The displacement p1 - p2 - offset is decomposed into whole steps along
   v1[0..2] (converted to integers with double_to_int). The lookup succeeds
   when nothing is left after removing those steps (each component below
   EPS9) and every step count lies within [-NCELL, NCELL].

   Returns the table value on success. On failure prints which of the two
   conditions was violated to stderr and returns -1.0. */
double tab_average(const CARTESIAN *v1, const CARTESIAN *p1, const CARTESIAN *p2, const CARTESIAN *offset)
{
   bool f_offset, f_range;
   int i, n[3];
   double lv2, vdotd, rinv;
   CARTESIAN d;

   d.x = p1->x - p2->x - offset->x;
   d.y = p1->y - p2->y - offset->y;
   d.z = p1->z - p2->z - offset->z;

   for (i = 0; i < 3; i++)
   {
      lv2 = v1[i].x * v1[i].x + v1[i].y * v1[i].y + v1[i].z * v1[i].z;
      vdotd = v1[i].x * d.x + v1[i].y * d.y + v1[i].z * d.z;
      n[i] = double_to_int(vdotd / lv2);

      d.x = d.x - v1[i].x * double(n[i]);
      d.y = d.y - v1[i].y * double(n[i]);
      d.z = d.z - v1[i].z * double(n[i]);
   }

   f_offset = fabs(d.x) < EPS9 && fabs(d.y) < EPS9 && fabs(d.z) < EPS9;
   f_range = abs(n[0]) <= NCELL && abs(n[1]) <= NCELL && abs(n[2]) <= NCELL;
   if (f_offset && f_range)
      rinv = ravg[n[0]+NCELL][n[1]+NCELL][n[2]+NCELL];
   else
   {
     // Logical negation is required here: bitwise ~ on a bool is never zero,
     // which would print both messages regardless of which test failed.
     if (!f_offset) fprintf(stderr, "\nError: f_offset failed in tab_average.\n");
     if (!f_range)  fprintf(stderr, "\nError: f_range failed in tab_average.\n");
     rinv = -1.0;
   }

   return rinv;
}

/* Evaluates the double sum over grid points r (grid o1, v1) and r' (grid
   o2, v2) of scalar(r) * scalar(r') / | r - r' | using the global scalar
   grid of size ni x nj x nk for both.

   For pairs with | r - r' |^2 > r2cutoff the plain 1 / | r - r' | is used,
   otherwise the averaged value from tab_average (which needs offset).
   The sum is multiplied by 0.5 * HARTREE * v^2, where v is the triple
   product of v1[0..2], and stored in *energy.

   With PARA the outer grid points are distributed round-robin over the
   ranks and the partial sums are reduced to rank 0; *energy is meaningful
   on rank 0 only.

   Returns -1 when v is below EPS9 or when tab_average reports a failure
   (*energy is then left untouched), 0 otherwise. A rank that fails inside
   the loop still takes part in the barrier and the reduction, so the other
   ranks are not left waiting in a collective call. */
int coulomb_integral(int rank, int size, int ni, int nj, int nk, const CARTESIAN *o1, const CARTESIAN *v1, const CARTESIAN *o2,
                     const CARTESIAN *v2, const CARTESIAN *offset, double r2cutoff, double *energy)
{
   int i1, j1, k1, i2, j2, k2;
   bool failed = false;
#ifdef VERBOSE
   int pnew, pold = -1;
#endif
   double r2, rinv, v, w = 0.0;
#ifdef PARA
   double rdummy = 0.0;
#endif
   CARTESIAN p1, p2;

   // v1 is the same on every rank, so this early exit is taken by all of
   // them together.
   v = v1[0].x * (v1[1].y * v1[2].z - v1[2].y * v1[1].z) -
       v1[0].y * (v1[1].x * v1[2].z - v1[2].x * v1[1].z) +
       v1[0].z * (v1[1].x * v1[2].y - v1[2].x * v1[1].y);

   if (v < EPS9)
   {
      fprintf(stderr, "\nError: cell volume = 0 in coulomb_integral\n\n");
      return -1;
   }

   for (i1 = 0; i1 < ni && !failed; i1++)
   for (j1 = 0; j1 < nj && !failed; j1++)
   for (k1 = 0; k1 < nk && !failed; k1++)
   {

#ifdef PARA
      // round-robin distribution of the outer grid points
      if ((i1 * nj * nk + j1 * nk + k1) % size != rank)
         continue;
#endif

#ifdef VERBOSE
      if (rank == 0)
      {
         pnew = double_to_int(100.0 * double(i1 * nj * nk + j1 * nk + k1) / double(ni * nj * nk));
         if (pnew != pold)
         {
            printf("    completed %3i%%\n", pnew);
            pold = pnew;
         }
      }
#endif

      p1.x = o1->x + v1[0].x * double(i1) + v1[1].x * double(j1) + v1[2].x * double(k1);
      p1.y = o1->y + v1[0].y * double(i1) + v1[1].y * double(j1) + v1[2].y * double(k1);
      p1.z = o1->z + v1[0].z * double(i1) + v1[1].z * double(j1) + v1[2].z * double(k1);

      for (i2 = 0; i2 < ni && !failed; i2++)
      for (j2 = 0; j2 < nj && !failed; j2++)
      for (k2 = 0; k2 < nk && !failed; k2++)
      {
         p2.x = o2->x + v2[0].x * double(i2) + v2[1].x * double(j2) + v2[2].x * double(k2);
         p2.y = o2->y + v2[0].y * double(i2) + v2[1].y * double(j2) + v2[2].y * double(k2);
         p2.z = o2->z + v2[0].z * double(i2) + v2[1].z * double(j2) + v2[2].z * double(k2);

         r2 = (p1.x - p2.x) * (p1.x - p2.x) + (p1.y - p2.y) * (p1.y - p2.y) + (p1.z - p2.z) * (p1.z - p2.z);

         if (r2 > r2cutoff)
            rinv = 1.0 / sqrt(r2);
         else
            rinv = tab_average(v1, &p1, &p2, offset);

         if (rinv < -EPS9)
         {
            fprintf(stderr, "\nError: rinv < 0 in coulomb_integral\n\n");
            // leave all loops, but still join the collectives below
            failed = true;
            continue;
         }

         w += scalar[i1][j1][k1] * scalar[i2][j2][k2] * rinv;
      }
   }

#ifdef PARA
   MPI_Barrier(MPI_COMM_WORLD);
#endif

   w *= 0.5 * HARTREE * v * v;
#ifdef PARA
   // w is a C double, hence MPI_DOUBLE; only rank 0 receives the sum
   MPI_Reduce(&w, &rdummy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   w = rdummy;
#endif

   if (failed)
      return -1;

   *energy = w;

#ifdef VERBOSE
   if (rank == 0)
      printf("\n");
#endif

   return 0;
}

/* Releases the global arrays as, ap, scalar and crand and shuts MPI down.

   ni and nj must be the first two dimensions of the scalar grid as currently
   allocated. With PARA a non-zero ierr aborts all ranks through MPI_Abort;
   otherwise MPI is finalized.

   Returns ierr unchanged so that it can be used as the exit status. */
int terminate(int ierr, int ni, int nj)
{
   // delete [] on a null pointer does nothing, so no guards are needed
   delete [] as;
   delete [] ap;

   if (scalar != NULL)
   {
      for (int a = 0; a < ni; a++)
      {
         for (int b = 0; b < nj; b++)
            delete [] scalar[a][b];
         delete [] scalar[a];
      }
      delete [] scalar;
   }

   delete [] crand;

#ifdef PARA
   if (ierr != 0)
      MPI_Abort(MPI_COMM_WORLD, ierr);
   MPI_Finalize();
#endif

   return ierr;
}

int main(int argc, char *argv[])
{
   bool uc, sc, flag_overlap;
   int ucf, scf, mpf, threshold_power, coulomb_power, rank, size, i, j, k;
#ifdef PARA
   int idummy, info;
#endif
   int ierr = 0, na = 0, ni = 0, nj = 0, nk = 0;
   double threshold, wfnorm, energy, isovalue = 0.0, rcutoff, r2cutoff;
   char pfn[MAXCHAR] = "icm.inp";
   char ifn[MAXCHAR], iff[MAXCHAR];
   CARTESIAN sfo, sfv[3], sfs[3], uco, ucv[3], sco, scv[3], mpo, mpn, o1, v1[3], o2, v2[3], offset;

#ifdef PARA
   info = MPI_Init(&argc, &argv);
   if (info != MPI_SUCCESS)
   {
      fprintf(stderr, "\nMPI initialization failed\n\n");
      return terminate(-1, ni, nj);
   }
   info = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   info = MPI_Comm_size(MPI_COMM_WORLD, &size);
#else
   rank = 0;
   size = 1;
#endif

   if (rank == 0)
      ierr = par_read(pfn, ifn, iff, &threshold, &threshold_power, &coulomb_power, &mpo, &mpn, &mpf, &uc, &uco, ucv, &ucf, &sc, &sco, scv, &scf);
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed to read %s\n\n", pfn);
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
   {
      if (strcmp(iff, "cube") == 0)
      {
         ierr = cub_read(ifn, &na, &ni, &nj, &nk, &sfo, sfv, sfs);
      }
      else if (strcmp(iff, "xsf") == 0)
      {
         ierr = xsf_read(ifn, &na, &ni, &nj, &nk, &sfo, sfv, sfs);
      }
      else
      {
         ierr = -2;
      }
   }
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr == -2)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: unrecognized input format %s\n\n", iff);
      return terminate(-1, ni, nj);
   }
   else if (ierr == -1)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed to read %s\n\n", ifn);
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
      cell_set(sfo, sfv, uc, &uco, ucv, ucf, sc, &sco, scv, scf);

   if (rank == 0)
      if (sc)
         ierr = scalar_clone(&ni, &nj, &nk, &sfo, sfs, uco, ucv, sco, scv);
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed to build supercell\n\n");
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
      if (threshold > EPS9)
      {
         isovalue = isovalue_scale(ni, nj, nk, threshold_power, threshold);
         ierr = scalar_trunc(isovalue, &ni, &nj, &nk, &sco, sfs);
      }
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed in scalar_trunc\n\n");
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
      ierr = scalar_norm(coulomb_power, ni, nj, nk, sfs, &wfnorm);
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed in scalar_norm\n\n");
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
      ierr = mirror_plane(&mpo, &mpn, mpf, &sco, sfs, &o1, v1, &o2, v2);
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed in mirror_plane\n\n");
      return terminate(-1, ni, nj);
   }

   if (rank == 0)
   {
      printf("\n    grid  = %i %i %i\n", ni, nj, nk);
      printf("    Origin and step vectors for r\n");
      printf("    o1    = %.6f %.6f %.6f\n", o1.x, o1.y, o1.z);
      for (i = 0; i < 3; i++)
         printf("    v1[%i] = %.6f %.6f %.6f\n", i, v1[i].x, v1[i].y, v1[i].z);
      printf("    Origin and step vectors for r'\n");
      printf("    o2    = %.6f %.6f %.6f\n", o2.x, o2.y, o2.z);
      for (i = 0; i < 3; i++)
         printf("    v2[%i] = %.6f %.6f %.6f\n", i, v2[i].x, v2[i].y, v2[i].z);
      printf("    wfn isovalue  = %.15f\n", isovalue);
      printf("    wfn norm      = %.15f\n\n", wfnorm);
   }

#ifdef PARA
   info = MPI_Bcast(&ni, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
   info = MPI_Bcast(&nj, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
   info = MPI_Bcast(&nk, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);

   info = MPI_Bcast(&o1, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD);
   info = MPI_Bcast(v1, 9, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD);
   info = MPI_Bcast(&o2, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD);
   info = MPI_Bcast(v2, 9, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD);

   if (rank != 0)
   {
      scalar = new double**[ni];
      for (i = 0; i < ni; i++)
         scalar[i] = new double*[nj];
      for (i = 0; i < ni; i++)
      for (j = 0; j < nj; j++)
         scalar[i][j] = new double[nk];
   }

   for (i = 0; i < ni; i++)
   for (j = 0; j < nj; j++)
      info = MPI_Bcast(scalar[i][j], nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD);
#endif

   set_cutoff(v1, &rcutoff, &r2cutoff);

   flag_overlap = check_overlap(rank, ni, nj, nk, &o1, v1, &o2, v2, rcutoff);

   if (flag_overlap)
   {
      offset = grid_offset(&o1, v1, &o2);
      rand_init(rank, v2);
      mc_average(rank, size, &offset, v1);
   }
   else
   {
      for (i = -NCELL; i <= NCELL; i++)
      for (j = -NCELL; j <= NCELL; j++)
      for (k = -NCELL; k <= NCELL; k++)
         ravg[i+NCELL][j+NCELL][k+NCELL] = -1.0;
   }

   ierr = coulomb_integral(rank, size, ni, nj, nk, &o1, v1, &o2, v2, &offset, r2cutoff, &energy);
#ifdef PARA
   info = MPI_Allreduce(&ierr, &idummy, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   ierr = idummy;
#endif
   if (ierr != 0)
   {
      if (rank == 0)
	fprintf(stderr, "\nError: failed in coulomb_integral with ierr = %i\n\n", ierr);
      return terminate(-1, ni, nj);
   }
   if (rank == 0)
      printf(" coulomb integral = %.15f eV\n\n", energy);

   return terminate(0, ni, nj);
}

