/* some of the following code was modified by J. M. Lees from
   the standard GMT distribution software:
   The modifications mainly involved isolating
   the gridding and contouring from the gmt modules
   so they could be incorporated in Xmap

   Information for installing GMT 2.1.4
   @(#)README	2.16 8/16/93 
   Copyright (c) 1991-1993, P. Wessel & W. H. F. Smith
*/

/* --- feature switches --- */

#define _POSIX_SOURCE 1

/* --- system headers --- */

#include <stdlib.h>
#include <string.h>

/* --- local headers --- */

#include "geotouch.h"
#include "contour.h"

/* --- typedefs and global variables --- */

/* One input sample: its position (x, y), its value z, and the index of the
 * lattice node it is currently assigned to. */

struct DATA {
  float x, y, z;
  int index;
};

/* Coefficients in the Taylor series for Laplacian(z), a la
 * I. C. Briggs (1974): six values per constrained node. */

struct BRIGGS {
  double b[6];
};

/* One candidate grid size, used to build the top-ten list of faster grid
 * dimensions. */

struct SUGGESTION {
  int nx, ny;      /* candidate dimensions */
  double factor;   /* speed-up relative to the user's choice */
};

/* Largest grid dimension.  Presumably bounds the val[] array of
 * struct SURF_GRD, as in the commented-out copy of that struct below;
 * TODO confirm against the header that really declares it. */
#define MAX_GRID_DIM 200

/*
struct SURF_GRD
  {
  double xmin;
  double xmax;
  double ymin;
  double ymax;
  double xinc;
  double yinc;
  int nx;
  int ny;
  float val[MAX_GRID_DIM*MAX_GRID_DIM];
  };
*/

#define OUTSIDE 2000000000      /* Index number indicating data is outside usable area */


/* --- function prototypes --- */

/* Forward declarations, so that surf() can call helpers that are defined
 * after it in this file. */

/* Data preparation */
int remove_planar_trend(struct DATA *data);
void set_grid_parameters(void);
int throw_away_unusables(struct DATA *data);
int rescale_z_values(struct DATA *data);

/* Grid-size arithmetic */
int gcd_euclid(int a, int b);
int get_prime_factors(int n, int f[]);
void smart_divide(void);

/* Per-grid-cycle setup and solution */
void set_offset(void);
void set_index (struct DATA *data);
void set_coefficients(void);
void find_nearest_point(struct DATA *data);
int iterate(int mode);
void fill_in_forecast(void);

/* Output and diagnostics */
int replace_planar_trend(void);
void write_output(struct SURF_GRD *s_grd);
void suggest_sizes_for_surface(int nx, int ny, int *new_lees_nx,
			       int *new_lees_ny);
struct DATA *read_data(PT *datapt, int lg);

/* --- functions --- */

/* Debugging aid: write the first l points of dpt to the text file named
 * by file, one "x y z" triple per line (each value in an 11-wide %f field).
 *
 * The point count is always reported on stderr.  If the file cannot be
 * opened, or closing it reports a write error, a message is printed on
 * stderr and the function returns without aborting the program.
 */
void print_data(PT *dpt, int l, char *file)
  {
  FILE *fp;
  int i;

  fprintf(stderr,"in print_data: length=%d\n", l);

  if(file == NULL)
    {
    fprintf(stderr, "print_data: no output file name given\n");
    return;
    }

  /* fopen can fail (bad path, permissions); never write through NULL */
  fp = fopen(file, "w");
  if(fp == NULL)
    {
    fprintf(stderr, "print_data: cannot open %s for writing\n", file);
    return;
    }

  for(i = 0; i < l; i++)
    fprintf(fp,"%11f%11f%11f\n", dpt[i].x, dpt[i].y, dpt[i].z);

  /* fclose flushes buffered output, so a failure here means lost data */
  if(fclose(fp) != 0)
    fprintf(stderr, "print_data: error while writing %s\n", file);
  }

/* Allocate a SURF_GRD and record the region limits, increments and node
 * counts in it.  The val[] samples are left uninitialised.  Prints
 * "malloc error" and exits with status 1 if the allocation fails.
 */
struct SURF_GRD *surf_init(double xmin, double xmax, double ymin,
			   double ymax, double xinc, double yinc,
			   int nx, int ny)
{
  struct SURF_GRD *g = (struct SURF_GRD *)malloc(sizeof(struct SURF_GRD));

  if (g == NULL) {
    fprintf(stderr, "malloc error\n");
    exit(1);
  }

  /* region limits */
  g->xmin = xmin;
  g->xmax = xmax;
  g->ymin = ymin;
  g->ymax = ymax;

  /* node spacing and counts */
  g->xinc = xinc;
  g->yinc = yinc;
  g->nx = nx;
  g->ny = ny;

  return g;
}

/* Grid scattered points onto a regular lattice and return the result.
 *
 * datapt, length  input points and their count (handed to read_data)
 * Ix, Iy          number of grid steps along x and y; the lattice has
 *                 Ix + 1 by Iy + 1 nodes
 * Rw, Re, Rs, Rn  west, east, south and north limits (xmin, xmax, ymin, ymax)
 * A               aspect ratio (epsilon)
 * C               convergence limit; when 0, read_data substitutes a value
 * N               maximum number of passes per call to iterate()
 * T               tension; when non-zero it is used as both the boundary
 *                 and the interior tension
 * Z               relaxation factor (relax_new); relax_old is 1 - Z
 *
 * All working state (xmin, nx, grid, u, iu, briggs, npoints, ...) lives in
 * variables declared outside this function, so every call overwrites it.
 * The process exits on invalid limits or on allocation failure.
 *
 * Returns the SURF_GRD allocated by surf_init and filled by write_output.
 * Nothing in this function frees it, so the caller presumably owns it.
 */
struct SURF_GRD *surf(PT *datapt, int length, double Ix, double Iy,
		      double Rw, double Re, double Rs, double Rn,
		      double A, double C, double N, double T, double Z)
  {
  struct SURF_GRD *S_grd;
  struct DATA *data;
    
  xmin = Rw, xmax = Re, ymin = Rs, ymax = Rn;
  epsilon = A;                               /* aspect ratio, default: 1 */
  converge_limit = C;                        /* conv. limit, default: 0 */
  xstep = Ix, ystep = Iy;
  /* N arrives as a double; how it is converted depends on the declared
     type of max_iterations, which is not visible here */
  max_iterations = N;                        /* get # of iteration from N */
  tension = T;            
  relax_new = Z;                             /* parameter for accelerating */

  xinc = (xmax - xmin) / xstep;
  yinc = (ymax - ymin) / ystep;

  if(xmin >= xmax || ymin >= ymax || xinc <= 0.0 || yinc <= 0.0)
    {
    fprintf(stderr,"error in xmin,xmax,ymin,ymax\n");
    exit(-1);
    }

  /* A zero tension leaves boundary_tension and interior_tension at
     whatever values they already hold */
  if(tension != 0.0)
    boundary_tension = interior_tension = tension;

  relax_old = 1.0 - relax_new;
  nx = xstep + 1;         /* number of nodes on x axis */
  ny = ystep + 1;




  /* The block below is compiled out.  It is kept as found and would not
     compile if re-enabled (the fprintf call is interleaved with other
     statements). */
#if 0
  /* New stuff here for v4.3:  Check out the grid dimensions:  */
  grid = gcd_euclid(nx-1, ny-1);             /* gcd of nx-1 and ny-1 */

  if (grid == 1) {
    fprintf(stderr,
    suggest_sizes_for_surface(nx-1, ny-1, &new_lees_nx, &new_lees_ny);
/*    nx = new_lees_nx;
    ny = new_lees_ny;
*/
	    "surface:  WARNING:  Your grid dimensions %d %d are mutually prime.\n",nx,ny);

 /*     if(new_lees_nx > 0) nx = new_lees_nx;
      if(new_lees_ny > 0) ny = new_lees_ny;
*/
    fprintf(stderr," new_lees_nx = %d new_lees_ny= %d\n", nx, ny);        
  }
#endif



  /* The working array u carries a two-node border on every side */
  mx = nx + 4;
  my = ny + 4;
  r_xinc = 1.0 / xinc;
  r_yinc = 1.0 / yinc;
  
  S_grd=surf_init(xmin,xmax,ymin,ymax,xinc,yinc,nx,ny);

  /* New idea: set grid = 1, read data, setting index.  Then throw
     away data that can't be used in end game, constraining
     size of briggs->b[6] structure.  */

  grid = 1;
  set_grid_parameters();
  data = read_data(datapt, length);       
  throw_away_unusables(data);
  remove_planar_trend(data);
  rescale_z_values(data);
  
  /* Set up factors and reset grid to first value  */

  grid = gcd_euclid(nx - 1, ny - 1);

  n_fact = get_prime_factors(grid, factors);

  set_grid_parameters();

  /* Refine the starting grid until there are at least 4 blocks each way.
     NOTE(review): nothing here stops smart_divide from being called after
     the factor list is exhausted (n_fact reaching 0) if the blocks are
     still fewer than 4; confirm the inputs make that impossible. */
  while(block_nx < 4 || block_ny < 4)
    {
    smart_divide();
    set_grid_parameters();
    }

  set_offset();
  set_index(data);

  /* Now the data are ready to go for the first iteration.  */
  
  /* Allocate more space  */

  /* One BRIGGS record per surviving data point */
  if(!(briggs = (struct BRIGGS *)malloc(npoints*sizeof(struct BRIGGS))))
    {
    fprintf(stderr, "malloc error\n");
    exit(1);
    }

  /* Node status flags and node values, both mx by my */
  if(!(iu = (char *)malloc(mx * my * sizeof(char))))
    {
    fprintf(stderr, "malloc error\n");
    exit(1);
    }
  if(!(u = (float *) malloc(mx*my*sizeof(float))))
    {
    fprintf(stderr, "malloc error\n");
    exit(1);
    }
  
  /* First solution on the coarsest grid */
  set_coefficients();
  old_grid = grid;
  find_nearest_point (data);
  iterate(1);

  /* Refine: divide the grid, interpolate the previous solution onto the
     new nodes, relax without data constraints (mode 0), then attach the
     data and relax again (mode 1) */
  while(grid > 1)
    {
    smart_divide();
    set_grid_parameters();
    set_offset();
    set_index(data);
    fill_in_forecast();
    iterate(0);
    old_grid = grid;
    find_nearest_point(data);
    iterate(1);
    }

  replace_planar_trend();
  
  free((void *)briggs);
  free((void *)iu);

  /* u must stay alive until it has been copied into S_grd */
  write_output(S_grd);

  free((void *) u);
  free((void *)data);

  return(S_grd);
  }

/* Precompute, from epsilon and interior_tension, the constants that the
 * solver reads later: powers of epsilon (e_2, eps_p2, eps_m2, e_m2, ...),
 * the 12-point stencil weights in coeff[0][] (unconstrained nodes, already
 * scaled by a0) and coeff[1][] (constrained nodes), and the indices of the
 * four corners of the data lattice inside the u array.
 *
 * Reads nx, ny and my, so those must have been set first.
 */
void set_coefficients(void)
  {
  double e_4, loose, a0;

  loose = 1.0 - interior_tension;
  e_2 = sqr(epsilon);
  e_4 = sqr(e_2);
  eps_p2 = e_2;
  eps_m2 = inv(e_2);
  one_plus_e2 = 1.0 + e_2;
  two_plus_ep2 = 2.0 + 2.0 * eps_p2;
  two_plus_em2 = 2.0 + 2.0 * eps_m2;

  /* interior_tension == 1 makes loose zero and these divisions undefined */
  x_edge_const = 4 * one_plus_e2 - 2 * (interior_tension / loose);
  e_m2 = inv(e_2);
  y_edge_const = 4 * (1.0 + e_m2) - 2 * (interior_tension * e_m2 / loose);
  /* a0 normalises the weights used at unconstrained nodes */
  a0 = inv((6 * e_4 * loose + 10 * e_2 * loose + 8 * loose - 2 * one_plus_e2)
	   + 4*interior_tension*one_plus_e2);
  a0_const_1 = 2 * loose * (1.0 + e_4);
  a0_const_2 = 2.0 - interior_tension + 2 * loose * e_2;
	
  /* Stencil weights; the second index matches the neighbour order of
     offset[kase][0..11] built in set_offset() */
  coeff[1][4] = coeff[1][7] = -loose;
  coeff[1][0] = coeff[1][11] = -loose * e_4;
  coeff[0][4] = coeff[0][7] = -loose * a0;
  coeff[0][0] = coeff[0][11] = -loose * e_4 * a0;
  coeff[1][5] = coeff[1][6] = 2 * loose * one_plus_e2;
  coeff[0][5] = coeff[0][6] = (2 * coeff[1][5] + interior_tension) * a0;
  coeff[1][2] = coeff[1][9] = coeff[1][5] * e_2;
  coeff[0][2] = coeff[0][9] = coeff[0][5] * e_2;
  coeff[1][1] = coeff[1][3] = coeff[1][8] = coeff[1][10] = -2 * loose * e_2;
  coeff[0][1] = coeff[0][3] = coeff[0][8] = coeff[0][10] = coeff[1][1] * a0;

  e_2 *= 2;		/* We will need these in boundary conditions  */
  e_m2 *= 2;
	
  /* The data lattice starts two columns (2 * my) and two rows (+ 2) into
     the array; moving one column east adds my, one row north adds 1 */
  ij_sw_corner = 2 * my + 2;	/*  Corners of array of actual data  */
  ij_se_corner = ij_sw_corner + (nx - 1) * my;
  ij_nw_corner = ij_sw_corner + (ny - 1);
  ij_ne_corner = ij_se_corner + (ny - 1);
  }

/* Build offset[kase][0..11]: for each of the 25 position cases
 * (kase = 5 * x-case + y-case) the index displacements from a node to the
 * 12 neighbours of its stencil.  Neighbours are normally one or two coarse
 * grid steps away (grid rows, grid_east columns); in the cases next to an
 * edge the outward displacements shrink to single rows or columns (1, my)
 * so that they reach the auxiliary border values.
 */
void set_offset(void)
{
  int step_w[5], step_e[5], step_s[5], step_n[5];
  int step_w2[5], step_e2[5], step_s2[5], step_n2[5];
  int c, xc, yc, kase;

  for (c = 0; c < 5; c++) {
    /* west and south shrink in the low cases */
    step_w[c] = (c == 0) ? -my : -grid_east;
    step_s[c] = (c == 0) ? -1 : -grid;
    if (c == 0) {
      step_w2[c] = -2 * my;
      step_s2[c] = -2;
    } else if (c == 1) {
      step_w2[c] = -my - grid_east;
      step_s2[c] = -grid - 1;
    } else {
      step_w2[c] = -2 * grid_east;
      step_s2[c] = -2 * grid;
    }

    /* east and north shrink in the high cases */
    step_e[c] = (c == 4) ? my : grid_east;
    step_n[c] = (c == 4) ? 1 : grid;
    if (c == 4) {
      step_e2[c] = 2 * my;
      step_n2[c] = 2;
    } else if (c == 3) {
      step_e2[c] = my + grid_east;
      step_n2[c] = grid + 1;
    } else {
      step_e2[c] = 2 * grid_east;
      step_n2[c] = 2 * grid;
    }
  }

  for (xc = 0; xc < 5; xc++) {
    for (yc = 0; yc < 5; yc++) {
      kase = 5 * xc + yc;

      /* two rows up, then the row above (west, centre, east) */
      offset[kase][0] = step_n2[yc];
      offset[kase][1] = step_n[yc] + step_w[xc];
      offset[kase][2] = step_n[yc];
      offset[kase][3] = step_n[yc] + step_e[xc];

      /* the node's own row */
      offset[kase][4] = step_w2[xc];
      offset[kase][5] = step_w[xc];
      offset[kase][6] = step_e[xc];
      offset[kase][7] = step_e2[xc];

      /* the row below (west, centre, east), then two rows down */
      offset[kase][8] = step_s[yc] + step_w[xc];
      offset[kase][9] = step_s[yc];
      offset[kase][10] = step_s[yc] + step_e[xc];
      offset[kase][11] = step_s2[yc];
    }
  }
}

/* Fills in bilinear estimates into new node locations
 *  after grid is divided.   
 *
 * old_grid is the node spacing of the solution already held in u; grid is
 * the new, finer spacing.  Every new node gets an interpolated value and
 * status 0 in iu; every node of the old grid keeps its value and gets
 * status 5 (the value iterate() treats as fixed).  The east and north edges
 * are not covered by the cell loop and are handled by linear interpolation
 * afterwards.
 */

void fill_in_forecast (void)
  {
  int i, j, ii, jj, index_0, index_1, index_2, index_3, index_new;
  double delta_x, delta_y, a0, a1, a2, a3, old_size;

  /* reciprocal of the old spacing: turns node offsets into 0..1 fractions */
  old_size = inv((double)old_grid);

  /* first do from southwest corner */

  for(i = 0; i < nx - 1; i += old_grid)
    {
    for(j = 0; j < ny - 1; j += old_grid)
      {
      /* get indices of bilinear square */

      /* 0 = SW, 1 = SE, 2 = NE, 3 = NW corner of the old cell */
      index_0 = ij_sw_corner + i * my + j;
      index_1 = index_0 + old_grid * my;
      index_2 = index_1 + old_grid;
      index_3 = index_0 + old_grid;

      /* get coefficients */

      a0 = u[index_0];
      a1 = u[index_1] - a0;
      a2 = u[index_3] - a0;
      a3 = u[index_2] - a0 - a1 - a2;

      /* find all possible new fill ins */

      for(ii = i; ii < i + old_grid; ii += grid)
	{
	delta_x = (ii - i) * old_size;
	for (jj = j;  jj < j + old_grid; jj += grid)
	  {
	  index_new = ij_sw_corner + ii * my + jj;
	  /* the SW corner itself is an old node: keep its value */
	  if(index_new == index_0) continue;
	  delta_y = (jj - j) * old_size;
	  u[index_new] = a0 + a1 * delta_x + delta_y * ( a2 + a3 * delta_x);
	  iu[index_new] = 0;
	  }
	}
      iu[index_0] = 5;
      }
    }

  /* now do linear guess along east edge */

  for (j = 0; j < (ny - 1); j += old_grid)
    {
    index_0 = ij_se_corner + j;
    index_3 = index_0 + old_grid;
    /* jj == j rewrites u[index_0] with its own value (delta_y is 0) */
    for (jj = j;  jj < j + old_grid; jj += grid)
      {
      index_new = ij_se_corner + jj;
      delta_y = (jj - j) * old_size;
      u[index_new] = u[index_0] + delta_y * (u[index_3] - u[index_0]);
      iu[index_new] = 0;
      }
    iu[index_0] = 5;
    }

  /* now do linear guess along north edge */

  for(i = 0; i < (nx - 1); i += old_grid)
    {
    index_0 = ij_nw_corner + i * my;
    index_1 = index_0 + old_grid * my;
    for (ii = i;  ii < i + old_grid; ii += grid)
      {
      index_new = ij_nw_corner + ii * my;
      delta_x = (ii - i) * old_size;
      u[index_new] = u[index_0] + delta_x * (u[index_1] - u[index_0]);
      iu[index_new] = 0;
      }
    iu[index_0] = 5;
    }

  /* now set northeast corner to fixed and we're done */

  iu[ij_ne_corner] = 5;
  }

/* Routine for qsort to sort data structure for fast access to data by node
 * location. Sorts on index first, then on radius to node corresponding to
 * index, so that index goes from low to high, and so does radius.
 */

int compare_points(const void *a, const void *b)
  {
  int block_i, block_j, index_1, index_2;
  double x0, y0, dist_1, dist_2;
  struct DATA *point_1 = (struct DATA *)a, *point_2 = (struct DATA *)b;

  index_1 = point_1->index;
  index_2 = point_2->index;

  if(index_1 < index_2)	return (-1);
  else if(index_1 > index_2) return (1);
  else if(index_1 == OUTSIDE) return (0);
  else
    {
    /* Points are in same grid cell, find the one nearest to grid point */

    block_i = point_1->index / block_ny;
    block_j = point_1->index % block_ny;
    x0 = xmin + block_i * grid_xinc;
    y0 = ymin + block_j * grid_yinc;
    dist_1 = ((point_1->x - x0) * (point_1->x - x0) + (point_1->y - y0)
	      * (point_1->y - y0));
    dist_2 = ((point_2->x - x0) * (point_2->x - x0) + (point_2->y - y0)
	      * (point_2->y - y0));

    if(dist_1 < dist_2)	return (-1);
    else if(dist_1 > dist_2) return (1);
    else return (0);
    }
  }

  /* Divide grid by its largest prime factor */

/* Consume the last entry of factors[] (entry n_fact - 1): drop it from
 * the list and divide grid by it. */
void smart_divide(void)
{
  n_fact--;
  grid /= factors[n_fact];
}

/* Re-derive data[k].index for the current grid spacing, re-sort the data
 * by index and distance to node, and shrink npoints so that points now
 * outside the usable area are no longer counted.  Such points are marked
 * OUTSIDE, which sorts after every valid index.
 */

void set_index(struct DATA *data)
{
  int col, row, n, dropped = 0;

  for (n = 0; n < npoints; n++) {
    /* nearest node: round the position measured in grid cells */
    col = floor(((data[n].x - xmin) * r_grid_xinc) + 0.5);
    row = floor(((data[n].y - ymin) * r_grid_yinc) + 0.5);

    if (col >= 0 && col < block_nx && row >= 0 && row < block_ny) {
      data[n].index = col * block_ny + row;
    } else {
      data[n].index = OUTSIDE;
      dropped++;
    }
  }

  qsort((void *)data, (size_t)npoints, sizeof(struct DATA), compare_points);
  npoints -= dropped;
}

/* Attach the data to the nodes of the current grid.
 *
 * All node flags are first cleared to 0.  Then, for the first data point of
 * every distinct node index (the scan relies on data being sorted by index,
 * as set_index leaves it), the node is either
 *   - pinned: flag 5 and u set to the datum's z, when the point is within
 *     `small` of the node in both directions, or
 *   - constrained: flag 1..4 for the quadrant the point lies in, with six
 *     coefficients appended to briggs[] for iterate() to read back.
 * Later points with the same index are ignored.
 */
void find_nearest_point(struct DATA *data)
  {
  int i, j, k, last_index = -1, block_i, block_j, iu_index, briggs_index;
  double x0, y0, dx, dy, xys, xy1, btemp, b0, b1, b2, b3, b4, b5;
  
  /* NOTE(review): small is in the units of grid_xinc/grid_yinc, while dx
     and dy below are scaled by r_grid_xinc/r_grid_yinc (fractions of a
     cell); confirm this comparison is intended. */
  small = 0.05 * ((grid_xinc < grid_yinc) ? grid_xinc : grid_yinc);
  
  for(i = 0; i < nx; i += grid)	/* Reset grid info */
    for(j = 0; j < ny; j += grid)
      iu[ij_sw_corner + i * my + j] = 0;
  
  briggs_index = 0;
  for(k = 0; k < npoints; k++)		/* Find constraining value  */
    {
    if(data[k].index != last_index)
      {
      /* index was built as i * block_ny + j */
      block_i = data[k].index / block_ny;
      block_j = data[k].index % block_ny;
      last_index = data[k].index;
      iu_index = ij_sw_corner + (block_i * my + block_j) * grid;
      x0 = xmin + block_i * grid_xinc;
      y0 = ymin + block_j * grid_yinc;
      dx = (data[k].x - x0) * r_grid_xinc;
      dy = (data[k].y - y0) * r_grid_yinc;
      if((fabs(dx) < small) && (fabs(dy) < small))
	{
	iu[iu_index] = 5;
	u[iu_index] = data[k].z;
	}
      else
	{
	/* quadrant of the datum relative to its node: 1 = +x+y, 2 = -x+y,
	   3 = -x-y, 4 = +x-y */
	if(dx >= 0.0)
	  {
	  if(dy >= 0.0) iu[iu_index] = 1;
	  else iu[iu_index] = 4;
	  }
	else
	  {
	  if(dy >= 0.0) iu[iu_index] = 2;
	  else iu[iu_index] = 3;
	  }

	/* the coefficients depend only on the magnitudes of the offsets */
	dx = fabs(dx), dy = fabs(dy);
	btemp = 2 * one_plus_e2 / ((dx + dy) * (1.0 + dx + dy));
	b0 = 1.0 - 0.5 * (dx + (sqr(dx))) * btemp;
	b3 = 0.5 * (e_2 - (dy + sqr(dy)) * btemp);
	xys = 1.0 + dx + dy;
	xy1 = 1.0 / xys;
	b1 = (e_2 * xys - 4 * dy) * xy1;
	b2 = 2 * (dy - dx + 1.0) * xy1;
	b4 = b0 + b1 + b2 + b3 + btemp;
	b5 = btemp * data[k].z;
	briggs[briggs_index].b[0] = b0;
	briggs[briggs_index].b[1] = b1;
	briggs[briggs_index].b[2] = b2;
	briggs[briggs_index].b[3] = b3;
	briggs[briggs_index].b[4] = b4;
	briggs[briggs_index].b[5] = b5;
	briggs_index++;
	}
      }
    }
  }

/* Refresh everything that depends on the current value of grid: the number
 * of coarse nodes in each direction, the coarse node spacing with its
 * reciprocal, and the array stride of one coarse step to the east.
 */
void set_grid_parameters(void)
{
  /* x direction */
  block_nx = (nx - 1) / grid + 1;
  grid_xinc = grid * xinc;
  r_grid_xinc = 1.0 / grid_xinc;

  /* y direction */
  block_ny = (ny - 1) / grid + 1;
  grid_yinc = grid * yinc;
  r_grid_yinc = 1.0 / grid_yinc;

  /* one coarse column is grid columns of my elements each */
  grid_east = grid * my;
}

/* Copy the usable input points into a newly allocated DATA array.
 *
 * datapt  input points
 * lg      number of entries in datapt
 *
 * A point is kept when its z passes the nan() test and its nearest node,
 * for the current grid parameters, lies inside the block_nx by block_ny
 * lattice.  Each kept point gets index = i * block_ny + j for that node.
 * npoints is set to the number of points kept.  When converge_limit is 0
 * it is replaced by 0.001 * z_scale.
 *
 * The array is sized from lg, so it can never be overrun, whatever the
 * number of input points.  The caller must free() the returned array.
 * Exits with status 1 if the allocation fails.
 */
struct DATA *read_data(PT *datapt, int lg)
  {
  int i, j, k, l;
  double xn, yn, zz;
  struct DATA *data;
  /* at most lg points can be kept; ask for one slot even when lg <= 0 so
     that a NULL return always means a real allocation failure */
  size_t capacity = (lg > 0) ? (size_t)lg : 1;

  if(!(data = (struct DATA *) malloc(capacity * sizeof(struct DATA))))
    {
    fprintf(stderr,"malloc error\n");
    exit(1);
    }

  l = 0;

  for(k = 0; k < lg; k++)
    {
    xn = datapt[k].x;
    yn = datapt[k].y;
    zz = datapt[k].z;
    if(nan(zz)) continue;

    /* nearest node, measured in grid cells from the south-west corner */
    i = floor(((xn - xmin) * r_grid_xinc) + 0.5);
    if(i < 0 || i >= block_nx) continue;
    j = floor(((yn - ymin) * r_grid_yinc) + 0.5);
    if(j < 0 || j >= block_ny) continue;
    data[l].index = i * block_ny + j;
    data[l].x = xn;
    data[l].y = yn;
    data[l].z = zz;
    l++;
    }
  npoints = l;

  /* NOTE(review): in surf() this runs before rescale_z_values() has set
     z_scale for the present data set, so the default limit is derived from
     whatever z_scale held beforehand; confirm that is intended. */
  if(converge_limit == 0.0) converge_limit = 0.001 * z_scale; 
  return(data);
  }

/* Copy the nx by ny data lattice out of u into s_grd->val.
 *
 * u is stored column by column (stride my between columns) with rows
 * increasing northwards.  val is filled row by row (stride nx) with its
 * first row taken from the northernmost row of u.
 */
void write_output(struct SURF_GRD *s_grd)
{
  int col, row, base;

  for (col = 0; col < nx; col++) {
    base = ij_sw_corner + col * my;   /* southern end of this column */
    for (row = 0; row < ny; row++)
      s_grd->val[row * nx + col] = u[base + ny - row - 1];
  }
}
	
/* Relaxation passes over the nodes of the current grid (every grid-th node
 * of the lattice).
 *
 * Each pass first refreshes the auxiliary values in the two-node border
 * around the data area from the boundary conditions, then recomputes every
 * node whose flag in iu is not 5:
 *   flag 0     unconstrained, weighted sum of 12 neighbours with coeff[0][]
 *   flag 1..4  constrained by a nearby datum in that quadrant, using
 *              coeff[1][] and the next record of briggs[]
 * The new value is blended with the old one through relax_old/relax_new.
 * Passes repeat while the largest change of the pass, multiplied by
 * z_scale, exceeds converge_limit / grid and fewer than max_iterations
 * passes have been made.
 *
 * mode only selects the character taken from mode_type[] for the
 * long_verbose trace on stderr.
 *
 * Returns the number of passes made by this call; total_iterations is
 * increased by the same amount.
 */
int iterate(int mode)
  {
  int i, j, k, ij, kase, briggs_index, x_case, y_case, x_w_case, x_e_case,
    y_s_case, y_n_case, iteration_count = 0;
  double current_limit = (converge_limit / grid), change, max_change = 0.0,
    busum, sum_ij, b0, b1, b2, b3, b4, b5,
    x_0_const = (4.0 * (1.0 - boundary_tension) / (2.0 - boundary_tension)),
    x_1_const = ((3 * boundary_tension - 2.0) / (2.0 - boundary_tension)),
    y_denom = (2 * epsilon * (1.0 - boundary_tension) + boundary_tension),
    y_0_const = (4 * epsilon * (1.0 - boundary_tension) / y_denom),
    y_1_const = ((boundary_tension - 2 * epsilon * (1.0 - boundary_tension))
		 / y_denom);

  do
    {
    /* Records are read back in the order of the node loop below (i outer,
       j inner); this assumes find_nearest_point() stored them in the same
       order. */
    briggs_index = 0;	/* Reset the constraint table stack pointer  */
    max_change = -1.0;

    /* Fill in auxiliary boundary values (in new way) */
		
    /* First set d2[]/dn2 = 0 along edges:  */
    /* New experiment : (1-T)d2[]/dn2 + Td[]/dn = 0  */

    for (i = 0; i < nx; i += grid)
      {
      /* set d2[]/dy2 = 0 on south side:  */
      ij = ij_sw_corner + i * my;
      /* u[ij - 1] = 2 * u[ij] - u[ij + grid];  */
      u[ij - 1] = y_0_const * u[ij] + y_1_const * u[ij + grid];
      /* set d2[]/dy2 = 0 on north side:  */
      ij = ij_nw_corner + i * my;
      /* u[ij + 1] = 2 * u[ij] - u[ij - grid];  */
      u[ij + 1] = y_0_const * u[ij] + y_1_const * u[ij - grid];
      }

    for(j = 0; j < ny; j += grid)
      {
      /* set d2[]/dx2 = 0 on west side:  */
      ij = ij_sw_corner + j;
      /* u[ij - my] = 2 * u[ij] - u[ij + grid_east];  */
      u[ij - my] = x_1_const * u[ij + grid_east] + x_0_const * u[ij];
      /* set d2[]/dx2 = 0 on east side:  */
      ij = ij_se_corner + j;
      /* u[ij + my] = 2 * u[ij] - u[ij - grid_east];  */
      u[ij + my] = x_1_const * u[ij - grid_east] + x_0_const * u[ij];
      }

    /* Now set d2[]/dxdy = 0 at each corner:  */

    ij = ij_sw_corner;
    u[ij - my - 1] = (u[ij + grid_east - 1] + u[ij - my + grid]
		      - u[ij + grid_east + grid]);

    ij = ij_nw_corner;
    u[ij - my + 1] = (u[ij + grid_east + 1] + u[ij - my - grid]
		      - u[ij + grid_east - grid]);

    ij = ij_se_corner;
    u[ij + my - 1] = (u[ij - grid_east - 1] + u[ij + my + grid]
		      - u[ij - grid_east + grid]);

    ij = ij_ne_corner;
    u[ij + my + 1] = (u[ij - grid_east + 1] + u[ij + my - grid]
		      - u[ij - grid_east - grid]);

    /* Now set (1-T)dC/dn + Tdu/dn = 0 at each edge :  */
    /* New experiment:  only dC/dn = 0  */

    /* x_case is 0 or 1 for the two westernmost coarse columns, 3 or 4 for
       the two easternmost, 2 elsewhere; it selects the offset[] rows */
    x_w_case = 0;
    x_e_case = block_nx - 1;
    for(i = 0; i < nx; i += grid, x_w_case++, x_e_case--)
      {
      if(x_w_case < 2) x_case = x_w_case;
      else if(x_e_case < 2) x_case = 4 - x_e_case;
      else x_case = 2;

      /* South side :  */
      kase = x_case * 5;
      ij = ij_sw_corner + i * my;
      u[ij + offset[kase][11]] = (u[ij + offset[kase][0]] + eps_m2
				  * (u[ij + offset[kase][1]]
				     + u[ij + offset[kase][3]]
				     - u[ij + offset[kase][8]]
				     - u[ij + offset[kase][10]])
				  + two_plus_em2
				  * (u[ij + offset[kase][9]]
				     - u[ij + offset[kase][2]]));

      /* + tense * eps_m2 * (u[ij + offset[kase][2]]
	 - u[ij + offset[kase][9]]) / (1.0 - tense);  */

      /* North side :  */
      kase = x_case * 5 + 4;
      ij = ij_nw_corner + i * my;
      u[ij + offset[kase][0]] =	-(-u[ij + offset[kase][11]] + eps_m2
				  * (u[ij + offset[kase][1]]
				     + u[ij + offset[kase][3]]
				     - u[ij + offset[kase][8]]
				     - u[ij + offset[kase][10]])
				  + two_plus_em2
				  * (u[ij + offset[kase][9]]
				     - u[ij + offset[kase][2]]));

      /*  - tense * eps_m2 * (u[ij + offset[kase][2]]
	  - u[ij + offset[kase][9]]) / (1.0 - tense);  */
      }
		
    /* y_case: the same classification for rows (0, 1 south; 3, 4 north) */
    y_s_case = 0;
    y_n_case = block_ny - 1;
    for(j = 0; j < ny; j += grid, y_s_case++, y_n_case--)
      {
      if(y_s_case < 2) y_case = y_s_case;
      else if(y_n_case < 2) y_case = 4 - y_n_case;
      else y_case = 2;

      /* West side :  */
      kase = y_case;
      ij = ij_sw_corner + j;
      u[ij+offset[kase][4]] = (u[ij + offset[kase][7]] + eps_p2
			       * (u[ij + offset[kase][3]]
				  + u[ij + offset[kase][10]]
				  - u[ij + offset[kase][1]]
				  - u[ij + offset[kase][8]])
			       + two_plus_ep2
			       * (u[ij + offset[kase][5]]
				  - u[ij + offset[kase][6]]));

      /* + tense * (u[ij + offset[kase][6]]
	 - u[ij + offset[kase][5]]) / (1.0 - tense);  */

      /* East side :  */
      kase = 20 + y_case;
      ij = ij_se_corner + j;
      u[ij + offset[kase][7]] = -(-u[ij + offset[kase][4]] + eps_p2
				  * (u[ij + offset[kase][3]]
				     + u[ij + offset[kase][10]]
				     - u[ij + offset[kase][1]]
				     - u[ij + offset[kase][8]])
				  + two_plus_ep2
				  * (u[ij + offset[kase][5]]
				     - u[ij + offset[kase][6]]));

      /*  - tense * (u[ij + offset[kase][6]]
	  - u[ij + offset[kase][5]]) / (1.0 - tense);  */
      }

			
    /* That's it for the boundary points.  Now loop over all data  */

    x_w_case = 0;
    x_e_case = block_nx - 1;
    for(i = 0; i < nx; i += grid, x_w_case++, x_e_case--)
      {
      if(x_w_case < 2) x_case = x_w_case;
      else if(x_e_case < 2) x_case = 4 - x_e_case;
      else x_case = 2;

      y_s_case = 0;
      y_n_case = block_ny - 1;
      ij = ij_sw_corner + i * my;

      for(j = 0; j < ny; j += grid, ij += grid, y_s_case++, y_n_case--)
	{
	if(iu[ij] == 5) continue;	/* Point is fixed  */
	if(y_s_case < 2) y_case = y_s_case;
	else if(y_n_case < 2) y_case = 4 - y_n_case;
	else y_case = 2;

	kase = x_case * 5 + y_case;
	sum_ij = 0.0;
	if(iu[ij] == 0)		/* Point is unconstrained  */
	  {
	  for(k = 0; k < 12; k++)
	    {
	    sum_ij += (u[ij + offset[kase][k]] * coeff[0][k]);
	    }
	  }
	else				/* Point is constrained  */
	  {
	  b0 = briggs[briggs_index].b[0];
	  b1 = briggs[briggs_index].b[1];
	  b2 = briggs[briggs_index].b[2];
	  b3 = briggs[briggs_index].b[3];
	  b4 = briggs[briggs_index].b[4];
	  b5 = briggs[briggs_index].b[5];
	  briggs_index++;
	  /* the quadrant decides which four neighbours enter busum */
	  if(iu[ij] < 3)
	    {
	    if(iu[ij] == 1)	/* Point is in quadrant 1  */
	      busum = (b0 * u[ij + offset[kase][10]]
		       + b1 * u[ij + offset[kase][9]]
		       + b2 * u[ij + offset[kase][5]]
		       + b3 * u[ij + offset[kase][1]]);
	    else	       	/* Point is in quadrant 2  */
	      busum = (b0 * u[ij + offset[kase][8]]
		       + b1 * u[ij + offset[kase][9]]
		       + b2 * u[ij + offset[kase][6]]
		       + b3 * u[ij + offset[kase][3]]);
	    }
	  else
	    {
	    if(iu[ij] == 3)	/* Point is in quadrant 3  */
	      busum = (b0 * u[ij + offset[kase][1]]
		       + b1 * u[ij + offset[kase][2]]
		       + b2 * u[ij + offset[kase][6]]
		       + b3 * u[ij + offset[kase][10]]);
	    else		/* Point is in quadrant 4  */
	      busum = (b0 * u[ij + offset[kase][3]]
		       + b1 * u[ij + offset[kase][2]]
		       + b2 * u[ij + offset[kase][5]]
		       + b3 * u[ij + offset[kase][8]]);
	    }

	  for(k = 0; k < 12; k++)
	    sum_ij += (u[ij + offset[kase][k]] * coeff[1][k]);

	  sum_ij = ((sum_ij + a0_const_2 * (busum + b5))
		    / (a0_const_1 + a0_const_2 * b4));
	  }

	/* New relaxation here  */
	sum_ij = u[ij] * relax_old + sum_ij * relax_new;

	change = fabs(sum_ij - u[ij]);
	u[ij] = sum_ij;
	if(change > max_change) max_change = change;
	}
      }
    iteration_count++;
    total_iterations++;
    max_change *= z_scale;	/* Put max_change into z units  */
    if(long_verbose)
      fprintf(stderr,"%4d\t%c\t%8d\t%10g\t%10g\t%10d\n",
	      grid, mode_type[mode], iteration_count, max_change,
	      current_limit, total_iterations);
    }
  while((max_change > current_limit) && (iteration_count < max_iterations));

  return(iteration_count);
  }

/* Fit the plane z = c0 + c1 * xx + c2 * yy to the data by least squares,
 * where xx and yy are positions measured in grid cells from (xmin, ymin),
 * store the coefficients in plane_c0, plane_c1, plane_c2, and subtract the
 * plane from every data[i].z.
 *
 * When the determinant of the normal equations is exactly zero the
 * coefficients are set to 0 and the data are left untouched.
 * Always returns 0.
 */
int remove_planar_trend(struct DATA *data)
{
  int n;
  double px, py, pz;
  double det, num0, num1, num2;
  double sum_x = 0.0, sum_y = 0.0, sum_z = 0.0;
  double sum_xx = 0.0, sum_xy = 0.0, sum_xz = 0.0;
  double sum_yy = 0.0, sum_yz = 0.0;

  /* accumulate the sums of the normal equations */
  for (n = 0; n < npoints; n++) {
    px = (data[n].x - xmin) * r_xinc;
    py = (data[n].y - ymin) * r_yinc;
    pz = data[n].z;

    sum_x += px;
    sum_y += py;
    sum_z += pz;
    sum_xx += (px * px);
    sum_xy += (px * py);
    sum_xz += (px * pz);
    sum_yy += (py * py);
    sum_yz += (py * pz);
  }

  det = (npoints * sum_xx * sum_yy + 2 * sum_x * sum_y * sum_xy
	 - npoints * sum_xy * sum_xy - sum_x * sum_x * sum_yy
	 - sum_y * sum_y * sum_xx);

  if (det == 0.0) {
    /* degenerate fit: no trend is removed */
    plane_c0 = plane_c1 = plane_c2 = 0.0;
    return 0;
  }

  /* Cramer's rule numerators */
  num0 = (sum_z * sum_xx * sum_yy + sum_x * sum_xy * sum_yz
	  + sum_y * sum_xy * sum_xz
	  - sum_z * sum_xy * sum_xy - sum_x * sum_xz * sum_yy
	  - sum_y * sum_yz * sum_xx);
  num1 = (npoints * sum_xz * sum_yy + sum_z * sum_y * sum_xy
	  + sum_y * sum_x * sum_yz
	  - npoints * sum_xy * sum_yz - sum_z * sum_x * sum_yy
	  - sum_y * sum_y * sum_xz);
  num2 = (npoints * sum_xx * sum_yz + sum_x * sum_y * sum_xz
	  + sum_z * sum_x * sum_xy
	  - npoints * sum_xy * sum_xz - sum_x * sum_x * sum_yz
	  - sum_z * sum_y * sum_xx);

  plane_c0 = num0 / det;
  plane_c1 = num1 / det;
  plane_c2 = num2 / det;

  /* subtract the fitted plane from the data */
  for (n = 0; n < npoints; n++) {
    px = (data[n].x - xmin) * r_xinc;
    py = (data[n].y - ymin) * r_yinc;
    data[n].z -= (plane_c0 + plane_c1 * px + plane_c2 * py);
  }

  return 0;
}

/* Undo the data normalisation on the solved lattice: every node value is
 * multiplied by z_scale and the plane stored in plane_c0, plane_c1 and
 * plane_c2 is added back, evaluated at the node's column and row number.
 * Always returns 0.
 */
int replace_planar_trend(void)
{
  int col, row, node;

  for (col = 0; col < nx; col++) {
    for (row = 0; row < ny; row++) {
      node = ij_sw_corner + col * my + row;
      u[node] = (u[node] * z_scale)
		+ (plane_c0 + plane_c1 * col + plane_c2 * row);
    }
  }
  return 0;
}

/* Eliminate data which would be unusable on the final iteration, when
 * grid = 1.  Assumes grid = 1 and that set_grid_parameters has been
 * called.
 *
 * The data are sorted, every point after the first one assigned to a node
 * is marked OUTSIDE, and the data are sorted again so that the marked
 * points fall at the end, where the reduced npoints cuts them off.
 * Always returns 0.
 *
 * History: experimental modification 5 Dec 1988 by Smith, as part of a new
 * implementation using core memory for the b[6] coefficients, eliminating
 * calls to a temp file.
 */

int throw_away_unusables(struct DATA *data)
{
  int n;
  int seen = -1;        /* node index of the most recent point kept */
  int duplicates = 0;

  qsort((void *)data, (size_t)npoints, sizeof(struct DATA), compare_points);

  /* After sorting, the first point met for a node is the one nearest to
     it; any further points for the same node are redundant */
  for (n = 0; n < npoints; n++) {
    if (data[n].index != seen) {
      seen = data[n].index;
      continue;
    }
    data[n].index = OUTSIDE;
    duplicates++;
  }

  /* OUTSIDE sorts last, so lowering npoints discards the marked points */
  qsort((void *)data, npoints, sizeof (struct DATA), compare_points);
  npoints -= duplicates;

  return 0;
}

/* Normalise the data values by their root-mean-square.
 *
 * Sets z_scale = rms(z) and r_z_scale = 1 / z_scale, then multiplies every
 * data[i].z by r_z_scale.
 *
 * Degenerate input (no points, or every z exactly zero) would make the rms
 * zero and the reciprocal infinite, turning the data into NaN.  In that
 * case both scale factors are set to 1, the data are left as they are and
 * a warning is printed on stderr.
 *
 * Always returns 0.
 */
int rescale_z_values(struct DATA *data)
  {
  int i;
  double ssz = 0.0;

  for(i = 0; i < npoints; i++)
    ssz += (data[i].z * data[i].z);

  /* Set z_scale = rms(z):  */

  z_scale = (npoints > 0) ? sqrt(ssz / npoints) : 0.0;

  if(z_scale == 0.0)
    {
    /* nothing to scale by: fall back to the identity scaling */
    fprintf(stderr, "surface: WARNING: no data, or all z values are zero;"
	    " z values are not rescaled.\n");
    z_scale = r_z_scale = 1.0;
    return(0);
    }

  r_z_scale = 1.0 / z_scale;

  for (i = 0; i < npoints; i++) data[i].z *= r_z_scale;
  return(0);
  }

/* Routine to guess a number proportional to the operations
 * required by surface working on a user-desired grid of
 * size nx by ny, where nx = (xmax - xmin)/dx, and same for
 * ny.  (That is, one less than actually used in routine.)
 *
 * This is based on the following untested conjecture:
 * 	The operations are proportional to T = nxg*nyg*L,
 *	where L is a measure of the distance that data
 *	constraints must propagate, and nxg, nyg are the
 * 	current size of the grid.
 *	For nx,ny relatively prime, we will go through only
 * 	one grid cycle, L = max(nx,ny), and T = nx*ny*L.
 *	But for nx,ny whose greatest common divisor is a highly
 * 	composite number, we will have L equal to the division
 * 	step made at each new grid cycle, and nxg,nyg will
 * 	also be smaller than nx,ny.  Thus we can hope to find
 *	some nx,ny for which the total value of T is small.
 *
 * The above is pure speculation and has not been derived
 * empirically.  In actual practice, the distribution of the
 * data, both spatially and in terms of their values, will
 * have a strong effect on convergence.
 *
 * W. H. F. Smith, 26 Feb 1992.  */

double guess_surface_time(int nx, int ny)
  {
  int gcd;		/* Current value of the gcd  */
  int nxg, nyg;		/* Current value of the grid dimensions  */
  int nfactors;		/* Number of prime factors of current gcd  */
  int factor;  		/* Currently used factor  */

  /* Doubles are used below, even though the values will be integers,
     because the multiplications might reach sizes of O(n**3)  */

  double t_sum;		/* Sum of values of T at each grid cycle  */
  double length;	/* Current propagation distance.  */

  gcd = gcd_euclid(nx, ny);
  if(gcd > 1)
    {
    nfactors = get_prime_factors(gcd, factors);
    nxg = nx / gcd;
    nyg = ny / gcd;
    if(nxg < 3 || nyg < 3)
      {
      factor = factors[nfactors - 1];
      nfactors--;
      gcd /= factor;
      nxg *= factor;
      nyg *= factor;
      }
    }
  else
    {
    nxg = nx;
    nyg = ny;
    }
  length = max2(nxg, nyg);
  t_sum = nxg * (nyg * length);	/* Make it double at each multiply  */

  /* Are there more grid cycles ?  */

  while(gcd > 1)
    {
    factor = factors[nfactors - 1];
    nfactors--;
    gcd /= factor;
    nxg *= factor;
    nyg *= factor;
    length = factor;
    t_sum += nxg * (nyg * length);
    }
  return(t_sum);
  }

/* Calls guess_surface_time for a variety of trial grid
 * sizes, where the trials are highly composite numbers
 * with lots of factors of 2, 3, and 5.  The sizes are
 * within the range (nx,ny) - (2*nx, 2*ny).  Every trial
 * that improves on the user's original nx,ny is collected
 * and the collection is sorted by decreasing speed-up.
 *
 * Should be called with nx=(xmax-xmin)/dx, and ditto
 * for ny; that is, one smaller than the lattice used
 * in surface.c
 *
 * In this version the per-suggestion hints are not printed
 * and new_lees_nx / new_lees_ny are never written; the only
 * output is a message on stderr when no better size exists.
 * Exits with status 1 if memory runs out.
 *
 * The list is grown with realloc so that earlier entries
 * survive each new hit and nothing is leaked.  This assumes
 * the list may live in plain malloc/free storage, as the
 * existing free() of it already implied.
 *
 * W. H. F. Smith, 26 Feb 1992.
 */

void suggest_sizes_for_surface(int nx,int  ny, int *new_lees_nx,
			       int *new_lees_ny)
  {
  double users_time;			/* Time for user's nx, ny  */
  double current_time;			/* Time for current nxg, nyg  */
  int nxg, nyg;				/* Guessed by this routine  */
  int nx2, ny2, nx3, ny3, nx5, ny5;	/* For powers  */
  int xstop, ystop;			/* Set to 2*nx, 2*ny  */
  int n_sug = 0;			/* N of suggestions found  */
  struct SUGGESTION *sug = NULL;	/* Suggestions found so far  */
  struct SUGGESTION *grown;		/* Result of enlarging sug  */

  int compare_sugs(const void *, const void *);
  
  users_time = guess_surface_time(nx, ny);
  xstop = 2 * nx;
  ystop = 2 * ny;

  for(nx2 = 2; nx2 <= xstop; nx2 *= 2)
    {
    for(nx3 = 1; nx3 <= xstop; nx3 *= 3)
      {
      for(nx5 = 1; nx5 <= xstop; nx5 *= 5)
	{
	nxg = nx2 * nx3 * nx5;
	if(nxg < nx || nxg > xstop) continue;

	for(ny2 = 2; ny2 <= ystop; ny2 *= 2)
	  {
	  for(ny3 = 1; ny3 <= ystop; ny3 *= 3)
	    {
	    for(ny5 = 1; ny5 <= ystop; ny5 *= 5)
	      {
	      nyg = ny2 * ny3 * ny5;
	      if(nyg < ny || nyg > ystop) continue;

	      current_time = guess_surface_time(nxg, nyg);
	      if(current_time < users_time)
		{
		/* Enlarge through a temporary so the old list is still
		   reachable (and can be freed) if realloc fails  */
		grown = (struct SUGGESTION *)
		  realloc((void *)sug,
			  (size_t)(n_sug + 1) * sizeof(struct SUGGESTION));
		if(!grown)
		  {
		  fprintf(stderr, "malloc error\n");
		  free((void *)sug);
		  exit(1);
		  }
		sug = grown;
		sug[n_sug].nx = nxg;
		sug[n_sug].ny = nyg;
		sug[n_sug].factor = users_time/current_time;
		n_sug++;
		}
	      }
	    }
	  }
	}
      }
    }

  if(n_sug)
    {
    /* Best suggestion first; printing of the top entries is disabled  */
    qsort((void *)sug, (size_t)n_sug, sizeof(struct SUGGESTION), compare_sugs);

    free((void *)sug);
    }
  else
    fprintf(stderr, "surface: Cannot suggest any nx,ny"
	    " better than your -R -I define.\n");
  }

int compare_sugs(const void *a, const void *b)
  {
  struct SUGGESTION *point_1 = (struct SUGGESTION *)a,
    *point_2 = (struct SUGGESTION *)b;

  /* Sorts sugs into DESCENDING order! */

  if(point_1->factor < point_2->factor) return(1);
  else if(point_1->factor > point_2->factor) return(-1);
  else return(0);
  }

/* Fills the integer array f with the prime factors of n.
 * Returns the number of locations filled in f, which is
 * one if n is prime.
 *
 * f[] should have been allocated with enough space before
 * calling get_prime_factors().  We can be certain that f[32]
 * is enough space, for if n fits in a 32-bit int, then
 * |n| <= 2**31, and so it must have fewer than 32 prime factors.
 * I think that in general, ceil(log2((double)n)) is enough
 * storage space for f[].
 *
 * Tries 2,3,5 explicitly; then alternately adds 2 or 4
 * to the previously tried factor to obtain the next trial
 * factor.  This is done with the variable two_four_toggle.
 * With this method we try 7,11,13,17,19,23,25,29,31,35,...
 * up to a maximum of sqrt(n).  This shortened list results
 * in 1/3 fewer divisions than if we simply tried all integers
 * between 5 and sqrt(n).  We can reduce the size of the list
 * of trials by an additional 20% by removing the multiples
 * of 5, which are equal to 30m +/- 5, where m >= 1.  Starting
 * from 25, these are found by alternately adding 10 or 20.
 * To do this, we use the variable ten_twenty_toggle.
 *
 * W. H. F. Smith, 26 Feb 1992, after D.E. Knuth, vol. II 
 */

/* Store the prime factors of |n| in f[], smallest first and repeated
 * according to multiplicity, and return how many were stored.
 *
 * n   number to factor; its sign is ignored
 * f   caller-supplied array that receives the factors
 *
 * Returns 0 when |n| < 2 (nothing is stored), 1 when |n| is prime.
 *
 * The magnitude is held in an unsigned working copy so that the most
 * negative int is handled without overflow, and the trial-division
 * limit is checked against the cofactor that is still left, using
 * integer arithmetic only.
 */
int get_prime_factors(int n, int f[])
  {
  static const unsigned int small_primes[3] = {2u, 3u, 5u};

  unsigned int m;			/* Working copy of |n|  */
  unsigned int current_factor = 5u;	/* The factor currently being tried  */
  unsigned int skip_five = 25u;		/* Next multiple of 5 in the trial list  */
  int two_four_toggle = 0;		/* Used to add 2 or 4 to get next trial factor  */
  int ten_twenty_toggle = 0;		/* Used to add 10 or 20 to skip_five  */
  int n_factors = 0;			/* Returned; one if n is prime  */
  int i;

  /* |n| without negating a signed value (-n overflows for INT_MIN).  */

  m = (n < 0) ? 0u - (unsigned int)n : (unsigned int)n;
  if(m < 2u) return(0);

  /* Strip the 2s, 3s and 5s explicitly.  */

  for(i = 0; i < 3; i++)
    {
    while(m % small_primes[i] == 0u)
      {
      m /= small_primes[i];
      f[n_factors] = (int)small_primes[i];
      n_factors++;
      }
    if(m == 1u) return(n_factors);
    }

  /* Now try all the rest: 7, 11, 13, 17, 19, 23, 29, 31, ...  */

  for(;;)
    {
    /* Current factor is either 2 or 4 more than previous value  */

    if(two_four_toggle)
      {
      current_factor += 4u;
      two_four_toggle = 0;
      }
    else
      {
      current_factor += 2u;
      two_four_toggle = 1;
      }

    /* If current factor is a multiple of 5, skip it.  But first,
       set next value of skip_five according to 10/20 toggle:  */

    if(current_factor == skip_five)
      {
      if(ten_twenty_toggle)
	{
	skip_five += 20u;
	ten_twenty_toggle = 0;
	}
      else
	{
	skip_five += 10u;
	ten_twenty_toggle = 1;
	}
      continue;
      }

    /* Stop once current_factor squared exceeds what is left of n:
       every smaller prime has been divided out, so m is prime.
       Written as a division so the test itself cannot overflow.  */

    if(current_factor > m / current_factor) break;

    /* Get here when current_factor is not a multiple of 2,3 or 5:  */

    while(m % current_factor == 0u)
      {
      m /= current_factor;
      f[n_factors] = (int)current_factor;
      n_factors++;
      }
    if(m == 1u) return(n_factors);
    }

  /* m > 1 here and is the last, largest prime factor of n.  It is a
     proper divisor of |n| or |n| itself with n > INT_MIN, so it fits
     in an int.  */

  f[n_factors] = (int)m;
  n_factors++;
  return(n_factors);
  }

/* gcd_euclid.c  Greatest common divisor routine
 * Returns the greatest common divisor of a and b by Euclid's method.
 * I have experimented also with Stein's method, which involves only
 * subtraction and left/right shifting; Euclid is faster, both for
 * integers of size 0 - 1024 and also for random integers of a size
 * which fits in a long integer.  Stein's algorithm might be better
 * when the integers are HUGE, but for our purposes, Euclid is fine.
 *
 * Walter H. F. Smith, 25 Feb 1992, after D. E. Knuth, vol. II 
 */

/* Greatest common divisor of |a| and |b| by Euclid's algorithm.
 *
 * a, b   the two integers; their signs are ignored
 *
 * Returns gcd(|a|, |b|).  When one argument is 0 the magnitude of the
 * other is returned; gcd_euclid(0, 0) is 0.
 *
 * The magnitudes are computed here explicitly, in unsigned arithmetic,
 * rather than through project macros, so the result does not depend on
 * how those macros are defined and the most negative int is not
 * negated as a signed value.
 * NOTE(review): a result of 2**31 (only possible when the inputs are
 * INT_MIN and 0 or INT_MIN) does not fit in the int return type.
 */
int gcd_euclid(int a, int b)
  {
  unsigned int mag_a = (a < 0) ? 0u - (unsigned int)a : (unsigned int)a;
  unsigned int mag_b = (b < 0) ? 0u - (unsigned int)b : (unsigned int)b;
  unsigned int u, v, r;

  /* Start with the larger magnitude in u, the smaller in v.  */

  if(mag_a >= mag_b)
    {
    u = mag_a;
    v = mag_b;
    }
  else
    {
    u = mag_b;
    v = mag_a;
    }

  while(v > 0u)
    {
    r = u % v;	/* Knuth notes that u < 2v 40% of the time;  */
    u = v;	/* thus we could have tried a subtraction  */
    v = r;	/* followed by an if test to do r = u%v  */
    }
  return((int)u);
  }
