/* Weighted histogram analysis routines. */

#include "wham.h"

#define F_MAX 256
#define TOL 1.0e-5

/*
 * Entry point of the weighted histogram analysis program.
 *
 * Usage: <program> param_file
 *
 * The parameter file holds the number of histogram files followed by that
 * many whitespace-delimited file names (at most F_MAX characters each).
 * Every histogram file must supply n_bins records of the form
 * "<parameter value> <count>".
 *
 * The program iterates the partition-function estimates z[i] until the
 * relative change of every z[i] (i >= 1) is within TOL, then writes the
 * estimated probability distribution to "prob.wham" and the free energy
 * per molecule to "free.wham".
 *
 * Exits with status 0 on success and 1 on a usage error or malformed input.
 */
int main(int argc, char *argv[])
{
   FILE *f_param, *f_hist, *f_free, *f_prob;
   char *param_file, **hist_file;
   char name_format[16];
   int i, j, n_mol, n_bins, n_files, i_bin, keep_going, next_char,
      *n_samples, **histogram;
   double pi, deviation, energy, numerator, denominator, ratio,
      change, rel_change, param, param_min, param_max, param_incr,
      *prob, *prob_unc, *free_energy, *free_unc,
      *z, *z_new, *param_eq, *k_bias, **weight;

   /* Get command-line input, which consists of the name of the file
      containing input parameters. */
   if (argc != 2) {
      fprintf(stderr, "Usage: %s param_file\n", argv[0]);
      exit(1);
   }
   /* argv[1] stays valid for the whole run, so no private copy is needed. */
   param_file = argv[1];

   printf("\nWeighted histogram analysis program\n");
   fflush(NULL);

   /* Read the number of histogram files and their names from the
      parameter file. */
   f_param = gfopen(param_file, "r");
   if (fscanf(f_param, "%d\n", &n_files) != 1 || n_files < 1) {
      fprintf(stderr, "Error: %s must start with a positive number of "
              "histogram files\n", param_file);
      exit(1);
   }
   hist_file = allocate_2d_array(n_files, F_MAX + 1, sizeof(char));
   /* Build "%<F_MAX>s" so a name can never overrun its F_MAX + 1 byte
      buffer (F_MAX characters plus the terminating null). */
   sprintf(name_format, "%%%ds", F_MAX);
   for (i = 0; i < n_files; ++i) {
      if (fscanf(f_param, name_format, hist_file[i]) != 1) {
         fprintf(stderr, "Error: %s lists fewer than %d histogram files\n",
                 param_file, n_files);
         exit(1);
      }
      /* A name that filled the whole field and is immediately followed by
         a non-whitespace character was truncated by the width limit. */
      if (strlen(hist_file[i]) == F_MAX) {
         next_char = fgetc(f_param);
         if (next_char != EOF) {
            if (strchr(" \t\n\v\f\r", next_char) == NULL) {
               fprintf(stderr, "Error: histogram file name %d in %s is "
                       "longer than %d characters\n",
                       i + 1, param_file, F_MAX);
               exit(1);
            }
            ungetc(next_char, f_param);
         }
      }
   }
   fclose(f_param);

   /* Read in histograms and calculate weights.  The system size, bin
      count, sample counts and bias parameters are hard-coded here. */
   n_mol = 400;
   n_bins = 2000;
   n_samples = allocate_1d_array(n_files, sizeof(int));
   for (i = 0; i < n_files; ++i)
      n_samples[i] = 1000000;
   histogram = allocate_2d_array(n_files, n_bins, sizeof(int));
   weight = allocate_2d_array(n_files, n_bins, sizeof(double));
   param_eq = allocate_1d_array(n_files, sizeof(double));
   k_bias = allocate_1d_array(n_files, sizeof(double));
   pi = acos(-1.0);
   /* File i is biased towards param_eq[i] = cos(5 * i degrees) with force
      constant 50000. */
   for (i = 0; i < n_files; ++i) {
      param_eq[i] = cos(5.0 * i * pi / 180.0);
      k_bias[i] = 50000;
   }
   /* File 0 carries no bias, so all of its weights equal exp(0) = 1. */
   k_bias[0] = 0.0;
   for (i = 0; i < n_files; ++i) {
      f_hist = gfopen(hist_file[i], "r");
      for (i_bin = 0; i_bin < n_bins; ++i_bin) {
         /* Refuse short or malformed files rather than silently using
            stale or uninitialized values. */
         if (fscanf(f_hist, "%lf %d\n", &param, &histogram[i][i_bin]) != 2) {
            fprintf(stderr, "Error: could not read bin %d of histogram "
                    "file %s\n", i_bin, hist_file[i]);
            exit(1);
         }
         /* Harmonic bias energy and the corresponding weight factor. */
         deviation = param - param_eq[i];
         energy = 0.5 * k_bias[i] * SQR(deviation);
         weight[i][i_bin] = exp(-energy);
      }
      fclose(f_hist);
   }

   /* Initialize partition functions. */
   z = allocate_1d_array(n_files, sizeof(double));
   z_new = allocate_1d_array(n_files, sizeof(double));
   for (i = 0; i < n_files; ++i)
      z[i] = 1.0;

   /* Iteratively estimate partition functions until self-consistency is
      achieved.  Every loop below starts at index 1: z[0] is never updated
      and keeps its initial value of 1.0. */
   do {

      /* Zero new estimates of partition functions. */
      for (i = 1; i < n_files; ++i)
         z_new[i] = 0.0;

      /* Loop over bins. */
      for (i_bin = 0; i_bin < n_bins; ++i_bin) {

         /* Calculate integrand and add contributions to estimated
            partition functions. */
         numerator = 0.0;
         denominator = 0.0;
         for (j = 0; j < n_files; ++j) {
            numerator += histogram[j][i_bin];
            denominator += weight[j][i_bin] * n_samples[j] / z[j];
         }
         ratio = numerator / denominator;
         for (i = 1; i < n_files; ++i)
            z_new[i] += weight[i][i_bin] * ratio;
      }

      /* Test for convergence: keep iterating while any relative change
         exceeds TOL. */
      keep_going = 0;
      for (i = 1; i < n_files; ++i) {
         change = z_new[i] - z[i];
         rel_change = change / z[i];
         if (ABS(rel_change) > TOL) {
            keep_going = 1;
            break;
         }
      }

      /* Report the previous and new estimates for this iteration. */
      for (i = 1; i < n_files; ++i)
         printf("%d %g %g\n", i, z[i], z_new[i]);
      fflush(NULL);

      /* Update estimates. */
      for (i = 1; i < n_files; ++i)
         z[i] = z_new[i];

   } while (keep_going);

   /* Calculate best estimate of probability distribution. */
   prob = allocate_1d_array(n_bins, sizeof(double));
   prob_unc = allocate_1d_array(n_bins, sizeof(double));
   free_energy = allocate_1d_array(n_bins, sizeof(double));
   free_unc = allocate_1d_array(n_bins, sizeof(double));
   for (i_bin = 0; i_bin < n_bins; ++i_bin) {
      numerator = 0.0;
      denominator = 0.0;
      for (j = 0; j < n_files; ++j) {
         numerator += histogram[j][i_bin];
         denominator += weight[j][i_bin] * n_samples[j] / z[j];
      }
      ratio = numerator / denominator;
      prob[i_bin] = ratio / z[0];
      prob_unc[i_bin] = sqrt(prob[i_bin] / denominator);
   }

   /* Calculate free energy per molecule.  Bins with zero probability get
      zero free energy and zero uncertainty; they are skipped on output. */
   for (i_bin = 0; i_bin < n_bins; ++i_bin)
      if (prob[i_bin] > 0.0) {
         free_energy[i_bin] = - log(prob[i_bin]) / n_mol;
         /* NOTE(review): the free energy is divided by n_mol but this
            uncertainty is not -- confirm whether it should be too. */
         free_unc[i_bin] = prob_unc[i_bin] / prob[i_bin];
      }
      else {
         free_energy[i_bin] = 0.0;
         free_unc[i_bin] = 0.0;
      }

   /* Write results to output files, one row per populated bin, labelled
      with the bin centre on a uniform grid from param_min to param_max. */
   f_free = gfopen("free.wham", "w");
   f_prob = gfopen("prob.wham", "w");
   param_min = 0.0;
   param_max = 1.0;
   param_incr = (param_max - param_min) / n_bins;
   for (i_bin = 0; i_bin < n_bins; ++i_bin) {
      if (prob[i_bin] > 0.0) {
         param = param_min + (i_bin + 0.5) * param_incr;
         fprintf(f_free, "%g %g %g\n", param, free_energy[i_bin],
                 free_unc[i_bin]);
         fprintf(f_prob, "%g %g %g\n", param, prob[i_bin], prob_unc[i_bin]);
      }
   }
   fclose(f_free);
   fclose(f_prob);

   exit(0);
}

#undef F_MAX
#undef TOL
