#include <time.h>       // clock() etc.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef _WIN32          // NOT the Unix environment
// Use a macro to get double seconds from clock()
#define cpuClock() ((double)clock()/CLOCKS_PER_SEC)
#else
#include "cpuTimes.c"
#endif
// Diagonal permutation sets solution and lowerLimit
#define MAX_SIZE 32
// Allow space for appending value to solution vector
int solution[MAX_SIZE+1], lowerLimit;
int size, benefit[MAX_SIZE][MAX_SIZE];
enum { FALSE, TRUE };
int DEBUG, FORK_TRACE;

#include <unistd.h>     // fork stuff
#include <sys/wait.h>   // to allow wait

#include <sys/types.h>  // required for low-level I/O
#include <fcntl.h>      // File control definitions, like O_RDWR

void explore(int, int*, int);
void forkRun(int*, int);

int main (int argc, char** argv)
{
   char* filename = argc == 1 ? "Asg5.in" : argv[1];
   int   nProcs = argc > 2 ? atoi(argv[2]) : 4;
   FILE *input = fopen(filename, "r");
   int antisum, row, col, k, *work;
   double start, elapsed;

   // Set global variables
   DEBUG = FALSE;
   FORK_TRACE = FALSE;
   lowerLimit = 0;

   if (input == NULL)
   {  printf("Failed to open %s\n", filename); exit(0);  }
   
   fscanf(input, "%d", &size);
   work = (int*) calloc(size, sizeof *work);
   antisum  = 0;  k = size;
   for (row = lowerLimit = 0; row < size; row++)
   {  solution[row] = row;
      for (col = 0; col < size; col++)
         fscanf (input, "%d", &benefit[row][col]);
      lowerLimit += benefit[row][row];
      antisum  += benefit[row][--k];
   }
   if (antisum > lowerLimit)
   {  lowerLimit = antisum;
      for (row = 0, k = size; row < size; row++)
         solution[row] = --k;
   }
   memcpy(work, solution, size * sizeof *work);
   start = wallClock();
   forkRun(work, nProcs);
   elapsed = wallClock() - start;
   printf("%d processes found value %d\n", nProcs, lowerLimit);
   for (row = 0; row < size; row++)
      printf("%3d", solution[row]);
   printf("\n%3.3f seconds\n", elapsed);
   return 0;
}

void swap(int *x, int j, int k)
{  int temp = x[j]; x[j] = x[k]; x[k] = temp;  }

int colMaxSum(int start, int work[])
{  int sum = 0, k;
   for (k = start; k < size; k++)
   {
      int columnMaximum = benefit[start][work[k]],
          row;
      for (row = start+1; row < size; row++)
         if (columnMaximum < benefit[row][work[k]])
            columnMaximum = benefit[row][work[k]];
      sum += columnMaximum;
   }
   return sum;
}

void explore(int index, int vect[], int prefix)
{
   int k,       // Loop variable for swapping [index]..[size-1]
       j,       // Spare loop variable
       hold;    // Value of fixed portion of a permutation,
                // then used undoing the right rotation
   for ( k = index; k < size; k++ )
   {  int col, upperBound;
      swap(vect, index, k);
   // Add together column maxima
      upperBound = colMaxSum(index+1, vect);

      hold = prefix + benefit[index][vect[index]];

      // Selecting [size-2] also fixes [size-1] ---
      // a permutation has been completed.
      if (index == size-2)
      {
         if ( lowerLimit < hold+upperBound )
         {  // Add in the last piece
            hold += benefit[size-1][vect[size-1]];
            if (DEBUG)
            {  printf("Replacing %d with %d: ", lowerLimit, hold);
               for (j = 0; j < size; j++)
                  printf("%3d", vect[j]);
               putchar('\n');
            }
            lowerLimit = hold;
            memcpy(solution, vect, size * sizeof *vect);
//          System.arraycopy(vect, 0, solution, 0, size);
         }
         else if (DEBUG)
         {  printf("Reject %d: ", hold+upperBound);
            for (j = 0; j < size; j++)
               printf("%3d", vect[j]);
            putchar('\n');
         }
      }
      else if (hold + upperBound > lowerLimit)
         explore(index+1, vect, hold);
      else if (DEBUG)
      {  printf("Prune at %d, upper limit %d: ",
                index, hold + upperBound);
         for (j = 0; j < size; j++)
            printf("%3d", vect[j]);
         putchar('\n');
      }
   }
   // Undo the one-cell rightward rotation done above
   hold = vect[index];
   for (k = index+1; k < size; k++)
      vect[k-1] = vect[k];
   vect[size-1] = hold;
}

void forkRun(int *vect, int nProcs)
{  int proc, k, lo = 0, hi;
   int Child;
   char template[] = "AsgXXXXXX",
        clean[18] = "/bin/rm ";
   int fd = mkstemp(template);
   int Width = (size+1) * sizeof *solution;
   int *trial;
   double start = wallClock(), elapsed;

   if ( fd < 0 )
   {  fprintf (stderr, "ERROR opening file %s\n", template); exit(777);  }
   // Break the problem into nProc pieces:
   hi = size / nProcs;
   for (proc = 1; proc < nProcs;)
   {  
      Child = fork();
      if ( Child != 0 )
         break;
      lo = hi;
      hi = size * ++proc / nProcs;
   }
   if (FORK_TRACE)
   {  printf("Process %d:  %d <= k < %d\n",
             proc, lo, hi);
      fflush(stdout);
   }
   // Working core:  explore each option for this set
   for (k = lo; k < hi; k++)
   {  swap(vect, 0, k);
      explore(1, vect, benefit[0][vect[0]]);
      swap(vect, 0, k);
   }
   // Set solution value into place for storage
   solution[size] = lowerLimit;
   elapsed = wallClock() - start;
   // Original process
   if ( proc == 1 )     // The for loop BEGINS at 1
   {
      if (FORK_TRACE)
      {  printf ("Process %d found max %d in %3.3f seconds\n",
               proc, lowerLimit, elapsed);
         fflush(stdout);
      }
      wait(NULL);
   }
   // One of the processes generated by fork
   else
   {
      if (FORK_TRACE)
      {  printf ("Process %d found max %d in %3.3f seconds\n",
               proc, lowerLimit, elapsed);
         fflush(stdout);
      }
      if ( Child != 0 ) // Wait for the child to terminate
         wait(NULL);
      // Send this result to the shared file
      k = write (fd, &solution, Width);
      // This child has finished its job and can exit
      exit(0);
   }
   // Original process now processes the other solutions.

   // Generate the required space for the vector's solution value
   trial = (int*) calloc (size+1, sizeof *trial);
   k = lseek(fd, 0, SEEK_SET);
   // read returns the number of bytes read
   k = read(fd, trial, Width);
   while ( k > 0 ) // Read returns zero at EOF
   {
      if (trial[size] > lowerLimit)
      {  if (FORK_TRACE)
            printf("Updating solution from %d to %d\n",
                   lowerLimit, trial[size]);
         // Note:  lowerLimit and solution are global
         lowerLimit = trial[size];
         memcpy(solution, trial, size * sizeof *trial);
      }
      else if (FORK_TRACE)
         printf("No update, %d vs. %d\n",
                lowerLimit, trial[size]);
      // Get the next solution from file
      k = read(fd, trial, Width);
   }
   // Clean up:  delete temporary file, return heap space.
   strcat(clean, template);
   system(clean);
   free(trial);
}
