/*                    Linear Assignment Problem
 *
 * Given a set of n agents and n tasks, assign tasks to agent such that
 * every agent has a unique task --- and, of course, every task has
 * someone performing it.  Find the optimal such assignment --- for our
 * purposes here, the one with the largest total benefit.
 *
 * There is a global matrix benefit[MAX_SIZE][MAX_SIZE] which contains
 * for each value of row and col the benefit of assigning agent row to
 * task col.  Because of how C behaves, this has been set to a maximum
 * size, given in a #define.  Also at the global level is a vector to
 * receive the solution and a variable to hold the value of the solution
 * --- this is updated as the backtracking discovered permutations with
 * a better value that the presently stored permutation.
 *
 * The initial solution is generated as one of the two trivial solutions
 * --- either the diagonal solution (0, 1, 2, ...) or the antidiagonal
 * solution (...,2, 1, 0), whichever has the higher value --- as the sum
 * over k of benefit[k][solution[k]].  As a known solution, this
 * provides a lower limit when considering potential permutations.
 *
 * One can also quickly obtain an upper limit on a partial permutation.
 * While we are positioning values as [index], the known value for the
 * permutation will be the sum up to index of benefit[k][solution[k]].
 * Rows from [index+1] to [size-1] have not been assigned yet, and
 * correspondingly columns from perm[index+1] to perm[size-1] have not
 * been assigned.  One can obtain the column maximum value for each of
 * those columns and add them together.  The complete permutation cannot
 * possibly have a value greater than the total of its fixed part and
 * these column maxima, though it well might have one less.  If this
 * number is less than the already-known solution (lowerLimit), one can
 * immediately prune the decision tree --- discard this partial solution.
 *
 * Backtracking solution with threads beginning with the various
 * permutations based on setting [0].  A limited number of threads are
 * started and they self-schedule for the permutations that they process.
 *
 * Language:  Java, version 5 or later (Scanner, printf)
 *
 * Program author:  Tim Rolfe
 */
import java.util.*;
import java.io.*;

public class SelfSchedThreadSet
{
   // Diagonal/antidiagonal permutation sets solution and lowerLimit
   int solution[], lowerLimit;
   int size, benefit[][];
   // Self-scheduling is based on this persistent pending vector and
   // persistent indication of which cell is up next.
   int pending[], kNext = 0;
   static boolean DEBUG = false,
                  TRACE =  true;
   static Random gen;   // Used by shuffle
   static boolean THR_TRACE = false;
   Compute[] engine;

   public static void main (String[] args)throws Exception
   {  new SelfSchedThreadSet().process(args);  }

   void setup()
   {  int row, col, antisum, k;

      lowerLimit = 0;  antisum = 0; k = size;
      for (row = lowerLimit = 0; row < size; row++)
      {  shuffle(benefit[row], size);
         solution[row] = row;
         lowerLimit += benefit[row][row];
         antisum    += benefit[row][--k];   // Note predecrement of k
      }
   // If the antidiagonal sum is the larger, make that the
   // initial solution and lowerLimit.
      if (antisum > lowerLimit)
      {  lowerLimit = antisum;
         for (row = 0, k = size; row < size; row++)
            solution[row] = --k;            // Note predecrement of k
      }
   }

   void process(String[] args) throws Exception
   {
      int nRuns = args.length < 2 ? 1000 : Integer.parseInt(args[1]),
          row, col, run;
      int work[];
      long start, elapsed;

      int nThreads = args.length > 2 ? Integer.parseInt(args[2]) : 4;
      engine = new Compute[nThreads];

      size = args.length == 0 ? 10 : Integer.parseInt(args[0]);
      solution = new int[size];
      gen = new Random(size * nRuns);  // Force the sequence
      // Initialize as identical.  Shuffle randomizes the rows
      benefit  = new int[size][size];
      for (row = 0; row < size; row++)
         for (col = 0; col < size; col++)
            benefit[row][col] = col;

   // Capture the processing time.
      start = System.nanoTime();
      for (run = 1; run <= nRuns; run++)
      {  setup();  // Shuffle benefit and generate new initial state
      // Generate the working permutation from the solution generated
         work = (int[]) solution.clone();
         threadRun(work, nThreads);
         if (TRACE)
         {  for (row = 0; row < size; row++)
               System.out.printf("%3d", solution[row]);
            System.out.printf(" ==> %d\n", lowerLimit);
         }
      }
      elapsed = System.nanoTime() - start;
      System.out.printf(
         "Backtracking with %d threads for %d, average %3.3f msec\n",
         nThreads, size, (1E-06*elapsed)/nRuns);
   }

   int colMaxSum(int start, int []work)
   {  int sum = 0;
      for (int k = start; k < size; k++)
      {
         int row, col = work[k],
             columnMaximum = benefit[start][col];
         for (row = start+1; row < size; row++)
            if (columnMaximum < benefit[row][col])
                columnMaximum = benefit[row][col];
         sum += columnMaximum;
      }
      return sum;
   }

   static void swap(int[] x, int j, int k)
   {  int temp = x[j]; x[j] = x[k]; x[k] = temp;  }

   void explore(int index, int []vect, int prefix)
   {
      int k,       // Loop variable for swapping [index]..[size-1]
          hold;    // Value of fixed portion of a permutation,
                   // then used undoing the right rotation
      for ( k = index; k < size; k++ )
      {  int col, upperBound;
         swap(vect, index, k);
      // Add together column maxima
         upperBound = colMaxSum(index+1, vect);

         hold = prefix + benefit[index][vect[index]];

         // Selecting [size-2] also fixes [size-1] ---
         // a permutation has been completed.
         if (index == size-2)
         {
            if ( lowerLimit < hold+upperBound )
            {  // Add in the last piece
               hold += benefit[size-1][vect[size-1]];
               update (vect, hold);
            }
            else if (DEBUG)
            {  StringBuilder msg = new StringBuilder();
               msg.append(String.format("Reject %d: ", hold+upperBound));
               for (int j = 0; j < size; j++)
                  msg.append(String.format("%3d", vect[j]));
               System.out.println(msg.toString());
            }
         }
         else if (hold + upperBound > lowerLimit)
            explore(index+1, vect, hold);
         else if (DEBUG)
         {  StringBuilder msg = new StringBuilder();
            msg.append(String.format("Prune at %d, upper limit %d: ",
                     index, hold + upperBound));
            for (int j = 0; j < size; j++)
               msg.append(String.format("%3d", vect[j]));
            System.out.println(msg.toString());
         }
      }
      // Undo the one-cell rightward rotation done above
      hold = vect[index];
      for (k = index+1; k < size; k++)
         vect[k-1] = vect[k];
      vect[size-1] = hold;
   }

   // Insure that only one thread at a time accesses this.  Return
   // a boolean false when all permutations have been distributed.
   // These are the global variables:  int pending[], kNext;
   synchronized boolean getProblem(int []work)
   {  if (kNext >= size) return false;
      swap(pending, 0, kNext++);
      System.arraycopy(pending, 0, work, 0, size);
      return true;
   }

   // Insure that only one thread at a time updates the solution
   synchronized void update(int []work, int score)
   {
      if (DEBUG)
      {  StringBuilder msg = new StringBuilder();
         msg.append(String.format("Replacing %d with %d: ",
                    lowerLimit, score));
         for (int j = 0; j < size; j++)
            msg.append(String.format("%3d", work[j]));
         System.out.println(msg.toString());
      }
      lowerLimit = score;
      System.arraycopy(work, 0, solution, 0, size);
   }

   void threadRun(int []perm, int N_THREADS) throws Exception
   {  int k;

      pending = (int[]) perm.clone();
      kNext = 0;
      for (k = 0; k < N_THREADS; k++)
      {  engine[k] = new Compute();
         engine[k].start();
      }
      for (k = 0; k < N_THREADS; k++)
         engine[k].join();
      lastID = 0;  // Set up for next run
   }

   static void shuffle(int[] x, int n)
   {  int temp;

      while ( n > 1 )
      {  int k = gen.nextInt(n--);
         temp = x[n];
         x[n] = x[k];
         x[k] = temp;
      }
   }

   static int lastID = 0;    // Used within Compute
   class Compute extends Thread
   {
      int        myID;

      public Compute()
      {  myID = ++lastID;  }

      public void run()
      {
         int []perm = new int[size];

         while (getProblem(perm))
         {
            if (THR_TRACE)
               System.out.printf ("Thread %d working on %d\n",
                  myID, perm[0]);
            explore(1, perm, benefit[0][perm[0]]);
         }
      }
   } // end class Compute
}
