/*                    Linear Assignment Problem
 *
 * Given a set of n agents and n tasks, assign tasks to agent such that
 * every agent has a unique task --- and, of course, every task has
 * someone performing it.  Find the optimal such assignment --- for our
 * purposes here, the one with the largest total benefit.
 *
 * There is a global matrix benefit[MAX_SIZE][MAX_SIZE] which contains
 * There is a global matrix benefit[][] generated once the size is
 * known.  Also at the global level is a vector to receive the solution
 * and a variable to hold the value of the solution --- this is updated
 * as the backtracking discovers permutations with a better value than
 * the presently stored permutation.
 *
 * The initial solution is generated as one of the two trivial solutions
 * --- either the diagonal solution (0, 1, 2, ...) or the antidiagonal
 * solution (...,2, 1, 0), whichever has the higher value --- as the sum
 * over k of benefit[k][solution[k]].  As a known solution, this
 * provides a lower limit when considering potential permutations.
 *
 * One can also quickly obtain an upper limit on a partial permutation.
 * While we are positioning values as [index], the known value for the
 * permutation will be the sum up to index of benefit[k][solution[k]].
 * Rows from [index+1] to [size-1] have not been assigned yet, and
 * correspondingly columns from perm[index+1] to perm[size-1] have not
 * been assigned.  One can obtain the column maximum value for each of
 * those columns and add them together.  The complete permutation cannot
 * possibly have a value greater than the total of its fixed part and
 * these column maxima, though it well might have one less.  If this
 * number is less than the already-known solution (lowerLimit), one can
 * immediately prune the decision tree --- discard this partial solution.
 *
 * Backtracking solution, using the above pruning, with threads doing
 * the work --- and sharing lowerLimit for pruning the decision trees.
 *
 * Language:  Java, version 5 or later (Scanner, printf)
 *
 * Author:  Timothy Rolfe
 */
import java.util.*;
import java.io.*;

public class BackThread
{  // Diagonal/antidiagonal permutation sets solution and lowerLimit
   int solution[], lowerLimit;
   int size, benefit[][];
   int nThreads;
   Compute[] engine;
   static boolean DEBUG = false;
   static boolean THR_TRACE = false;

   public static void main (String[] args) throws Exception
   {  new BackThread().process(args);  }

   void process(String[] args) throws Exception
   {  String filename = args.length == 0 ? "A5" : args[0];
      Scanner input = new Scanner( new File(filename) );
      int antisum, row, col, j, k, work[];
      long start, elapsed;

      nThreads = args.length > 1 ? Integer.parseInt(args[1]) : 4;
      engine = new Compute[nThreads];

      size = input.nextInt();
      solution = new int[size];
      benefit  = new int[size][size];
   // Read in the data. generate the diagonal solution
   // (row == col) and generate the diagonal total and
   // the antidiagonal total (row + col == size-1).
      antisum  = 0;  k = size;
      for (row = lowerLimit = 0; row < size; row++)
      {  solution[row] = row;
         for (col = 0; col < size; col++)
            benefit[row][col] = input.nextInt();
         lowerLimit += benefit[row][row];
         antisum  += benefit[row][--k];     // Note predecrement of k
      }
   // If the antidiagonal sum is the larger, make that the
   // initial solution and lowerLimit.
      if (antisum > lowerLimit)
      {  lowerLimit = antisum;
         for (row = 0, k = size; row < size; row++)
            solution[row] = --k;            // Note predecrement of k
      }
   // Capture the processing time.
      start = System.nanoTime();
      threadRun();
      elapsed = System.nanoTime() - start;

      System.out.printf("Backtracking with %d threads: %s\n",
                        nThreads, lowerLimit);
      for (row = 0; row < size; row++)
         System.out.printf("%3d", solution[row]);
      System.out.printf("\n%3.3f seconds\n", 1E-09*elapsed);
   }

// Isolate the logic for the total of column maxima
   int colMaxSum(int start, int []work)
   {  int sum = 0;
      for (int k = start; k < size; k++)
      {//Initial guess:  maximum right HERE.
          int row, col = work[k],
             columnMaximum = benefit[start][col];
      // Update as required.
         for (row = start+1; row < size; row++)
            if (columnMaximum < benefit[row][col])
                columnMaximum = benefit[row][col];
      // Add into the developing sum.
         sum += columnMaximum;
      }
      return sum;
   }

// Used by explore in generating the permutations
   static void swap(int[] x, int j, int k)
   {  int temp = x[j]; x[j] = x[k]; x[k] = temp;  }

   // Insure that only one thread at a time updates the solution
   synchronized void update(int []work, int score)
   {
      if (DEBUG)
      {  StringBuilder msg = new StringBuilder();
         msg.append(String.format("Replacing %d with %d: ",
                    lowerLimit, score));
         for (int j = 0; j < size; j++)
            msg.append(String.format("%3d", work[j]));
         System.out.println(msg.toString());
      }
      lowerLimit = score;
      System.arraycopy(work, 0, solution, 0, size);
   }

   void explore(int index, int []vect, int prefix)
   {  int k,       // Loop variable for swapping [index]..[size-1]
          hold;    // Value of fixed portion of a permutation,
                   // then used undoing the right rotation
      for ( k = index; k < size; k++ )
      {  int col, upperBound;
         swap(vect, index, k);
      // Add together column maxima
         upperBound = colMaxSum(index+1, vect);

         hold = prefix + benefit[index][vect[index]];

         // Selecting [size-2] also fixes [size-1] ---
         // a permutation has been completed.
         if (index == size-2)
         {  if ( lowerLimit < hold+upperBound )
            {  // Add in the last piece
               hold += benefit[size-1][vect[size-1]];
               update (vect, hold);
            }
            else if (DEBUG)
            {  StringBuilder msg = new StringBuilder();
               msg.append(String.format("Reject %d: ", hold+upperBound));
               for (int j = 0; j < size; j++)
                  msg.append(String.format("%3d", vect[j]));
               System.out.println(msg.toString());
            }
         }
      // Otherwise we still just have a partial permutation.
      // If it has potential for finding a better one, pursue it.
         else if (hold + upperBound > lowerLimit)
            explore(index+1, vect, hold);
         else if (DEBUG)
         {  StringBuilder msg = new StringBuilder();
            msg.append(String.format("Prune at %d, upper limit %d: ",
                     index, hold + upperBound));
            for (int j = 0; j < size; j++)
               msg.append(String.format("%3d", vect[j]));
            System.out.println(msg.toString());
         }
      }
      // Undo the one-cell rightward rotation done above
      hold = vect[index];
      for (k = index+1; k < size; k++)
         vect[k-1] = vect[k];
      vect[size-1] = hold;
   }

   void threadRun()
   {  int thread, lo = 0, hi;

      for (thread = 0; thread < nThreads; thread++)
      {  hi = size * (thread+1) / nThreads;
         engine[thread] = new Compute(lo, hi,  thread);
         lo = hi;
      }
      try
      {  for (thread = nThreads; thread-- > 0;)
            engine[thread].start();
          
         for (thread = 0; thread < nThreads; thread++)
            engine[thread].join();
      }
      catch (Exception e)
      {  e.printStackTrace();  }
   }

   // Inner class accesses outer class methods
   // and data.
   class Compute extends Thread
   {  int start, finish,   // Indices this thread is working on
          position;        // Which thread this is.
      int[] vect;          // Working solution, generated in constructor
      Compute(int lo, int hi, int thread)
      {  start = lo; finish = hi; position = thread;
         if (THR_TRACE)
            System.out.printf("Thread %d for range %d <= k < %d\n",
                              position, start, finish );
      // Generate the working permutation from the solution generated
      // This insure that ALL threads start with the same vector.
         vect = (int[]) solution.clone();
      }
   
      public void run()
      {
         long begin = System.nanoTime();
         
         if (THR_TRACE)
            System.out.printf("Thread %d started\n", position);
         for (int k = start; k < finish; k++)
         {  swap(vect, 0, k);
            explore(1, vect, benefit[0][vect[0]]);
            swap(vect, 0, k);
         }
         if (THR_TRACE)
            System.out.printf("%d thread, %3.3f seconds\n",
                  position, (System.nanoTime()-begin)*1E-09);
      }
   }
}
