/* Linear Assignment Problem --- parallel solution of the Branch-and-Bound
 * implementation.
 *
 * Separate the priority queue portion (which is implemented in the
 * master process) from the state expansion portion (which is in the
 * slave processes).  Communication is through a parameter vector of
 * State objects.  The threads execute the "expand" method, which
 * receives as [0] the state to be expanded and then fills the array
 * with the states that this expands into.  As its value, expand
 * returns the number of states in the state array.  Communication
 * between the threads and the master process is similarly through a
 * vector of state objects.  The thread passes the vector along with
 * a parameter indicating the number of states in the vector.  The
 * master process then handles those states, discarding them, adding
 * them to the priority queue, or updating the solution based on the
 * current lowerLimit.  On the return, the next state to be expanded
 * is returned through [0] --- but the method also returns a boolean
 * to indicate whether there IS a state to be expanded.  If the
 * priority queue is empty a false is returned.  If any threads are
 * still active at that point, there may be states to expand later,
 * so that the thread simply sleeps for a few milliseconds before
 * making another attempt (reporting zero states in the vector).
 *
 * Language:  Java, version 5 or later (Scanner, printf)
 *
 * Note:  the class MyState is an inner class:  it has full access
 * to all the data and methods of the outer class, while the outer
 * class similarly has access to all the data of the inner class.
 *
 * Author:  Timothy Rolfe
 */
import java.util.*;
import java.io.*;

public class PQ_Thread
{
   // Diagonal permutation sets solution, maxValue AND lowerLimit
   static int solution[], lowerLimit;
   static int size, benefit[][];
   static boolean DEBUG  = false;
   static boolean THREAD =  true;
   static boolean THR_TRACE = false;
   static boolean TERMINATE = false;
   Compute[] engine;
   Queue<MyState> work;

   static void swap(int[] x, int p, int q)
   {  int temp = x[p]; x[p] = x[q];  x[q] = temp;  }

   static int colMaxSum(int start, int []work)
   {  int sum = 0;
      for (int k = start; k < size; k++)
      {
         int row, col = work[k],
             columnMaximum = benefit[start][col];
         for (row = start+1; row < size; row++)
            if (columnMaximum < benefit[row][col])
                columnMaximum = benefit[row][col];
         sum += columnMaximum;
      }
      return sum;
   }

   // In entry, inOut[0] contains the state to be expanded
   // On exit, inOut contains the states generated, and the
   // number generated is returned as the function value;
   static public int expand(MyState inOut[])
   {
      MyState inWork = inOut[0];
      int vector[] = inWork.vector;
      int index    = inWork.index,
          value    = inWork.value;
      int n        = 0, // Number of states put INTO inOut
          newValue,     // Adding in benefit[index][vector[index]]
          myMax,        // Upper limit from column maxima
          k;            // Loop variable

      if (DEBUG)
      {  StringBuilder msg = new StringBuilder();

         msg.append(String.format("Expanding [%d] bound %d: ",
                    index, inWork.myMax));
         for (k = 0; k < size; k++)
            msg.append(String.format("%3d", vector[k]));
         System.out.println(msg.toString());
      }
      for (k = index; k < size; k++)
      {
         swap (vector, index, k);
         newValue = value + benefit[index][vector[index]];
         myMax = newValue + colMaxSum(index+1, vector);
         if (myMax < lowerLimit)
         {
            if (DEBUG)
               System.out.printf("Discard %d < %d\n",
                  myMax, lowerLimit);
            continue;
         }
         inWork = new MyState (vector, index+1, newValue, myMax);
         inOut[n++] = inWork;
         if (DEBUG)
            System.out.printf("Adding %s as no. %d\n", inWork, n);
      }
      return n;
   }

   void displaySolution(int []solution, int lowerLimit)
   {
      int idx;

      if (THREAD)
         System.out.printf("BandB with %d threads: %s\n",
               engine.length, lowerLimit);
      else
         System.out.printf("BandB: %s\n", lowerLimit);
      for (idx = 0; idx < solution.length; idx++)
         System.out.printf ("%3d", solution[idx]);
      System.out.println();
   }

   public static void main (String[] args) throws Exception
   {  new PQ_Thread().process(args);  }

   void process (String[] args) throws Exception
   {
      String filename = args.length == 0 ? "A5" : args[0];
      int nThreads = args.length > 1 ? Integer.parseInt(args[1]) : 4;
      Scanner input = new Scanner( new File(filename) );
      int row, col, j, k, work[];
      int antiSum = 0;
      long start, elapsed;

      size = input.nextInt();
      solution = new int[size];
      benefit = new int[size][size];
      for (row = lowerLimit = 0, k = size; row < size; row++)
      {  solution[row] = row;
         for (col = 0; col < size; col++)
            benefit[row][col] = input.nextInt();
         lowerLimit += benefit[row][row];
         antiSum  += benefit[row][--k];
      }
      if (antiSum > lowerLimit)
      {  lowerLimit = antiSum;
         for (j = 0, k = size; j < size; j++)
            solution[j] = --k;
      }
      work = (int[]) solution.clone();
      start = System.nanoTime();
      if (THREAD)
         runThreads(nThreads);
      else
         findMax(work);
      elapsed = System.nanoTime() - start;
      displaySolution(solution, lowerLimit);
      System.out.printf("Time:  %3.3f seconds\n", 1E-09*elapsed);
   }

   void findMax(int[] perm)
   {
      MyState[] inOut = new MyState[size]; // Vector of states from expand
      int     nEntries,                // Actual entries in the vector
              entry;                   // Loop variable

      work = new PriorityQueue<MyState>(); // static variable
      work.add ( new MyState ( perm, 0, 0, colMaxSum(0, perm) ) );
      while ( ! work.isEmpty() )
      {
         inOut[0] = work.remove();
         if (inOut[0].myMax <= lowerLimit)
         {  if (DEBUG)
               System.out.println("Discard unprocessed " + inOut[0]);
            continue;
         }
         nEntries = expand(inOut);
         if (nEntries == 0)
            continue;
         if (inOut[0].index < size-1)
         {  for (entry = 0; entry < nEntries; entry++)
            {  if (inOut[entry].myMax <= lowerLimit)
               {  if (DEBUG)
                     System.out.printf("Discarding %d states, value <= %d\n",
                        nEntries-entry, inOut[entry].myMax);
//                continue;
               }
               work.add(inOut[entry]);
            }
         }
         else
         {  for (entry = 0; entry < nEntries; entry++)
            {  if (inOut[entry].myMax > lowerLimit)
                  update(inOut[entry]);
               else if (DEBUG)
                  System.out.printf("Discarding %s\n",
                        inOut[entry]);
            }
         }
      }
   }

   // NOTE:  This is called when better.index == size-2, so that
   // better.myMax is actually the full value of the state
   static synchronized void update(MyState better)
   {
      if (DEBUG)
         System.out.printf("Replacing %d with %d:  %s\n",
            lowerLimit, better.myMax, better);
      lowerLimit = better.myMax;
      System.arraycopy(better.vector, 0, solution, 0, size);
   }

   void runThreads(int nThreads)
   {  int k, n;
      MyState[] init = new MyState[size];

      // Initialize global variables
      engine = new Compute[nThreads];
      work   = new PriorityQueue<MyState>(128);

      // Initialize the priority queue based
      // on fixing subscript 0
      init[0] = new MyState ( solution, 0, 0,
                    colMaxSum(0, solution));
      n = expand(init);
      // Initial setting of the queue
      for (k = 0; k < n; k++)
         work.offer(init[k]);
      // The thread creation must be separate
      // from the start since threads may
      // examine the Engine vector.
      for (k = 0; k < nThreads; k++)
         engine[k] = new Compute(k);
      for (k = 0; k < nThreads; k++)
         engine[k].start();
      try
      {  for (k = 0; k < nThreads; k++)
            engine[k].join();
      }
      catch (Exception e)
      {  e.printStackTrace();  }
   }

   // Note:  this has direct access to data in MyState objects
   // The job vector contains nEntries states to be entered into
   // the priority queue.
   synchronized boolean jobcentral(int nEntries, MyState[] job)
   {  int k;

      if (nEntries > 0)
      {  // For partial permutations, either discard or enter into pq
         if (job[0].index < size-1)
         {  for (k = 0; k < nEntries; k++)
            {  if (job[k].myMax <= lowerLimit)
               {  if (DEBUG)
                     System.out.printf("Discarding state with value %d\n",
                        job[k].myMax);
               }
               else
                  work.add(job[k]);
            }
         }
         // Complete permutation, discard or update solution
         else
         {  for (k = 0; k < nEntries; k++)
            {  if (job[k].myMax > lowerLimit)
                  update(job[k]);
               else if (DEBUG)
                  System.out.printf("Discarding %s\n",
                        job[k]);
            }
         }
      }
      // If the queue is empty, continue processing if any of
      // the threads are still active.  Otherwise terminate.
      if (work.isEmpty())
      {  boolean done = true;

         for (k = 0; k < engine.length; k++)
            if (engine[k].active)
            {  done = false; break;  }
         TERMINATE = done;
         return false;  // Tell thread to wait a bit
      }
      // Implicit "if (!work.isEmpty())"
      job[0] = work.remove();
      return true;      // Tell thread to process
   }

   // NOTE:  this inner class accesses
   // outer class data and methods and
   // so cannot be static
   class Compute extends Thread
   {  boolean active;
      int     myPosition;

      Compute(int myPosition)
      {  active = false;  this.myPosition = myPosition;  }

      public void run()
      {  MyState[] job = new MyState[size];
         int nValid = 0;

         try  // Thread.sleep may throw an exception
         {  while (!TERMINATE)
            {  // jobcentral return true if it
               // returns a MyState object in job
               active = jobcentral(nValid, job);
               if (active)
               {  if (THR_TRACE)
                     System.out.printf("Thread %d expanding %s\n",
                           myPosition, job[0]);
                  nValid = expand(job);
               }
               else   // Set to no solutions, wait a few milliseconds.
               {  nValid = 0;  Thread.sleep(5);  }
            }
         }
         catch (Exception e)
         {  e.printStackTrace();  }
      }
   }  // end class Compute

   static class MyState implements Comparable
   {  public int []vector;
      public int   index;
      public int   value;
      public int   myMax;

      public MyState (int []vector, int index, int value, int myMax)
      {  this.vector = (int[]) vector.clone();
         this.index  = index;
         this.value  = value;
         this.myMax  = myMax;
      }

      // Note:  inverted comparison for max-priority-queue behavior
      public int compareTo(Object o)
      {  return ((MyState)o).value - this.value;  }

      public String toString()
      {  StringBuilder msg = new StringBuilder();

         msg.append(String.format("[%d] value %d, max %d:  ",
            index, value, myMax) );
         for (int k = 0; k < vector.length; k++)
            msg.append(String.format("%3d", vector[k]) );
         return msg.toString();
      }
   } // end class MyState

} // end class PQ_Thread
