#include <tc.h>

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#include "gr_gauss.h"

//#define DEBUG

/* number of lines given to worker for processing at a time
  (parallelism grain size) */
#define SLICESZ 128
#if N%SLICESZ
#  error "SLICESZ must be aliquot part of N, to keep the code simple"
#endif

#define NSLICES  (N/SLICESZ)

struct line_tptr {
  struct line tptr v;
};

struct line_tptrs_cslice {
  int from;
  int to;
  struct line_tptr a[SLICESZ];
};

struct char80 {
    char v[80];
};

struct slice {
    struct line line[SLICESZ];	/* the lines of this slice */
};

#ifndef TSYSTEM
#define tmesg(x...) fprintf(stdout,x)
#else
#define exit(n) abort()
#endif

void
dump (int row, const struct line *linep)
{
    int i, j;

    tmesg ("%d:   ", row);
    for (i=0; i<N+1; i++)
      tmesg ("%d ", !!BIT_TEST (linep->x, i));
    tmesg ("\n");
}


void usage ()
{
    tmesg ("Usage: gr_gauss INFILE\n");
    exit (1);
}


struct line_tptr_array {
  struct line_tptr a[N];
};

static int quality[N];

static struct line_tptr *winner_killers(int column)
{
  static struct line_tptr_array *_winner_killers;
  if (_winner_killers == NULL) {
    struct line_tptr_array tptr aa =
	tnew(struct line_tptr_array);
    int i;
    tvalidate(*aa);
    _winner_killers = aa;
    for (i = 0; i < N; i++) _winner_killers->a[i].v = NULL;
  }
  return _winner_killers->a + column;
}

void
print_line_solution (const struct line *lp, int j)
{
    if (! BIT_TEST (lp->x, j)) {
	tmesg("Kaboom!\n");
    }
    tmesg("x[%04d]=%d\n", j, !! BIT_TEST(lp->x, N));
}


/* Killer claim registration object.  Workers suggest their killers
   for each column.  The first killer claim for a column becomes the
   winner.  All subsequent claims for the same column are rejected,
   and the pointer to winner killer is returned.  The workers that
   can't find killer candidate for the bit in their slice call
   solicit_killer instead. */

tfun void
claim_killers (struct line_tptrs_cslice suggested_killers,
	       tout struct line_tptrs_cslice *out_winner_killers)
{
    int i, from = suggested_killers.from;

    out_winner_killers->from = suggested_killers.from;
    out_winner_killers->to = suggested_killers.to;

    for (i = out_winner_killers->from; i < out_winner_killers->to; i++) {
      struct line_tptr *global_killer = winner_killers(i);
      if (global_killer->v == NULL) {
	global_killer->v = suggested_killers.a[i-from].v;
	quality[i] = suggested_killers.to;
      } else {
	if (quality[i] < out_winner_killers->to)
	  out_winner_killers->to = quality[i];
      }
      out_winner_killers->a[i-from].v = global_killer->v;
    }
}


/* Once workers have finished, winner_killers contain diagonal matrix
   which can be relatively easily converted to solution.  This is done
   as the last sequental step of the program. */

tfun void
last_step (tout int *finished)
{

    int j, k = N-1;

    for (j = 0; j < N; j++)
      tptrwait(winner_killers(j)->v);

    print_line_solution (winner_killers(k)->v, k);
    while (--k >= 0) {
	/* convert to diagonal form as we go */
	if (BIT_TEST (winner_killers(k+1)->v->x, N)) {
	    for (j = k; j >= 0; j--)
		if (BIT_TEST (winner_killers(j)->v->x, k+1)) {
		    BIT_XOR (winner_killers(j)->v->x, k+1);
		    BIT_XOR (winner_killers(j)->v->x, N);
		}
	}
	else {
	    for (j = k; j >= 0; j--)
		BIT_CLR (winner_killers(j)->v->x, k+1);
	}
	print_line_solution (winner_killers(k)->v, k);
    }
    *finished = 1;
}


static struct char80 _file_n;
static char *file_n;
static FILE *f;

tfun void
open_input (struct char80 infile_n, tout int *finished)
{
    _file_n = infile_n;
    file_n = _file_n.v;
    f = fopen (file_n, "rb");
    if (!f) {
	tmesg ("%s: %s\n", file_n, strerror (errno));
	exit (1);
    }
    *finished = 1;
}


/* Read one slice from the file.  Fortunately, the order of lines in
   the input file does not matter, so we can simply read the file
   sequentally, no position */

tfun void
read_slice (tout struct slice *slicep)
{
    if (fread (slicep->line, sizeof (slicep->line), 1, f) != 1) {
	if (feof (f)) {
	    tmesg ("%s shorter than expected\n", file_n);
	    exit (1);
	}
	if (ferror (f)) {
	    tmesg ("%s: %s\n", file_n, strerror (errno));
	    exit (1);
	}
    }
}

tfun void
close_input (tout int *finished)
{
    int x;
    fread (&x, 1, 1, f);
    if (! feof (f)) {
	tmesg ("warning: %s is larger than expected\n", file_n);
    }
    fclose (f);
    *finished = 1;
}

/* Process the matrix slice.  This function uses Gaussian technique to
   leave exactly one "1" in columns [0...N-1] of every line. */

tfun void
worker (tout int *finished)
{
    struct slice tptr slicep = tnew(struct slice);

    int n_registered_killers = 0;
    int i,j,j1, from = 0;

    int registered_killers[(SLICESZ+INTBITS-1)/INTBITS];
    memset (registered_killers, 0, sizeof(registered_killers));

    TCALL ((read_slice (slicep)), (0));
    tptrwait(slicep);

    while (n_registered_killers != SLICESZ) {
      tval struct line_tptrs_cslice killers;
      struct line_tptrs_cslice tptr my_killers = 
	tnew(struct line_tptrs_cslice);

      int sellines[SLICESZ];

      int marked_killers[(SLICESZ+INTBITS-1)/INTBITS];
      memset (marked_killers, 0, sizeof(marked_killers));


#ifdef DEBUG      
      tmesg("n_registered_killers = %d\n", n_registered_killers);
#endif

      for (i = from; i < N; i++) {

#ifdef DEBUG
	tmesg("- for i = %d -\n", i);
	for (j = 0; j < SLICESZ; j++) dump(j,&slicep->line[j]);
#endif
	
	/* Find first 1 bit in column i, except in the lines
	   already registered as killers */
	for (j = 0; j < SLICESZ; j++) {
	  if (BIT_TEST (registered_killers, j))
	    continue;
	  if (BIT_TEST (marked_killers, j)) {
	    //tmesg("%d already marked\n", j);
	    continue;
	  }
	  if (BIT_TEST (slicep->line[j].x, i)) {
	    //tmesg("line %d has 1 in %d column::: %d\n", j, i, slicep->line[j].x[0]);
	    // dump(j, &slicep->line[j]);
	    break;
	  }
	}
	if (j == SLICESZ) {
	  if (i == from) { from++; continue; }
	  else           break;
	}

#ifdef DEBUG
	tmesg("marking line %d\n", j);
#endif

	BIT_SET(marked_killers, j);
	sellines[i-from] = j;

	for (j1 = 0; j1 < SLICESZ; j1++) {
	  if (BIT_TEST (registered_killers, j1))
	    continue;
	  if (j1 == j)
	    continue;
	  if (BIT_TEST (slicep->line[j1].x, i)) {
	    //tmesg("xoring...\n");
	    line_xor (&slicep->line[j1], &slicep->line[j]);
	  }
	}

      }
      my_killers->from = from;
      my_killers->to = i;

#ifdef DEBUG
      tmesg("request column:line\n");
      for (i = my_killers->from; i < my_killers->to; i++) 
	tmesg("%d:%d ", i,sellines[i-from]);
      tmesg("\n");
#endif


      for (i = my_killers->from; i < my_killers->to; i++) {
	my_killers->a[i-from].v = tnew(struct line);
	*(my_killers->a[i-from].v) = slicep->line[sellines[i-from]];
      }
      tvalidate(*my_killers);
      TCALL ((claim_killers(*my_killers, &killers)), (0));
      /* Now wait for results of our claim */

#ifdef DEBUG
      tmesg("reply column:line\n");
      for (i = killers.from; i < killers.to; i++) 
	tmesg("%d:%d ", i,sellines[i-from]);
      tmesg("\n");
#endif

      for (i = killers.from; i < killers.to; i++) {
#if 0
	tmesg("my = %p, real = %p\n", 
	      killers.a[i-from].v, my_killers->a[i-from].v);
#endif
	if (killers.a[i-from].v == my_killers->a[i-from].v) {
	  /* Our killer candidate succeeded the registration */
	  BIT_SET (registered_killers, sellines[i-from]);
	  n_registered_killers++;
	}
	else {
	  /* Our killer candidate rejected, we got a pointer
	     to succesful killer instead.  Kill our killer
	     with winner killer */
	  line_xor (&slicep->line[sellines[i-from]], killers.a[i-from].v);
	}
      }

      from = killers.to;
    }

    *finished = 1;

    /* slice is not tfreed upon return because the center will
       retreive lines from it via winner killers tptrs */
}


int
tmain (int argc, char* argv[])
{
    int j,n,k;
    struct { tval int x; } finished[NSLICES];
    struct char80 infile_n;
    tval int dummy0, dummy1, dummy2;

    if (argc != 2)
	usage ();
    if (strlen (argv[1]) > sizeof (struct char80)) {
	tmesg ("Sorry, too long input file name\n");
	exit (1);
    }
    strcpy (infile_n.v, argv[1]);

    /* read input file and start workers as we read */
    TCALL ((open_input (infile_n, &dummy0)), (0));
    (volatile int)dummy0;

    for (n = 0; n < NSLICES; n++) {
	TCALL((worker (&finished[n].x)), (n/2,GR_CI_FIXED));
    }

    /* wait for all workers to finish */
    for (k = 0; k < NSLICES; k++) {
	(volatile int)finished[k].x;
    }

    tmesg("Workers completed\n");

    TCALL ((close_input (&dummy1)), (0));
    (volatile int)dummy1;

#if 0
    /* collect and print the results */
    TCALL ((last_step (&dummy2)), (0));
    (volatile int)dummy2;
#endif

    return 0;
}