#include "intercomm.h"
#include "general_bsparti.h"
#include "simple_ttable.h"
#include "global.h"
#include "pgme.h"
#include "region.h"
#include "util.h"
#include "error.h"
#include <stdlib.h>
#include <string.h>

/*
 * Smaller / larger of two values.
 * Every use of an argument is parenthesized so that expressions containing
 * lower-precedence operators (e.g. `x & 1`) expand correctly.
 * Each argument may be evaluated twice: do not pass expressions with side
 * effects.
 */
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))


/*
 * Recursively build the decomposition tree for a set of blocks.
 *
 * At every level one dimension that has not been partitioned yet is chosen.
 * A dimension is usable when, walking upward from global index 0, the blocks
 * that start at the current index all end at the same index, the next group
 * starts right after that end, and every block ends up in exactly one group.
 * Each group becomes a child node and is processed recursively with that
 * dimension marked as done.
 *
 * tree   - node to fill in; tree->nDims must already be set by the caller
 * blocks - array of `size` block pointers belonging to this node
 * size   - number of entries in `blocks`
 * depth  - number of dimensions already partitioned above this node
 * r      - per-dimension flags (1 = already partitioned); only read here
 *
 * Returns 1 on success and 0 when the blocks cannot be partitioned this way
 * or when memory cannot be allocated.
 */
int grow_tree(Decomp_TREE* tree, block** blocks, int size, int depth, int* r) {
  int i, j, k, p, d, wrong, last, found, ok;
  int upper[MAX_lbB_DIM];
  block** sorted;
  int* buckets, *counts;
  int remaining[MAX_lbB_DIM];

  /* nothing to place: avoids reading blocks[0] and zero-sized allocations */
  if (size < 1)
    return 0;

  for (j = 0; j < tree->nDims; ++j)
    remaining[j] = r[j];

  /* we've partitioned all the dimensions */
  if (depth == tree->nDims) {
    tree->num_of_partitions = 0;
    tree->keys = 0;
    tree->children = 0;
    /* NOTE(review): only blocks[0] is attached; any further entries are ignored */
    tree->block_info = *blocks;
    return 1;
  }

  /* find the upper bound of all the regions (never below 0) */
  memset(&upper[0], 0, sizeof(int)*tree->nDims);
  for (i = 0; i < tree->nDims; ++i) {
    for (j = 0; j < size; ++j) {
      upper[i] = max(upper[i], blocks[j]->end_global[i]);
    }
  }

  sorted = (block**)calloc(size, sizeof(block*)); /* blocks sorted on their global lower bound */
  buckets = (int*)calloc(size, sizeof(int));      /* first index in sorted of each bucket */
  counts = (int*)calloc(size, sizeof(int));       /* the number of blocks per bucket */
  if (sorted == NULL || buckets == NULL || counts == NULL) {
    free(counts);
    free(buckets);
    free(sorted);
    return 0;
  }

  /*
   * go through the remaining (unpartitioned) dimensions and organize
   * blocks based on their starting and ending points
   * use this information to find the next dimension to partition
   * based on the logic that all blocks that share a starting position
   * and an ending position define a valid partition
   */
  i = p = 0;
  found = 0;
  for (d = 0; d < tree->nDims && !found; ++d) {
    if (remaining[d] == 1)
      continue;

    i = k = p = 0;
    wrong = 0;
    for (;;) {
      /* gather every block that starts at k into bucket p */
      buckets[p] = i;
      counts[p] = 0;
      for (j = 0; j < size; ++j) {
        if (blocks[j]->start_global[d] == k) {
          sorted[i++] = blocks[j];
          ++counts[p];
        }
      }

      /* no block starts here: there is a gap, so this dimension is unusable */
      if (counts[p] == 0) {
        wrong = 1;
        break;
      }

      /* all members of the bucket must share the same end */
      last = sorted[buckets[p]]->end_global[d];
      for (j = buckets[p] + 1; j < i; ++j) {
        if (sorted[j]->end_global[d] != last) {
          wrong = 1;
          break;
        }
      }

      /* an end below the start would make k stall or move backwards */
      if (wrong || last < k) {
        wrong = 1;
        break;
      }

      ++p;
      if (last >= upper[d])
        break;            /* reached the top of this dimension */
      k = last + 1;       /* last < upper[d], so this cannot overflow */
    }

    /* usable only if the walk was clean and accounted for every block */
    if (!wrong && i == size) {
      remaining[d] = 1;
      found = 1;
    }
  }

  if (!found) {
    free(counts);
    free(buckets);
    free(sorted);
    return 0;
  }

  tree->num_of_partitions = p;
  tree->keys = (int*)calloc(p, sizeof(int)); /* zero-filled; never populated in this function */
  tree->children = (Decomp_TREE*)calloc(p, sizeof(Decomp_TREE));
  tree->block_info = 0;

  ok = 1;
  if (tree->keys == NULL || tree->children == NULL) {
    /* leave the node in an empty, consistent state */
    free(tree->keys);
    free(tree->children);
    tree->keys = 0;
    tree->children = 0;
    tree->num_of_partitions = 0;
    ok = 0;
  }

  for (j = 0; ok && j < p; ++j) {
    tree->children[j].nDims = tree->nDims;
    if (!grow_tree(&(tree->children[j]), &(sorted[buckets[j]]), counts[j], depth+1, remaining))
      ok = 0;
  }

  free(counts);
  free(buckets);
  free(sorted);
  return ok;
}

/*
 * Create a block-decomposition descriptor.
 *
 * rank   - number of dimensions; must be in [1, MAX_lbB_DIM]
 * blocks - count * 2 * rank integers; for block i the first `rank` values
 *          are the inclusive lower bounds and the next `rank` values the
 *          inclusive upper bounds
 * tasks  - count integers; tasks[i] is the process assigned to block i and
 *          must be smaller than NumNodeMyPgme()
 * count  - number of blocks (>= 1)
 *
 * Returns a descriptor to be released with IC_Free_desc(), or 0 after
 * reporting the problem through errormsg().
 */
IC_Desc* IC_Create_bdecomp_desc(int rank, int* blocks, int* tasks, int count) {
  IC_Desc* desc = NULL;
  block** b = NULL;
  decomp_Birreg* decomp = NULL;
  int remaining[MAX_lbB_DIM];
  int created = 0;   /* number of entries of b[] that hold a live block */
  int i, j;

  setcall("IC_Create_bdecomp_desc");
  /* rank is bounded because grow_tree indexes MAX_lbB_DIM-sized arrays by dimension */
  if (rank < 1 || rank > MAX_lbB_DIM || blocks == 0 || tasks == 0 || count < 1) {
    errormsg("invalid parameters");
    return 0;
  }

  memset(remaining, 0, sizeof(remaining));

  desc = (IC_Desc*)malloc(sizeof(IC_Desc));
  b = (block**)calloc(count, sizeof(block*));
  if (desc == NULL || b == NULL) {
    errormsg("out of memory");
    goto fail;
  }

  decomp = Allocate_decomp_Birreg(rank);
  if (decomp == NULL) {
    errormsg("out of memory");
    goto fail;
  }
  desc->type = IC_BDECOMP;
  desc->spec = decomp;

  /* convert the array into blocks */
  for (i = 0; i < count; ++i) {
    const int* lo = &blocks[i*2*rank];
    const int* hi = lo + rank;

    /* validate before allocating so a rejected entry leaves nothing behind */
    if (tasks[i] >= NumNodeMyPgme()) {
      errormsg("invalid task assignment");
      goto fail;
    }

    b[i] = Allocate_Block(rank);
    if (b[i] == NULL) {
      errormsg("out of memory");
      goto fail;
    }
    ++created;

    for (j = 0; j < rank; ++j) {
      b[i]->size[j] = hi[j] - lo[j] + 1;
      b[i]->start_global[j] = lo[j];
      b[i]->end_global[j] = hi[j];
      b[i]->coord[j] = -1;
    }
    b[i]->proc = tasks[i];
  }

  decomp->root->nDims = rank;
  decomp->col_majeur = 0;
  if (!grow_tree(decomp->root, b, count, 0, remaining)) {
    errormsg("unusable block bound specification");
    /*
     * NOTE(review): grow_tree may already have attached some blocks to leaf
     * nodes before failing; confirm that Free_Decomp_Birreg does not release
     * them a second time.
     */
    goto fail;
  }

  /* the tree now references the blocks; only the pointer array is released */
  free(b);

  return desc;

fail:
  for (j = 0; j < created; ++j)
    Free_Block(b[j]);
  free(b);
  if (decomp != NULL)
    Free_Decomp_Birreg(decomp);
  free(desc);
  return 0;
}

/*
 * Create a translation-table descriptor.
 *
 * globals - count global indices
 * locals  - count local offsets; locals[i] belongs to globals[i]
 * tasks   - count process numbers; tasks[i] owns globals[i] and must be
 *           smaller than NumNodeMyPgme()
 * count   - number of entries (>= 1)
 *
 * The table also records the smallest and largest global index seen.
 *
 * Returns a descriptor to be released with IC_Free_desc(), or 0 after
 * reporting the problem through errormsg().
 */
IC_Desc* IC_Create_ttable_desc(int* globals, int* locals, int* tasks, int count) {
  IC_Desc* desc;
  int i, lo, hi;
  simple_ttable* table;

  setcall("IC_Create_ttable_desc");
  if (globals == 0 || locals == 0 || tasks == 0 || count < 1) {
    errormsg("invalid parameters");
    return 0;
  }

  desc = (IC_Desc*)malloc(sizeof(IC_Desc));
  if (desc == NULL) {
    errormsg("out of memory");
    return 0;
  }

  table = Allocate_Simple_TTABLE(count);
  if (table == NULL) {
    errormsg("out of memory");
    free(desc);
    return 0;
  }
  desc->type = IC_TTABLE;
  desc->spec = table;

  table->nData = count;
  lo = hi = globals[0];
  for (i = 0; i < count; ++i) {
    /* reject the whole table on the first out-of-range process number */
    if (tasks[i] >= NumNodeMyPgme()) {
      errormsg("invalid task assignment");
      FreeSimpleTable(table);
      free(desc);
      return 0;
    }

    table->global_index[i] = globals[i];
    table->proc_num[i] = tasks[i];
    table->local_offset[i] = locals[i];

    /* track the range of global indices */
    if (globals[i] < lo)
      lo = globals[i];
    if (globals[i] > hi)
      hi = globals[i];
  }
  table->global_low_bound = lo;
  table->global_high_bound = hi;

  return desc;
}

/*
 * Release a descriptor together with the specification it carries.
 * The specification is released with the routine matching desc->type
 * (IC_BDECOMP or IC_TTABLE); for any other type only the descriptor
 * itself is freed. A null descriptor is ignored.
 */
void IC_Free_desc(IC_Desc* desc) {
  if (!desc)
    return;

  if (desc->type == IC_BDECOMP) {
    Free_Decomp_Birreg((decomp_Birreg*)desc->spec);
  }
  if (desc->type == IC_TTABLE) {
    FreeSimpleTable((simple_ttable*)desc->spec);
  }

  free(desc);
}
