#include "mbp_ctest.h"
#include <bsparti.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Four sets of two-element index triples, one set per 2x2 diagonal block
 * ({0..1}, {2..3}, {4..5}, {6..7} in both dimensions).
 *
 * NOTE(review): the prefixes presumably mean s = source, r = receiver and the
 * suffixes l = lower bound, u = upper bound, s = stride -- confirm against the
 * code that consumes these arrays.
 */

/* Block 1: indices 0..1 in each dimension, unit stride. */
int sl1[] = {0, 0};
int su1[] = {1, 1};
int ss1[] = {1, 1};
int rl1[] = {0, 0};
int ru1[] = {1, 1};
int rs1[] = {1, 1};

/* Block 2: indices 2..3 in each dimension, unit stride. */
int sl2[] = {2, 2};
int su2[] = {3, 3};
int ss2[] = {1, 1};
int rl2[] = {2, 2};
int ru2[] = {3, 3};
int rs2[] = {1, 1};

/* Block 3: indices 4..5 in each dimension, unit stride. */
int sl3[] = {4, 4};
int su3[] = {5, 5};
int ss3[] = {1, 1};
int rl3[] = {4, 4};
int ru3[] = {5, 5};
int rs3[] = {1, 1};

/* Block 4: indices 6..7 in each dimension, unit stride. */
int sl4[] = {6, 6};
int su4[] = {7, 7};
int ss4[] = {1, 1};
int rl4[] = {6, 6};
int ru4[] = {7, 7};
int rs4[] = {1, 1};

#if MBP_VERSION > 1
/*
 * Section every dimension of `decomp` at its midpoint.
 *
 * For each dimension i in [0, decomp->rank) this passes parti_section() the
 * dimension number i, a count of 2, and the index pair
 * { decomp->size[i]/2, decomp->size[i] }.
 * NOTE(review): the pair presumably gives the end of each half -- confirm
 * against the parti_section() documentation.
 *
 * All scratch arrays are allocated here and released before returning.
 * If a scratch allocation fails, a diagnostic is written to stderr and the
 * process exits with EXIT_FAILURE (previously this dereferenced NULL).
 */
void parti_bisect(parti_decomp_t* decomp) {
  int i, *dim, *cnt, **idx;

  dim = (int*)calloc(decomp->rank, sizeof(int));
  cnt = (int*)calloc(decomp->rank, sizeof(int));
  idx = (int**)calloc(decomp->rank, sizeof(int*));

  /* calloc may legitimately return NULL for a zero-sized request, so only
     treat NULL as a failure when there is something to allocate. */
  if (decomp->rank > 0 && (dim == NULL || cnt == NULL || idx == NULL)) {
    fprintf(stderr, "parti_bisect: out of memory\n");
    exit(EXIT_FAILURE);
  }

  for (i = 0; i < decomp->rank; ++i) {
    dim[i] = i;
    cnt[i] = 2;
    idx[i] = (int*)calloc(2, sizeof(int));
    if (idx[i] == NULL) {
      fprintf(stderr, "parti_bisect: out of memory\n");
      exit(EXIT_FAILURE);
    }
    idx[i][0] = decomp->size[i]/2;
    idx[i][1] = decomp->size[i];
  }

  parti_section(decomp, dim, cnt, idx);

  /* The scratch arrays are still owned by this function after the call. */
  for (i = 0; i < decomp->rank; ++i) {
    free(idx[i]);
  }
  free(idx);
  free(cnt);
  free(dim);
}

/*
 * Build a bisected DIMX x DIMY decomposition, allocate and fill the local
 * array, and translate the PARTI array into an IC descriptor.
 * (Variant compiled when MBP_VERSION > 1.)
 *
 * desc        - out: receives the result of IC_Translate_parti_descriptor().
 * p           - unused in this variant.
 * r           - folded into every element value as 100*r.
 *               NOTE(review): presumably the caller's process rank -- confirm.
 * local_sizes - out: filled by parti_local_size(); entries [0] and [1] are
 *               read here as the row and column counts of the local array.
 *
 * Returns a calloc'ed, row-major array of local_sizes[0]*local_sizes[1] ints
 * with element (i, j) set to 100*r + 10*i + j. The array is not freed here.
 * If the allocation fails, a diagnostic is written to stderr and the process
 * exits with EXIT_FAILURE (previously the fill loop wrote through NULL).
 */
int* create_bdecomp(IC_Desc** desc, int p, int r, int* local_sizes) {
  parti_decomp_t* decomp;
  parti_darray_t* darray;

  int size[RANK] = {DIMX, DIMY};
  int igcl[RANK] = {0, 0};
  int igcu[RANK] = {0, 0};
  int i, j;
  size_t count;
  int* A;

  (void)p; /* only the legacy variant needs the processor count */

  decomp = parti_create_idecomp(RANK, size);
  parti_bisect(decomp);
  darray = parti_ialign(decomp, igcl, igcu);
  /* Called for its effect on local_sizes; the return value is not needed. */
  (void)parti_local_size(darray, local_sizes);

  /* initialize memory */
  /* Multiply in size_t so the element count cannot overflow int. */
  count = (size_t)local_sizes[0] * (size_t)local_sizes[1];
  A = (int*)calloc(count, sizeof(int));
  if (A == NULL && count > 0) {
    fprintf(stderr, "create_bdecomp: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for (i = 0; i < local_sizes[0]; ++i) {
    for (j = 0; j < local_sizes[1]; ++j) {
      A[i*local_sizes[1] + j] = 100*r + 10*i + j;
    }
  }

  *desc = IC_Translate_parti_descriptor(darray);

  /* debugging */
  {
    /* Distinct name so this does not shadow the parti_decomp_t freed below. */
    decomp_Birreg* irreg = (*desc)->spec;
    print_irr(irreg->root);
  }

  parti_free_darray(darray);
  parti_free_decomp(decomp);

  return A;
}

#else
/*
 * Build a "BB"-distributed DIMX x DIMY decomposition over p processors,
 * allocate and fill the local array, and translate the PARTI array into an
 * IC descriptor. (Variant compiled when MBP_VERSION is not greater than 1.)
 *
 * desc        - out: receives the result of IC_Translate_parti_descriptor().
 * p           - processor count: passed to vProc() and used as the upper
 *               embed() bound (p-1).
 * r           - folded into every element value as 100*r.
 *               NOTE(review): presumably the caller's process rank -- confirm.
 * local_sizes - out: filled by laSizes(); entries [0] and [1] are read here
 *               as the row and column counts of the local array.
 *
 * Returns a calloc'ed, row-major array of local_sizes[0]*local_sizes[1] ints
 * with element (i, j) set to 100*r + 10*i + j. The array is not freed here.
 * If the allocation fails, a diagnostic is written to stderr and the process
 * exits with EXIT_FAILURE (previously the fill loop wrote through NULL).
 */
int* create_bdecomp(IC_Desc** desc, int p, int r, int* local_sizes) {
  int ad[RANK] = {0, 1};
  int ds[RANK] = {DIMX, DIMY};
  int igl[RANK] = {0, 0};
  int igr[RANK] = {0, 0};
  int egl[RANK] = {0, 0};
  int egr[RANK] = {0, 0};
  int ef[RANK] = {0, 0};
  int dd[RANK] = {0, 1};

  int* A;
  int i, j;
  size_t count;

  VPROC* vp;
  DECOMP* dc;
  DARRAY* da;

  vp = vProc(1, &p);
  dc = create_decomp(RANK, ds);
  embed(dc, vp, 0, p-1);
  distribute(dc, "BB");
  da = align(dc, RANK, ad, ds, igl, igr, egl, egr, ef, dd);
  laSizes(da, local_sizes);

  /* initialize memory */
  /* Multiply in size_t so the element count cannot overflow int. */
  count = (size_t)local_sizes[0] * (size_t)local_sizes[1];
  A = (int*)calloc(count, sizeof(int));
  if (A == NULL && count > 0) {
    fprintf(stderr, "create_bdecomp: out of memory\n");
    exit(EXIT_FAILURE);
  }
  for (i = 0; i < local_sizes[0]; ++i) {
    for (j = 0; j < local_sizes[1]; ++j) {
      A[i*local_sizes[1] + j] = 100*r + 10*i + j;
    }
  }

  *desc = IC_Translate_parti_descriptor(da);

  /* debugging */
  {
    decomp_Birreg* decomp = (*desc)->spec;
    print_irr(decomp->root);
  }

  /* NOTE(review): vp, dc and da are never released; the cleanup calls below
     are disabled and the reason is not visible in this file. */
/*   delete_DARRAY(da); */
/*   delete_DECOMP(dc); */
/*   delete_VPROC(vp); */
/*   cleanup_after_PARTI(); */

  return A;
}
#endif

/*
 * Write a row-major 2-D int array to `fp` as text.
 *
 * fp    - destination stream.
 * A     - first element of the array; element (row, col) is read from
 *         A[row*sizes[1] + col].
 * sizes - sizes[0] is the number of rows, sizes[1] the number of columns.
 *
 * Each element is printed with "%3d " (so every row ends with a trailing
 * space) and each row is terminated by a newline.
 */
void print_array(FILE* fp, int* A, int sizes[]) {
  const int nrows = sizes[0];
  const int ncols = sizes[1];
  int row = 0;

  while (row < nrows) {
    int col = 0;
    while (col < ncols) {
      fprintf(fp, "%3d ", A[row*ncols + col]);
      ++col;
    }
    fprintf(fp, "\n");
    ++row;
  }
}
