#include "common.h"
#include "global.h"
#include <pvm3.h>
#include <stdlib.h>

/* PVM message tag used for the dummy messages exchanged by Init_PVM_routes(). */
#define INIT_ROUTES 4999

/*
 * Create the distributed-array descriptor for a 2-D array of global
 * extent BP_DIM1 x BP_DIM2.
 *
 * The process count comes from MPI_Comm_size() when MPI is defined and
 * from parti_pvm_numnodes() otherwise.  The decomposition is created over
 * the global extents, embedded on processors 0 .. nprocs-1, distributed
 * with the pattern "BB", and the array is then aligned with it using an
 * identity dimension mapping and all ghost-cell / extra-flag arguments
 * set to zero.
 *
 * Returns the pointer produced by align().
 */
DARRAY *BP_create_darray(int BP_DIM1, int BP_DIM2)
{
  int nprocs;
  int global_extent[BP_Ndim];
  int array_extent[BP_Ndim];

  int dim_of_array[BP_Ndim]   = {0,1};
  int dim_of_decomp[BP_Ndim]  = {0,1};
  int inner_ghost_lo[BP_Ndim] = {0,0};
  int inner_ghost_hi[BP_Ndim] = {0,0};
  int outer_ghost_lo[BP_Ndim] = {0,0};
  int outer_ghost_hi[BP_Ndim] = {0,0};
  int extra[BP_Ndim]          = {0,0};

  VPROC  *procs;
  DECOMP *layout;

#ifdef MPI
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
#else
  nprocs = parti_pvm_numnodes();
#endif

  /* The aligned array has the same extents as the decomposition. */
  global_extent[0] = BP_DIM1;
  global_extent[1] = BP_DIM2;
  array_extent[0]  = global_extent[0];
  array_extent[1]  = global_extent[1];

  /* vProc(1, ...) -- presumably a one-dimensional processor arrangement. */
  procs  = vProc(1, &nprocs);
  layout = create_decomp(BP_Ndim, global_extent);
  embed(layout, procs, 0, nprocs - 1);
  distribute(layout, "BB");

  return align(layout, BP_Ndim, dim_of_array, array_extent,
               inner_ghost_lo, inner_ghost_hi,
               outer_ghost_lo, outer_ghost_hi,
               extra, dim_of_decomp);
}

/*
 * Create the distributed-array descriptor for a 3-D array of global
 * extent BP_DIM1 x BP_DIM2 x BP_DIM3.
 *
 * The process count comes from MPI_Comm_size() when MPI is defined and
 * from parti_pvm_numnodes() otherwise, matching BP_create_darray().  The
 * decomposition is created over the global extents, embedded on
 * processors 0 .. sizeVP-1, distributed with the pattern "BBB", and the
 * array is aligned with it using an identity dimension mapping and all
 * ghost-cell / extra-flag arguments set to zero.
 *
 * Returns the pointer produced by align().
 */
DARRAY *BP_create_darray3(int BP_DIM1, int BP_DIM2, int BP_DIM3)
{
  DARRAY *da;
  VPROC  *vproc;
  DECOMP *decomp;

  int     sizeVP;
  int     Gsize[BP_Ndim3];
  int     size[BP_Ndim3];

  int array_dim[BP_Ndim3]       ={0,1,2};
  int int_gcell_l[BP_Ndim3]     ={0,0,0};
  int int_gcell_r[BP_Ndim3]     ={0,0,0};
  int ext_gcell_left[BP_Ndim3]  ={0,0,0};
  int ext_gcell_right[BP_Ndim3] ={0,0,0};
  int extra_flag[BP_Ndim3]      ={0,0,0};
  int decomp_dimension[BP_Ndim3]={0,1,2};

  /* Use the same process-count source as the 2-D variant. */
#ifdef MPI
  MPI_Comm_size(MPI_COMM_WORLD, &sizeVP);
#else
  sizeVP = parti_pvm_numnodes();
#endif
  Gsize[0] = BP_DIM1;
  Gsize[1] = BP_DIM2;
  Gsize[2] = BP_DIM3;

  /* vProc(1, ...) -- presumably a one-dimensional processor arrangement. */
  vproc  = vProc(1,&sizeVP);
  decomp = create_decomp(BP_Ndim3,Gsize);
  embed(decomp,vproc,0,sizeVP-1);
  distribute(decomp,"BBB");

  /* The aligned array has the same extents as the decomposition. */
  size[0]=Gsize[0];
  size[1]=Gsize[1];
  size[2]=Gsize[2];

  da=align(decomp,BP_Ndim3,array_dim,size,int_gcell_l, int_gcell_r,
           ext_gcell_left,ext_gcell_right,
           extra_flag,decomp_dimension);

  return(da);
}

/*
 * Create the distributed-array descriptor for a 4-D array of global
 * extent BP_DIM1 x BP_DIM2 x BP_DIM3 x BP_DIM4.
 *
 * The process count comes from MPI_Comm_size() when MPI is defined and
 * from parti_pvm_numnodes() otherwise, matching BP_create_darray().  The
 * decomposition is created over the global extents, embedded on
 * processors 0 .. sizeVP-1, distributed with the pattern "BBBB", and the
 * array is aligned with it using an identity dimension mapping and all
 * ghost-cell / extra-flag arguments set to zero.
 *
 * Returns the pointer produced by align().
 */
DARRAY *BP_create_darray4(int BP_DIM1, int BP_DIM2, int BP_DIM3, int BP_DIM4)
{
  DARRAY *da;
  VPROC  *vproc;
  DECOMP *decomp;

  int     sizeVP;
  int     Gsize[BP_Ndim4];
  int     size[BP_Ndim4];

  int array_dim[BP_Ndim4]       ={0,1,2,3};
  int int_gcell_l[BP_Ndim4]     ={0,0,0,0};
  int int_gcell_r[BP_Ndim4]     ={0,0,0,0};
  int ext_gcell_left[BP_Ndim4]  ={0,0,0,0};
  int ext_gcell_right[BP_Ndim4] ={0,0,0,0};
  int extra_flag[BP_Ndim4]      ={0,0,0,0};
  int decomp_dimension[BP_Ndim4]={0,1,2,3};

  /* Use the same process-count source as the 2-D variant. */
#ifdef MPI
  MPI_Comm_size(MPI_COMM_WORLD, &sizeVP);
#else
  sizeVP = parti_pvm_numnodes();
#endif
  Gsize[0] = BP_DIM1;
  Gsize[1] = BP_DIM2;
  Gsize[2] = BP_DIM3;
  Gsize[3] = BP_DIM4;

  /* vProc(1, ...) -- presumably a one-dimensional processor arrangement. */
  vproc  = vProc(1,&sizeVP);
  decomp = create_decomp(BP_Ndim4,Gsize);
  embed(decomp,vproc,0,sizeVP-1);
  distribute(decomp,"BBBB");

  /* The aligned array has the same extents as the decomposition. */
  size[0]=Gsize[0];
  size[1]=Gsize[1];
  size[2]=Gsize[2];
  size[3]=Gsize[3];

  da=align(decomp,BP_Ndim4,array_dim,size,int_gcell_l, int_gcell_r,
           ext_gcell_left,ext_gcell_right,
           extra_flag,decomp_dimension);

  return(da);
}

/*
 * Build a region that enumerates the NbElem consecutive indices
 * base, base+1, ..., base+NbElem-1.
 *
 * The index list is heap-allocated here and passed to Alloc_R_Enum()
 * together with R_replicated and Mem_Pointer.
 * NOTE(review): Mem_Pointer presumably makes Alloc_R_Enum() keep the
 * pointer instead of copying it, which would be why the list is not
 * freed here -- confirm against Alloc_R_Enum().
 *
 * Returns the region produced by Alloc_R_Enum(), or NULL when the index
 * list cannot be allocated.
 */
region_s *Alloc_RangeRegion(int base, int NbElem)
{
  int *bind;
  int i;

  bind = (int*)malloc(sizeof(int)*NbElem);
  /* malloc(0) may legitimately return NULL, so only a non-empty request
     that comes back NULL is treated as a failure. */
  if (bind == NULL && NbElem > 0) {
    fprintf(stderr, "Alloc_RangeRegion: cannot allocate %d indices\n", NbElem);
    return NULL;
  }

  for (i = 0; i < NbElem; i++) {
    bind[i] = i + base;
  }

  return Alloc_R_Enum(NbElem,bind,R_replicated,Mem_Pointer);
}

/*
 * Print the row-major linear index (i * BP_DIM2 + j) of every cell in
 * the inclusive rectangle left[0]..right[0] by left[1]..right[1].
 *
 * Each line is labelled with a running counter that starts at LZ_count
 * and is incremented once per printed cell.  `stride` and BP_DIM1 are
 * accepted but not used.
 *
 * Returns the counter value after the last printed cell (LZ_count itself
 * when the rectangle is empty).
 */
int Print_LZ_Info_2D(int* left, int* right, int* stride,int LZ_count,int BP_DIM1,int BP_DIM2)
{
  int row;
  int col;

  printf("REGION INFO(LINEARIZATION) Values --------- \n");

  row = left[0];
  while (row <= right[0]) {
    col = left[1];
    while (col <= right[1]) {
      printf(" LZ[%d] = %d \n", LZ_count, row * BP_DIM2 + col);
      ++LZ_count;
      ++col;
    }
    ++row;
  }

  return LZ_count;
}

/*
 * Print the row-major linear index
 * (i * BP_DIM2 * BP_DIM3 + j * BP_DIM3 + k) of every cell in the
 * inclusive box left[0]..right[0] by left[1]..right[1] by
 * left[2]..right[2].
 *
 * Each line is labelled with a running counter that starts at LZ_count
 * and is incremented once per printed cell.  `stride` and BP_DIM1 are
 * accepted but not used.
 *
 * Returns the counter value after the last printed cell (LZ_count itself
 * when the box is empty).
 */
int Print_LZ_Info_3D(int* left, int* right, int* stride,int LZ_count,int BP_DIM1,int BP_DIM2,int BP_DIM3)
{
  int plane;
  int row;
  int col;

  printf("REGION INFO(LINEARIZATION) Values --------- \n");

  plane = left[0];
  while (plane <= right[0]) {
    row = left[1];
    while (row <= right[1]) {
      col = left[2];
      while (col <= right[2]) {
        printf(" LZ[%d] = %d \n", LZ_count,
               plane * BP_DIM2 * BP_DIM3 + row * BP_DIM3 + col);
        ++LZ_count;
        ++col;
      }
      ++row;
    }
    ++plane;
  }

  return LZ_count;
}

/*
 * Exchange dummy messages so that PVM has carried traffic between this
 * process and every process of both programs.
 *
 * Phase 1: send the integer 345 with tag INIT_ROUTES to every position of
 * other_pgme, then receive one INIT_ROUTES message from each of them.
 * Phase 2: do the same for every position of my_pgme (positions
 * 0 .. NumNodePgme(my_pgme)-1, which presumably include this process
 * itself).
 *
 * In each phase all sends are issued before the first blocking receive.
 * Every received value is printed next to the value that was sent.
 */
void Init_PVM_routes(pgme_s* my_pgme, pgme_s* other_pgme)
{
  int pos;
  int local_count;
  int remote_count;
  int token_out = 345;
  int token_in;

  local_count  = NumNodePgme(my_pgme);
  remote_count = NumNodePgme(other_pgme);

  /* ---- Routes between the two programs ---- */

  /* Send a dummy message to every process of the other program. */
  pos = 0;
  while (pos < remote_count) {
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&token_out, 1, 1);
    pvm_send(TidPgme(other_pgme, pos), INIT_ROUTES);
    pos++;
  }

  /* Receive the dummy message of every process of the other program. */
  pos = 0;
  while (pos < remote_count) {
    pvm_recv(TidPgme(other_pgme, pos), INIT_ROUTES);
    pvm_upkint(&token_in, 1, 1);
    printf("gb %d %d \n", token_out, token_in);
    pos++;
  }

  /* ---- Routes inside this program ---- */

  /* Send a dummy message to every process of this program. */
  pos = 0;
  while (pos < local_count) {
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&token_out, 1, 1);
    pvm_send(TidPgme(my_pgme, pos), INIT_ROUTES);
    fprintf(stderr,"in 111\n");
    pos++;
  }

  /* Receive the dummy message of every process of this program. */
  pos = 0;
  while (pos < local_count) {
    pvm_recv(TidPgme(my_pgme, pos), INIT_ROUTES);
    pvm_upkint(&token_in, 1, 1);
    printf("gb %d %d \n", token_out, token_in);
    pos++;
  }
}

/*
 * Fill BP_array with the global row-major index of each element of the
 * 2-D blocks that get_my_blocks() returns for this process's position
 * (MyPosMyPgme()).
 *
 * Blocks are visited in the order returned by get_my_blocks(); inside a
 * block, dimension 0 is the outer loop and dimension 1 the inner loop.
 * Values are written to consecutive slots of BP_array starting at 0, and
 * the value stored for global cell (r, c) is r * bp_dim2 + c.
 *
 * bp_dim1 is accepted but not used.  The block-pointer array allocated
 * here is not released by this function.
 */
void Initialize_distributed_array(decomp_Birreg* irr_dd,double* BP_array,int bp_dim1, int bp_dim2)
{
  int      b, r, c;
  int      out = 0;
  int      my_pos;
  int      total_blocks;
  int      owned_count;
  block  **all_blocks;
  block  **owned;

  my_pos = MyPosMyPgme();

  total_blocks = get_num_of_blocks(irr_dd);
  all_blocks = (leaf_Node**)malloc(total_blocks * sizeof(leaf_Node*));
  store_block_array_ptr(irr_dd, all_blocks);
  owned = get_my_blocks(my_pos, all_blocks, total_blocks, &owned_count, 2);

  for (b = 0; b < owned_count; b++) {
    block *blk = owned[b];

    for (r = blk->start_global[0]; r <= blk->end_global[0]; r++) {
      for (c = blk->start_global[1]; c <= blk->end_global[1]; c++) {
        BP_array[out] = r*bp_dim2 + c;
        out++;
      }
    }
  }
}

/*
 * Column-major counterpart of Initialize_distributed_array().
 *
 * For each 2-D block that get_my_blocks() returns for this process's
 * position (MyPosMyPgme()), dimension 1 is the outer loop and dimension 0
 * the inner loop.  Values are written to consecutive slots of BP_array
 * starting at 0; the value stored for global cell (r, c) is still the
 * row-major index r * bp_dim2 + c.
 *
 * bp_dim1 is accepted but not used.  The block-pointer array allocated
 * here is not released by this function.
 */
void Initialize_distributed_array_column_major(decomp_Birreg* irr_dd,double* BP_array,int bp_dim1, int bp_dim2)
{
  int      b, r, c;
  int      out = 0;
  int      my_pos;
  int      total_blocks;
  int      owned_count;
  block  **all_blocks;
  block  **owned;

  my_pos = MyPosMyPgme();

  total_blocks = get_num_of_blocks(irr_dd);
  all_blocks = (leaf_Node**)malloc(total_blocks * sizeof(leaf_Node*));
  store_block_array_ptr(irr_dd, all_blocks);
  owned = get_my_blocks(my_pos, all_blocks, total_blocks, &owned_count, 2);

  for (b = 0; b < owned_count; b++) {
    block *blk = owned[b];

    for (c = blk->start_global[1]; c <= blk->end_global[1]; c++) {
      for (r = blk->start_global[0]; r <= blk->end_global[0]; r++) {
        BP_array[out] = r*bp_dim2 + c;
        out++;
      }
    }
  }
}

  
/*
 * Fill BP_array with the global row-major index of each element of the
 * 3-D blocks that get_my_blocks() returns for this process's position
 * (MyPosMyPgme()).
 *
 * Inside a block the loops run over dimension 0 (outermost), 1, then 2
 * (innermost).  Values are written to consecutive slots of BP_array
 * starting at 0; the value stored for global cell (p, r, c) is
 * p * bp_dim2 * bp_dim3 + r * bp_dim3 + c.
 *
 * bp_dim1 is accepted but not used.  The block-pointer array allocated
 * here is not released by this function.
 *
 * NOTE(review): the last argument of get_my_blocks() is 2 here, exactly
 * as in the 2-D variants -- confirm whether it is meant to be a
 * dimension count (and therefore 3).
 */
void Initialize_distributed_array3(decomp_Birreg* irr_dd,double* BP_array,int bp_dim1, int bp_dim2, int bp_dim3)
{
  int      b, p, r, c;
  int      out = 0;
  int      my_pos;
  int      total_blocks;
  int      owned_count;
  block  **all_blocks;
  block  **owned;

  my_pos = MyPosMyPgme();

  total_blocks = get_num_of_blocks(irr_dd);
  all_blocks = (leaf_Node**)malloc(total_blocks * sizeof(leaf_Node*));
  store_block_array_ptr(irr_dd, all_blocks);
  owned = get_my_blocks(my_pos, all_blocks, total_blocks, &owned_count, 2);

  for (b = 0; b < owned_count; b++) {
    block *blk = owned[b];

    for (p = blk->start_global[0]; p <= blk->end_global[0]; p++) {
      for (r = blk->start_global[1]; r <= blk->end_global[1]; r++) {
        for (c = blk->start_global[2]; c <= blk->end_global[2]; c++) {
          BP_array[out] = p*bp_dim2*bp_dim3 + r*bp_dim3 + c;
          out++;
        }
      }
    }
  }
}
  
/*
 * Column-major counterpart of Initialize_distributed_array3().
 *
 * For each 3-D block that get_my_blocks() returns for this process's
 * position (MyPosMyPgme()), the loops run over dimension 2 (outermost),
 * 1, then 0 (innermost).  Values are written to consecutive slots of
 * BP_array starting at 0; the value stored for global cell (p, r, c) is
 * still the row-major index p * bp_dim2 * bp_dim3 + r * bp_dim3 + c.
 *
 * bp_dim1 is accepted but not used.  The block-pointer array allocated
 * here is not released by this function.
 *
 * NOTE(review): the last argument of get_my_blocks() is 2 here, exactly
 * as in the 2-D variants -- confirm whether it is meant to be a
 * dimension count (and therefore 3).
 */
void Initialize_distributed_array3_column_major(decomp_Birreg* irr_dd,double* BP_array,int bp_dim1, int bp_dim2, int bp_dim3)
{
  int      b, p, r, c;
  int      out = 0;
  int      my_pos;
  int      total_blocks;
  int      owned_count;
  block  **all_blocks;
  block  **owned;

  my_pos = MyPosMyPgme();

  total_blocks = get_num_of_blocks(irr_dd);
  all_blocks = (leaf_Node**)malloc(total_blocks * sizeof(leaf_Node*));
  store_block_array_ptr(irr_dd, all_blocks);
  owned = get_my_blocks(my_pos, all_blocks, total_blocks, &owned_count, 2);

  for (b = 0; b < owned_count; b++) {
    block *blk = owned[b];

    for (c = blk->start_global[2]; c <= blk->end_global[2]; c++) {
      for (r = blk->start_global[1]; r <= blk->end_global[1]; r++) {
        for (p = blk->start_global[0]; p <= blk->end_global[0]; p++) {
          BP_array[out] = p*bp_dim2*bp_dim3 + r*bp_dim3 + c;
          out++;
        }
      }
    }
  }
}

/*
 * Gather a block-distributed 2-D or 3-D array on process 0 and dump it.
 *
 * Processes with me != 0 pack da_size[0] * ... * da_size[nDims-1] doubles
 * from BP_array and send them to position 0 of my_pgme with tag 7777.
 *
 * Process 0 receives one message for every block whose `proc` field is
 * not `me`, copies each block (local blocks come from BP_array) to its
 * row-major position in a bp_dim1 x bp_dim2 (x bp_dim3) array, and
 * appends every element different from the sentinel -9999 to
 * /tmp/output.data, falling back to stdout when the file cannot be
 * opened.  The number of elements written is printed to stdout.
 *
 * Block elements are expected in row-major order (dimension 0 outermost).
 *
 * NOTE(review): each sender emits exactly one message while process 0
 * issues one pvm_recv per remote block, and a local block is always read
 * from BP_array starting at offset 0.  This appears to rely on every
 * process owning at most one block -- confirm.
 * NOTE(review): block_array is not freed because it is not visible here
 * whether store_block_array_ptr() keeps a reference to it.
 *
 * me        position of the calling process; 0 selects the collector role
 * irr_dd    decomposition supplying nDims and the block list
 * my_pgme   program descriptor used to look up task ids
 * bp_dim1..bp_dim3  global extents (bp_dim3 is only used when nDims == 3)
 * da_size   local extents per dimension (only read when me != 0)
 * BP_array  local data of the calling process
 */
void collect_print_whole_array(int me, decomp_Birreg* irr_dd, pgme_s* my_pgme, int bp_dim1, int bp_dim2, int bp_dim3, int* da_size, double* BP_array)
{
  const double unset = -9999;      /* sentinel: such elements are not printed */
  leaf_Node** block_array;
  double** buffer_array = NULL;
  double* whole_array = NULL;
  double* src;
  FILE *OutFile;
  int num_blocks;
  int nDims;
  int num_elements;
  int size;
  int local_index;
  int count;
  int i,j,k,m;

  if (me != 0) {
    /* Worker: ship the whole local array to the collector. */
    nDims = irr_dd->nDims;
    num_elements = 1;
    for (i = 0; i < nDims; i++)
      num_elements *= da_size[i];

    pvm_initsend (GlueEncoding);
    pvm_pkdouble(BP_array,num_elements,1);
    pvm_send(TidPgme(my_pgme,0),7777);
    return;
  }

  num_blocks = get_num_of_blocks(irr_dd);
  nDims = irr_dd->nDims;

  block_array = (leaf_Node**)malloc(num_blocks * sizeof(leaf_Node*));
  if (block_array == NULL && num_blocks > 0) {
    fprintf(stderr, "collect_print_whole_array: out of memory\n");
    return;
  }
  store_block_array_ptr(irr_dd,block_array);

  /* Always request at least one slot so that NULL can only mean failure. */
  buffer_array = (double**)malloc((num_blocks > 0 ? num_blocks : 1) * sizeof(double*));
  if (buffer_array == NULL) {
    fprintf(stderr, "collect_print_whole_array: out of memory\n");
    return;
  }
  /* Local blocks keep a NULL buffer; it also makes the cleanup loop safe. */
  for (i = 0; i < num_blocks; i++)
    buffer_array[i] = NULL;

  /* Receive every remote block, in block order. */
  for (i = 0; i < num_blocks; i++) {
    if (block_array[i]->proc == me)
      continue;

    size = 1;
    for (j = 0; j < nDims; j++)
      size *= block_array[i]->size[j];

    pvm_recv(TidPgme(my_pgme,block_array[i]->proc),7777);

    buffer_array[i] = (double*)malloc(size*sizeof(double));
    if (buffer_array[i] == NULL && size > 0) {
      fprintf(stderr, "collect_print_whole_array: out of memory\n");
      goto done;
    }
    pvm_upkdouble(buffer_array[i],size,1);
  }

  /* Any other rank is not supported: assemble and print nothing instead
     of using an uninitialized element count. */
  if (nDims == 2)
    num_elements = bp_dim1*bp_dim2;
  else if (nDims == 3)
    num_elements = bp_dim1*bp_dim2*bp_dim3;
  else
    num_elements = 0;

  whole_array = (double*)malloc(num_elements*sizeof(double));
  if (whole_array == NULL && num_elements > 0) {
    fprintf(stderr, "collect_print_whole_array: out of memory\n");
    goto done;
  }
  /* Cells not covered by any block would otherwise hold indeterminate
     values; marking them with the sentinel makes the filter below skip
     them. */
  for (j = 0; j < num_elements; j++)
    whole_array[j] = unset;

  /* Scatter every block to its global row-major position. */
  for (i = 0; i < num_blocks; i++) {
    src = (block_array[i]->proc != me) ? buffer_array[i] : BP_array;
    local_index = 0;

    if (nDims == 2) {
      for (j = block_array[i]->start_global[0]; j <= block_array[i]->end_global[0]; j++)
        for (k = block_array[i]->start_global[1]; k <= block_array[i]->end_global[1]; k++)
          whole_array[j*bp_dim2+k] = src[local_index++];
    }
    else if (nDims == 3) {
      for (j = block_array[i]->start_global[0]; j <= block_array[i]->end_global[0]; j++)
        for (k = block_array[i]->start_global[1]; k <= block_array[i]->end_global[1]; k++)
          for (m = block_array[i]->start_global[2]; m <= block_array[i]->end_global[2]; m++)
            whole_array[j*bp_dim2*bp_dim3+k*bp_dim3+m] = src[local_index++];
    }
  }

  count = 0;
  OutFile=fopen("/tmp/output.data","a");
  if (OutFile == NULL) {
    printf("\n /tmp/output.data not open \n");
    OutFile=stdout;
  }

  for (j = 0; j < num_elements; j++)
    if (whole_array[j] != unset) {
      fprintf(OutFile,"Elem[%d] = %f\n",j,whole_array[j]);
      count++;
    }
  printf("Count = %d \n", count);

  /* Close the dump file so the appended data is flushed and the
     descriptor is released; stdout must stay open. */
  if (OutFile != stdout)
    fclose(OutFile);

done:
  for (i = 0; i < num_blocks; i++)
    free(buffer_array[i]);
  free(buffer_array);
  free(whole_array);
}


/*
 * Column-major variant of collect_print_whole_array(): gather a
 * block-distributed 2-D or 3-D array on process 0 and dump it.
 *
 * Processes with me != 0 pack da_size[0] * ... * da_size[nDims-1] doubles
 * from BP_array and send them to position 0 of my_pgme with tag 7777.
 *
 * Process 0 receives one message for every block whose `proc` field is
 * not `me`, copies each block (local blocks come from BP_array) into a
 * bp_dim1 x bp_dim2 (x bp_dim3) array, and appends every element
 * different from the sentinel -9999 to /tmp/output.data, falling back to
 * stdout when the file cannot be opened.  The number of elements written
 * is printed to stdout.
 *
 * Block elements are expected in column-major order (the last dimension
 * is the outermost loop, dimension 0 the innermost), while the assembled
 * array is still addressed with row-major global indices.
 *
 * NOTE(review): each sender emits exactly one message while process 0
 * issues one pvm_recv per remote block, and a local block is always read
 * from BP_array starting at offset 0.  This appears to rely on every
 * process owning at most one block -- confirm.
 * NOTE(review): block_array is not freed because it is not visible here
 * whether store_block_array_ptr() keeps a reference to it.
 *
 * me        position of the calling process; 0 selects the collector role
 * irr_dd    decomposition supplying nDims and the block list
 * my_pgme   program descriptor used to look up task ids
 * bp_dim1..bp_dim3  global extents (bp_dim3 is only used when nDims == 3)
 * da_size   local extents per dimension (only read when me != 0)
 * BP_array  local data of the calling process
 */
void collect_print_whole_array_column_major(int me, decomp_Birreg* irr_dd, pgme_s* my_pgme, int bp_dim1, int bp_dim2, int bp_dim3, int* da_size, double* BP_array)
{
  const double unset = -9999;      /* sentinel: such elements are not printed */
  leaf_Node** block_array;
  double** buffer_array = NULL;
  double* whole_array = NULL;
  double* src;
  FILE *OutFile;
  int num_blocks;
  int nDims;
  int num_elements;
  int size;
  int local_index;
  int count;
  int i,j,k,m;

  if (me != 0) {
    /* Worker: ship the whole local array to the collector. */
    nDims = irr_dd->nDims;
    num_elements = 1;
    for (i = 0; i < nDims; i++)
      num_elements *= da_size[i];

    pvm_initsend (GlueEncoding);
    pvm_pkdouble(BP_array,num_elements,1);
    pvm_send(TidPgme(my_pgme,0),7777);
    return;
  }

  num_blocks = get_num_of_blocks(irr_dd);
  nDims = irr_dd->nDims;

  block_array = (leaf_Node**)malloc(num_blocks * sizeof(leaf_Node*));
  if (block_array == NULL && num_blocks > 0) {
    fprintf(stderr, "collect_print_whole_array_column_major: out of memory\n");
    return;
  }
  store_block_array_ptr(irr_dd,block_array);

  /* Always request at least one slot so that NULL can only mean failure. */
  buffer_array = (double**)malloc((num_blocks > 0 ? num_blocks : 1) * sizeof(double*));
  if (buffer_array == NULL) {
    fprintf(stderr, "collect_print_whole_array_column_major: out of memory\n");
    return;
  }
  /* Local blocks keep a NULL buffer; it also makes the cleanup loop safe. */
  for (i = 0; i < num_blocks; i++)
    buffer_array[i] = NULL;

  /* Receive every remote block, in block order. */
  for (i = 0; i < num_blocks; i++) {
    if (block_array[i]->proc == me)
      continue;

    size = 1;
    for (j = 0; j < nDims; j++)
      size *= block_array[i]->size[j];

    pvm_recv(TidPgme(my_pgme,block_array[i]->proc),7777);

    buffer_array[i] = (double*)malloc(size*sizeof(double));
    if (buffer_array[i] == NULL && size > 0) {
      fprintf(stderr, "collect_print_whole_array_column_major: out of memory\n");
      goto done;
    }
    pvm_upkdouble(buffer_array[i],size,1);
  }

  /* Any other rank is not supported: assemble and print nothing instead
     of using an uninitialized element count. */
  if (nDims == 2)
    num_elements = bp_dim1*bp_dim2;
  else if (nDims == 3)
    num_elements = bp_dim1*bp_dim2*bp_dim3;
  else
    num_elements = 0;

  whole_array = (double*)malloc(num_elements*sizeof(double));
  if (whole_array == NULL && num_elements > 0) {
    fprintf(stderr, "collect_print_whole_array_column_major: out of memory\n");
    goto done;
  }
  /* Cells not covered by any block would otherwise hold indeterminate
     values; marking them with the sentinel makes the filter below skip
     them. */
  for (j = 0; j < num_elements; j++)
    whole_array[j] = unset;

  /* Scatter every block: the source is walked in column-major order, the
     destination is addressed by its row-major global index. */
  for (i = 0; i < num_blocks; i++) {
    src = (block_array[i]->proc != me) ? buffer_array[i] : BP_array;
    local_index = 0;

    if (nDims == 2) {
      for (j = block_array[i]->start_global[1]; j <= block_array[i]->end_global[1]; j++)
        for (k = block_array[i]->start_global[0]; k <= block_array[i]->end_global[0]; k++)
          whole_array[k*bp_dim2+j] = src[local_index++];
    }
    else if (nDims == 3) {
      for (j = block_array[i]->start_global[2]; j <= block_array[i]->end_global[2]; j++)
        for (k = block_array[i]->start_global[1]; k <= block_array[i]->end_global[1]; k++)
          for (m = block_array[i]->start_global[0]; m <= block_array[i]->end_global[0]; m++)
            whole_array[m*bp_dim2*bp_dim3+k*bp_dim3+j] = src[local_index++];
    }
  }

  count = 0;
  OutFile=fopen("/tmp/output.data","a");
  if (OutFile == NULL) {
    printf("\n /tmp/output.data not open \n");
    OutFile=stdout;
  }

  for (j = 0; j < num_elements; j++)
    if (whole_array[j] != unset) {
      fprintf(OutFile,"Elem[%d] = %f\n",j,whole_array[j]);
      count++;
    }
  printf("Count = %d \n", count);

  /* Close the dump file so the appended data is flushed and the
     descriptor is released; stdout must stay open. */
  if (OutFile != stdout)
    fclose(OutFile);

done:
  for (i = 0; i < num_blocks; i++)
    free(buffer_array[i]);
  free(buffer_array);
  free(whole_array);
}

/*
 * Enroll the calling task in the PVM group `nom` and build the program
 * descriptor for a program of `numnode` tasks.
 *
 * The task whose parti_pvm_mynode() is 0 joins the group immediately.
 * Every other task polls once per second until pvm_gsize(nom) equals its
 * own node number and only then joins (presumably so that group instance
 * numbers follow node numbers).  After a group barrier the descriptor is
 * filled with the task id of every group instance 0 .. numnode-1; myPos
 * is the instance whose tid equals this task's tid, or -1 if none does.
 *
 * Side effects: sets the PVM fragment size to GlueFragBuffer and stores
 * the new descriptor in the global ic_this_program.
 *
 * Returns the descriptor, or NULL when pvm_mytid() or pvm_joingroup()
 * fails or memory cannot be allocated.  Note that a NULL return after
 * the first barrier leaves the peers waiting in the second one.
 */
pgme_s *InitPgme(char *nom, int numnode) {
  pgme_s *pgme;
  int mytid;
  int i, me;

  me = parti_pvm_mynode();

  if ((mytid = pvm_mytid()) < 0) {
    pvm_perror("mytid");
    return NULL;
  }

  /* Node 0 joins first; node k waits until k members are enrolled. */
  if (me != 0) {
    while (pvm_gsize(nom) != me)
      sleep(1);
  }
  if (pvm_joingroup(nom) < 0) {
    pvm_perror("joingroup");
    return NULL;
  }

  pvm_barrier(nom, numnode);

  pgme = (pgme_s*)malloc(sizeof(pgme_s));
  if (pgme == NULL) {
    fprintf(stderr, "InitPgme: out of memory\n");
    return NULL;
  }
  pgme->nbNode = numnode;
  pgme->name = (char*)calloc(strlen(nom) + 1, sizeof(char));
  pgme->tid = (int*)calloc(numnode, sizeof(int));
  /* calloc may return NULL for an empty request, so the tid array only
     counts as failed when at least one entry was asked for. */
  if (pgme->name == NULL || (pgme->tid == NULL && numnode > 0)) {
    fprintf(stderr, "InitPgme: out of memory\n");
    free(pgme->tid);
    free(pgme->name);
    free(pgme);
    return NULL;
  }
  sprintf(pgme->name, "%s", nom);

  pgme->myPos = -1;
  for (i = 0; i < numnode; i++) {
    pgme->tid[i] = pvm_gettid(nom, i);
    if (mytid == pgme->tid[i])
      pgme->myPos = i;
  }

  pvm_barrier(nom, numnode);

  /*pvm_setopt(PvmRoute, PvmRouteDirect);*/
  pvm_setopt(PvmFragSize, GlueFragBuffer);

  ic_this_program = pgme;

  return pgme;
}

void init_routes(IC_Program* this, IC_Program* other, int numnode, char* name, int numnodeother, char* nameother)
{
  pgme_s* pgme;
  pgme_s* pgme_other;

  int i;
  
 
   pgme=(pgme_s*)malloc(sizeof(pgme_s)); 
   pgme->nbNode=numnode; 
   pgme->name=(char *)malloc(strlen(name)+1); 
   sprintf(pgme->name,"%s",name); 
   pgme->tid=(int *)malloc(sizeof(int)*numnode); 

   for(i=0; i< numnode; i++)
     pgme->tid[i] = this->tid[i];
   pgme->myPos = this->myPos;


   pgme_other=(pgme_s*)malloc(sizeof(pgme_s)); 
   pgme_other->nbNode=numnodeother; 
   pgme_other->name=(char *)malloc(strlen(nameother)+1); 
   sprintf(pgme_other->name,"%s",nameother); 
   pgme_other->tid=(int *)malloc(sizeof(int)*numnodeother); 

   for(i=0; i< numnodeother; i++)
     pgme_other->tid[i] = other->tid[i];
   pgme_other->myPos = other->myPos;

   Init_PVM_routes(pgme, pgme_other);


}
