
#include "InterComm.h"
#include "IC_EndPoint.h"
#include <assert.h>

#ifdef PPLUSPLUS
#include <A++.h>
#include "pplus2bparti.h"
#endif

#ifdef PPLUSPLUS

// Methods for int arrays

void IC_EndPoint::exportArray(const intArray& a, int& status) {
// Exports (sends) the int array `a` to the other program using InterComm.
//
// a      - array to send; its base/bound/stride per dimension define the
//          block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Send_int.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_int(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}

void IC_EndPoint::importArray(const intArray& a, int& status) {
// Imports (receives) an int array from the other program using InterComm.
//
// a      - destination array; its base/bound/stride per dimension define
//          the block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Recv_int (so the array contents are
//          presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_int(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}


// Methods for float arrays

void IC_EndPoint::exportArray(const floatArray& a, int& status) {
// Exports (sends) the float array `a` to the other program using InterComm.
//
// a      - array to send; its base/bound/stride per dimension define the
//          block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Send_float.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_float(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}

void IC_EndPoint::importArray(const floatArray& a, int& status) {
// Imports (receives) a float array from the other program using InterComm.
//
// a      - destination array; its base/bound/stride per dimension define
//          the block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Recv_float (so the array contents are
//          presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_float(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}


// Methods for double arrays

void IC_EndPoint::exportArray(const doubleArray& a, int& status) {
// Exports (sends) the double array `a` to the other program using InterComm.
//
// a      - array to send; its base/bound/stride per dimension define the
//          block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Send_double.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_double(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}

void IC_EndPoint::importArray(const doubleArray& a, int& status) {
// Imports (receives) a double array from the other program using InterComm.
//
// a      - destination array; its base/bound/stride per dimension define
//          the block region, and a.getLocalArray().getDataPointer() is the
//          buffer handed to IC_Recv_double (so the array contents are
//          presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor obtained here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.numberOfDimensions();
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  // one block region spanning base..bound (with stride) in every dimension
  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];
  for(unsigned i=0;i<ndim;++i) {
    lower[i]=a.getBase(i);
    upper[i]=a.getBound(i);
    stride[i]=a.getStride(i);
#ifdef DEBUG
    cout << lower[i] << " " << upper[i] << " " << stride[i] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=getPartiDescriptor(a,thisProgramAO==otherProgramAO);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_double(otherProgram,sched,a.getLocalArray().getDataPointer(),IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;
}


#endif

#ifdef FORTRAN90

// Methods for char arrays

void IC_EndPoint::exportArray(const F90_charArray& a, int& status) {
// Exports (sends) the Fortran 90 char array `a` to the other program using
// InterComm.
//
// a      - array to send; a.rank and a.shape give its extents and a.base is
//          the buffer handed to IC_Send_char.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_char(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

void IC_EndPoint::importArray(const F90_charArray& a, int& status) {
// Imports (receives) a Fortran 90 char array from the other program using
// InterComm.
//
// a      - destination array; a.rank and a.shape give its extents and
//          a.base is the buffer handed to IC_Recv_char (so the contents are
//          presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_char(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

IC_Desc* IC_EndPoint::buildDescriptor(const F90_charArray& a) {
// Builds a block-decomposition descriptor for the Fortran 90 char array `a`
// consisting of a single block assigned to task 0.
//
// The block bounds are laid out as [lower bounds..., upper bounds...], with
// the dimension order reversed relative to a.shape (lower bound 0, upper
// bound shape-1 in every dimension).
//
// Returns the descriptor from IC_Create_bdecomp_desc, or NULL when a.rank
// is outside 1..IC_MAX_DIM. Returning NULL for an unsupported rank (rather
// than relying on assert) keeps the result well defined in NDEBUG builds;
// callers in this file report a NULL descriptor as
// IC_CANT_GET_DA_DESCRIPTOR.
  const unsigned rank=static_cast<unsigned>(a.rank);
  if (rank<1 || rank>IC_MAX_DIM) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    return NULL;
  }

  int tasks[1];
  tasks[0]=0;

  // same contiguous layout as an int[1][2][rank] array
  int blocks[2*(IC_MAX_DIM)];
  for(unsigned d=0;d<rank;++d) {
    blocks[d]=0;                          // lower bound of dimension d
    blocks[rank+d]=a.shape[rank-1-d]-1;   // upper bound, reversed order
  }

  return IC_Create_bdecomp_desc(a.rank,blocks,tasks,1);
}

// Methods for short arrays

void IC_EndPoint::exportArray(const F90_shortArray& a, int& status) {
// Exports (sends) the Fortran 90 short array `a` to the other program using
// InterComm.
//
// a      - array to send; a.rank and a.shape give its extents and a.base is
//          the buffer handed to IC_Send_short.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_short(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

void IC_EndPoint::importArray(const F90_shortArray& a, int& status) {
// Imports (receives) a Fortran 90 short array from the other program using
// InterComm.
//
// a      - destination array; a.rank and a.shape give its extents and
//          a.base is the buffer handed to IC_Recv_short (so the contents
//          are presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_short(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

IC_Desc* IC_EndPoint::buildDescriptor(const F90_shortArray& a) {
// Builds a block-decomposition descriptor for the Fortran 90 short array
// `a` consisting of a single block assigned to task 0.
//
// The block bounds are laid out as [lower bounds..., upper bounds...], with
// the dimension order reversed relative to a.shape (lower bound 0, upper
// bound shape-1 in every dimension).
//
// Returns the descriptor from IC_Create_bdecomp_desc, or NULL when a.rank
// is outside 1..IC_MAX_DIM. Returning NULL for an unsupported rank (rather
// than relying on assert) keeps the result well defined in NDEBUG builds;
// callers in this file report a NULL descriptor as
// IC_CANT_GET_DA_DESCRIPTOR.
  const unsigned rank=static_cast<unsigned>(a.rank);
  if (rank<1 || rank>IC_MAX_DIM) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    return NULL;
  }

  int tasks[1];
  tasks[0]=0;

  // same contiguous layout as an int[1][2][rank] array
  int blocks[2*(IC_MAX_DIM)];
  for(unsigned d=0;d<rank;++d) {
    blocks[d]=0;                          // lower bound of dimension d
    blocks[rank+d]=a.shape[rank-1-d]-1;   // upper bound, reversed order
  }

  return IC_Create_bdecomp_desc(a.rank,blocks,tasks,1);
}

// Methods for int arrays

void IC_EndPoint::exportArray(const F90_intArray& a, int& status) {
// Exports (sends) the Fortran 90 int array `a` to the other program using
// InterComm.
//
// a      - array to send; a.rank and a.shape give its extents and a.base is
//          the buffer handed to IC_Send_int.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_int(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

void IC_EndPoint::importArray(const F90_intArray& a, int& status) {
// Imports (receives) a Fortran 90 int array from the other program using
// InterComm.
//
// a      - destination array; a.rank and a.shape give its extents and
//          a.base is the buffer handed to IC_Recv_int (so the contents are
//          presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_int(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

IC_Desc* IC_EndPoint::buildDescriptor(const F90_intArray& a) {
// Builds a block-decomposition descriptor for the Fortran 90 int array `a`
// consisting of a single block assigned to task 0.
//
// The block bounds are laid out as [lower bounds..., upper bounds...], with
// the dimension order reversed relative to a.shape (lower bound 0, upper
// bound shape-1 in every dimension).
//
// Returns the descriptor from IC_Create_bdecomp_desc, or NULL when a.rank
// is outside 1..IC_MAX_DIM. Returning NULL for an unsupported rank (rather
// than relying on assert) keeps the result well defined in NDEBUG builds;
// callers in this file report a NULL descriptor as
// IC_CANT_GET_DA_DESCRIPTOR.
  const unsigned rank=static_cast<unsigned>(a.rank);
  if (rank<1 || rank>IC_MAX_DIM) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    return NULL;
  }

  int tasks[1];
  tasks[0]=0;

  // same contiguous layout as an int[1][2][rank] array
  int blocks[2*(IC_MAX_DIM)];
  for(unsigned d=0;d<rank;++d) {
    blocks[d]=0;                          // lower bound of dimension d
    blocks[rank+d]=a.shape[rank-1-d]-1;   // upper bound, reversed order
  }

  return IC_Create_bdecomp_desc(a.rank,blocks,tasks,1);
}

// Methods for float arrays

void IC_EndPoint::exportArray(const F90_floatArray& a, int& status) {
// Exports (sends) the Fortran 90 float array `a` to the other program using
// InterComm.
//
// a      - array to send; a.rank and a.shape give its extents and a.base is
//          the buffer handed to IC_Send_float.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_float(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

void IC_EndPoint::importArray(const F90_floatArray& a, int& status) {
// Imports (receives) a Fortran 90 float array from the other program using
// InterComm.
//
// a      - destination array; a.rank and a.shape give its extents and
//          a.base is the buffer handed to IC_Recv_float (so the contents
//          are presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_float(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

IC_Desc* IC_EndPoint::buildDescriptor(const F90_floatArray& a) {
// Builds a block-decomposition descriptor for the Fortran 90 float array
// `a` consisting of a single block assigned to task 0.
//
// The block bounds are laid out as [lower bounds..., upper bounds...], with
// the dimension order reversed relative to a.shape (lower bound 0, upper
// bound shape-1 in every dimension).
//
// Returns the descriptor from IC_Create_bdecomp_desc, or NULL when a.rank
// is outside 1..IC_MAX_DIM. Returning NULL for an unsupported rank (rather
// than relying on assert) keeps the result well defined in NDEBUG builds;
// callers in this file report a NULL descriptor as
// IC_CANT_GET_DA_DESCRIPTOR.
  const unsigned rank=static_cast<unsigned>(a.rank);
  if (rank<1 || rank>IC_MAX_DIM) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    return NULL;
  }

  int tasks[1];
  tasks[0]=0;

  // same contiguous layout as an int[1][2][rank] array
  int blocks[2*(IC_MAX_DIM)];
  for(unsigned d=0;d<rank;++d) {
    blocks[d]=0;                          // lower bound of dimension d
    blocks[rank+d]=a.shape[rank-1-d]-1;   // upper bound, reversed order
  }

  return IC_Create_bdecomp_desc(a.rank,blocks,tasks,1);
}

// Methods for double arrays

void IC_EndPoint::exportArray(const F90_doubleArray& a, int& status) {
// Exports (sends) the Fortran 90 double array `a` to the other program
// using InterComm.
//
// a      - array to send; a.rank and a.shape give its extents and a.base is
//          the buffer handed to IC_Send_double.
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Send_double(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the send succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

void IC_EndPoint::importArray(const F90_doubleArray& a, int& status) {
// Imports (receives) a Fortran 90 double array from the other program
// using InterComm.
//
// a      - destination array; a.rank and a.shape give its extents and
//          a.base is the buffer handed to IC_Recv_double (so the contents
//          are presumably overwritten even though `a` is a const reference).
// status - output: IC_OK on success, otherwise one of IC_INVALID_NDIM,
//          IC_CANT_ALLOC_REGION, IC_CANT_GET_DA_DESCRIPTOR,
//          IC_CANT_COMPUTE_COMM_SCHEDULE or IC_COMM_FAILURE.
//
// The region and descriptor created here are released on every exit path,
// including the error paths.
// NOTE(review): the schedule returned by IC_Compute_schedule is never
// released in this function - confirm whether it has to be freed.
  unsigned ndim=a.rank;
  if (ndim>IC_MAX_DIM) {
#ifdef IC_DEBUG
    // IC_errors is indexed by the negated status code
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    status=IC_INVALID_NDIM;
    return;
  }

  int lower[IC_MAX_DIM], upper[IC_MAX_DIM], stride[IC_MAX_DIM];

  // column-major order: shape index i maps to region dimension ndim-i-1,
  // each spanning 0..shape[i]-1 with unit stride
  for(unsigned i=0;i<ndim;++i) {
    const unsigned d=ndim-i-1;
    lower[d]=0;
    stride[d]=1;
    upper[d]=a.shape[i]-1;
#ifdef DEBUG
    cout << lower[d] << " " << upper[d] << " " << stride[d] << endl;
#endif
  }

#ifdef DEBUG
  cout << "number of dimensions is " << ndim << endl;
#endif

  IC_Region* region_set[1];
  region_set[0]=IC_Create_block_region(ndim,lower,upper,stride);
  if (!region_set[0]) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_ALLOC_REGION] << endl;
#endif
    status=IC_CANT_ALLOC_REGION;
    return;
  }

  IC_Desc* desc=buildDescriptor(a);
  if (!desc) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_GET_DA_DESCRIPTOR] << endl;
#endif
    IC_Free_region(region_set[0]);  // do not leak the region on failure
    status=IC_CANT_GET_DA_DESCRIPTOR;
    return;
  }

  // this can probably be cached in a more sophisticated version!!!
  IC_Sched* sched=IC_Compute_schedule(thisProgram,otherProgram,desc,region_set,1);
  if (!sched) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_CANT_COMPUTE_COMM_SCHEDULE] << endl;
#endif
    IC_Free_region(region_set[0]);
    IC_Free_desc(desc);
    status=IC_CANT_COMPUTE_COMM_SCHEDULE;
    return;
  }

  int ret=IC_Recv_double(otherProgram,sched,a.base,IC_TAG);

  // release the region and descriptor whether or not the receive succeeded
  IC_Free_region(region_set[0]);
  IC_Free_desc(desc);

  if (ret!=0) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_COMM_FAILURE] << endl;
#endif
    status=IC_COMM_FAILURE;
    return;
  }

  status=IC_OK;

}

IC_Desc* IC_EndPoint::buildDescriptor(const F90_doubleArray& a) {
// Builds a block-decomposition descriptor for the Fortran 90 double array
// `a` consisting of a single block assigned to task 0.
//
// The block bounds are laid out as [lower bounds..., upper bounds...], with
// the dimension order reversed relative to a.shape (lower bound 0, upper
// bound shape-1 in every dimension).
//
// Returns the descriptor from IC_Create_bdecomp_desc, or NULL when a.rank
// is outside 1..IC_MAX_DIM. Returning NULL for an unsupported rank (rather
// than relying on assert) keeps the result well defined in NDEBUG builds;
// callers in this file report a NULL descriptor as
// IC_CANT_GET_DA_DESCRIPTOR.
  const unsigned rank=static_cast<unsigned>(a.rank);
  if (rank<1 || rank>IC_MAX_DIM) {
#ifdef IC_DEBUG
    cout << IC_errors[-IC_INVALID_NDIM] << endl;
#endif
    return NULL;
  }

  int tasks[1];
  tasks[0]=0;

  // same contiguous layout as an int[1][2][rank] array
  int blocks[2*(IC_MAX_DIM)];
  for(unsigned d=0;d<rank;++d) {
    blocks[d]=0;                          // lower bound of dimension d
    blocks[rank+d]=a.shape[rank-1-d]-1;   // upper bound, reversed order
  }

  return IC_Create_bdecomp_desc(a.rank,blocks,tasks,1);
}

#endif
