MPI work not being evenly distributed across ranks in netCDF parallel

I am trying out the netCDF parallel functionality using MPI, and it seems like only one rank is doing most of the work (see the code output).

When I am not writing the data with nc_put_vara_float(), and just printing inside the for loop, I get evenly distributed output. But when I do call nc_put_vara_float(), the output is interleaved as described above.

Why is this happening?

#include <stdio.h>
#include <netcdf.h>
#include <mpi.h>

/* Print the netCDF error string for err_code and bail out with status 2.
 * NOTE(review): this returns from main without MPI_Finalize/MPI_Abort, so
 * if only some ranks hit an error the others may hang — TODO confirm. */
#define nc_err(err_code) {printf("Error: %s\n", nc_strerror(err_code)); return 2;}
#define ofpath    "mpi.mock.nc"   /* output netCDF file path */

/* Grid extents: 20 x 20 spatial points over 40 time steps. */
#define x_siz     20
#define y_siz     20
#define t_siz     40
#define xy_siz    (x_siz * y_siz)           /* elements in one time slab  */
#define xyt_siz   (x_siz * y_siz * t_siz)   /* elements in the whole cube */

#define odim      3   /* rank (number of dimensions) of the output variable */

/* Fills in_arr (size x*y*t) with the value in_mpi_rank; see definition below. */
void init_arr(int in_x_siz, int in_y_siz, int in_t_siz, int in_mpi_rank, float in_arr[]);


/*
 * Write a mock 3-D (t, y, x) float variable to a netCDF-4 file in parallel.
 *
 * Each MPI rank writes its own contiguous range of time steps, one
 * (1, y_siz, x_siz) hyperslab per nc_put_vara_float() call.  Access is
 * collective (third argument of nc_var_par_access is 1 == NC_COLLECTIVE),
 * so every rank synchronizes inside each write call — that is why the
 * per-rank printf output appears interleaved even though the work itself
 * is evenly distributed across ranks.
 *
 * Returns 0 on success, 2 on any netCDF error (via the nc_err macro).
 */
int main(void)
{
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Info info = MPI_INFO_NULL;

    int    mpi_world, mpi_rank;   /* communicator size / this process's rank */
    int    err;                   /* netCDF status code                      */
    int    out_fid;               /* output file id                          */
    int    out_var_id;            /* id of the "mock" variable               */
    int    out_t_dimid, out_y_dimid, out_x_dimid;
    int    out_var_dimids[odim];  /* dimension ids, slowest-varying first    */
    size_t out_var_start[odim];   /* hyperslab origin                        */
    size_t out_var_count[odim];   /* hyperslab extent: one time slab         */

    /* Mock data: every element holds this process's rank. */
    float  mock_arr[xyt_siz];

    /* Initialize MPI. */
    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_world);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    init_arr(x_siz, y_siz, t_siz, mpi_rank, mock_arr);

    /* Partition the time axis across ranks.  The last rank also picks up
     * the remainder when t_siz is not divisible by the number of ranks;
     * the original integer division silently dropped those trailing
     * time steps. */
    int idx_div   = t_siz / mpi_world;
    int start_idx = idx_div * mpi_rank;
    int end_idx   = (mpi_rank == mpi_world - 1) ? t_siz : start_idx + idx_div;

    /* Create a parallel netCDF-4 file. */
    if ((err = nc_create_par(ofpath, NC_NETCDF4|NC_MPIIO, comm, info, &out_fid))) nc_err(err);

    /* Define the three dimensions, slowest-varying (t) first. */
    if ((err = nc_def_dim(out_fid, "t", t_siz, &out_t_dimid))) nc_err(err);
    if ((err = nc_def_dim(out_fid, "y", y_siz, &out_y_dimid))) nc_err(err);
    if ((err = nc_def_dim(out_fid, "x", x_siz, &out_x_dimid))) nc_err(err);

    out_var_dimids[0] = out_t_dimid;
    out_var_dimids[1] = out_y_dimid;
    out_var_dimids[2] = out_x_dimid;

    /* Define the single output variable and leave define mode. */
    if ((err = nc_def_var(out_fid, "mock", NC_FLOAT, odim, out_var_dimids, &out_var_id))) nc_err(err);
    if ((err = nc_enddef(out_fid))) nc_err(err);

    /* Each write covers one full (y, x) slab at a single time step. */
    out_var_start[1] = 0;
    out_var_start[2] = 0;
    out_var_count[0] = 1;
    out_var_count[1] = y_siz;
    out_var_count[2] = x_siz;

    /* 1 == NC_COLLECTIVE: all ranks must participate in every write.
     * Use NC_INDEPENDENT (0) if ranks should write without synchronizing. */
    if ((err = nc_var_par_access(out_fid, out_var_id, 1))) nc_err(err);

    /* Write the mock data: each rank writes its own time-step range. */
    printf("\n");
    printf("mpi rank        : %02d\n", mpi_rank);
    printf("====================\n");

    for (int i = start_idx; i < end_idx; i++)
    {
        out_var_start[0] = (size_t)i;
        int slab_idx     = i * xy_siz;   /* flat offset of time slab i */

        printf("each index prog : %04d | slab idx : %10d\n", i, slab_idx);
        /* Flush per line; without this, buffered stdout from different
         * ranks can appear even more jumbled than the collective-write
         * interleaving already makes it. */
        fflush(stdout);

        if ((err = nc_put_vara_float(out_fid, out_var_id, out_var_start,
                                     out_var_count, &mock_arr[slab_idx]))) nc_err(err);
    }

    printf("\n");

    /* Close the netCDF file (collective operation). */
    if ((err = nc_close(out_fid))) nc_err(err);

    /* Shut down MPI. */
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    return 0;
}

/*
 * Fill in_arr — laid out as [t][y][x] with x varying fastest — with the
 * caller's MPI rank, so each rank's slab is identifiable in the output.
 * The array layout is contiguous, so a single flat pass is equivalent
 * to the nested t/y/x traversal.
 */
void init_arr(int in_x_siz, int in_y_siz, int in_t_siz, int in_mpi_rank, float in_arr[])
{
   const int   total = in_x_siz * in_y_siz * in_t_siz;
   const float fill  = (float)in_mpi_rank;
   int         n;

   for (n = 0; n < total; n++)
      in_arr[n] = fill;
}

expected results (not using nc_put_vara_float())

mpi rank        : 00
====================
each index prog : 0000 | slab idx :          0
each index prog : 0001 | slab idx :        400
each index prog : 0002 | slab idx :        800
each index prog : 0003 | slab idx :       1200
each index prog : 0004 | slab idx :       1600
each index prog : 0005 | slab idx :       2000
each index prog : 0006 | slab idx :       2400
each index prog : 0007 | slab idx :       2800
each index prog : 0008 | slab idx :       3200
each index prog : 0009 | slab idx :       3600


mpi rank        : 01
====================
each index prog : 0010 | slab idx :       4000
each index prog : 0011 | slab idx :       4400
each index prog : 0012 | slab idx :       4800
each index prog : 0013 | slab idx :       5200
each index prog : 0014 | slab idx :       5600
each index prog : 0015 | slab idx :       6000
each index prog : 0016 | slab idx :       6400
each index prog : 0017 | slab idx :       6800
each index prog : 0018 | slab idx :       7200
each index prog : 0019 | slab idx :       7600


mpi rank        : 02
====================
each index prog : 0020 | slab idx :       8000
each index prog : 0021 | slab idx :       8400
each index prog : 0022 | slab idx :       8800
each index prog : 0023 | slab idx :       9200
each index prog : 0024 | slab idx :       9600
each index prog : 0025 | slab idx :      10000
each index prog : 0026 | slab idx :      10400
each index prog : 0027 | slab idx :      10800
each index prog : 0028 | slab idx :      11200
each index prog : 0029 | slab idx :      11600


mpi rank        : 03
====================
each index prog : 0030 | slab idx :      12000
each index prog : 0031 | slab idx :      12400
each index prog : 0032 | slab idx :      12800
each index prog : 0033 | slab idx :      13200
each index prog : 0034 | slab idx :      13600
each index prog : 0035 | slab idx :      14000
each index prog : 0036 | slab idx :      14400
each index prog : 0037 | slab idx :      14800
each index prog : 0038 | slab idx :      15200
each index prog : 0039 | slab idx :      15600

actual output (using nc_put_vara_float())

mpi rank        : 00
====================
each index prog : 0000 | slab idx :          0

mpi rank        : 01
====================
each index prog : 0010 | slab idx :       4000

mpi rank        : 02
====================
each index prog : 0020 | slab idx :       8000

mpi rank        : 03
====================
each index prog : 0030 | slab idx :      12000
each index prog : 0001 | slab idx :        400
each index prog : 0011 | slab idx :       4400
each index prog : 0021 | slab idx :       8400
each index prog : 0031 | slab idx :      12400
each index prog : 0022 | slab idx :       8800
each index prog : 0002 | slab idx :        800
each index prog : 0012 | slab idx :       4800
each index prog : 0032 | slab idx :      12800
each index prog : 0003 | slab idx :       1200
each index prog : 0033 | slab idx :      13200
each index prog : 0013 | slab idx :       5200
each index prog : 0023 | slab idx :       9200
each index prog : 0034 | slab idx :      13600
each index prog : 0014 | slab idx :       5600
each index prog : 0004 | slab idx :       1600
each index prog : 0024 | slab idx :       9600
each index prog : 0005 | slab idx :       2000
each index prog : 0035 | slab idx :      14000
each index prog : 0015 | slab idx :       6000
each index prog : 0025 | slab idx :      10000
each index prog : 0016 | slab idx :       6400
each index prog : 0006 | slab idx :       2400
each index prog : 0036 | slab idx :      14400
each index prog : 0026 | slab idx :      10400
each index prog : 0027 | slab idx :      10800
each index prog : 0037 | slab idx :      14800
each index prog : 0017 | slab idx :       6800
each index prog : 0007 | slab idx :       2800
each index prog : 0028 | slab idx :      11200
each index prog : 0038 | slab idx :      15200
each index prog : 0018 | slab idx :       7200
each index prog : 0008 | slab idx :       3200
each index prog : 0019 | slab idx :       7600
each index prog : 0029 | slab idx :      11600
each index prog : 0039 | slab idx :      15600
each index prog : 0009 | slab idx :       3600



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source