MPI processes not being evenly distributed across all ranks in parallel netCDF
I am trying out the parallel netCDF functions with MPI, and it looks like only one rank is doing most of the work (see the code output).
When I do not write the data with nc_put_vara_float() and just print inside the for loop, I get evenly distributed output. But when I call nc_put_vara_float(), the output behaves as described above.
Why is this happening?
#include <stdio.h>
#include <mpi.h>
#include <netcdf.h>
#include <netcdf_par.h>
/*
 * Report a failed netCDF call and terminate the whole MPI job.
 *
 * Wrapped in do { ... } while (0) so the macro expands to a single statement
 * and stays safe inside un-braced if/else. stdout is flushed explicitly
 * because MPI_Abort may end the process before buffered output is written.
 * MPI_Abort is used so that a failure on one rank does not leave the other
 * ranks waiting in a collective call; the trailing return keeps the macro
 * usable from main() should MPI_Abort ever return.
 */
#define nc_err(err_code) \
    do { \
        printf("Error: %s\n", nc_strerror(err_code)); \
        fflush(stdout); \
        MPI_Abort(MPI_COMM_WORLD, 2); \
        return 2; \
    } while (0)

/* Output file path. */
#define ofpath "mpi.mock.nc"

/* Extents of the x, y and t dimensions of the output variable. */
#define x_siz 20
#define y_siz 20
#define t_siz 40

/* Number of elements in one t-slab (one y-by-x plane). */
#define xy_siz (x_siz * y_siz)

/* Total number of elements in the output variable. */
#define xyt_siz (x_siz * y_siz * t_siz)

/* Number of dimensions of the output variable. */
#define odim 3

/* Fills the first in_x_siz * in_y_siz * in_t_siz elements of in_arr with in_mpi_rank. */
void init_arr(int in_x_siz, int in_y_siz, int in_t_siz, int in_mpi_rank, float in_arr[]);
int main()
{
// MPI variables
int mpi_world, mpi_rank, mpi_namelen;
char mpi_name[100];
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Info info = MPI_INFO_NULL;
// netcdf variable
int idx_div, start_idx, end_idx, slab_idx;
int err;
int out_fid;
int out_var_id;
int out_x_dimid,
out_y_dimid,
out_t_dimid;
int out_var_dimids[odim];
size_t out_var_start[odim],
out_var_count[odim];
// mock array for netcdf output
float mock_arr[xyt_siz];
// indexing variable
int i;
/* initialize MPI */
MPI_Init(NULL,NULL);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_world);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
init_arr(x_siz,y_siz,t_siz,mpi_rank,mock_arr);
idx_div = t_siz / mpi_world;
start_idx = idx_div * mpi_rank;
end_idx = start_idx + idx_div;
/* create a parallel netcdf-4 file. */
if ((err = nc_create_par(ofpath, NC_NETCDF4|NC_MPIIO, comm, info, &out_fid))) nc_err(err);
/* create three dimensions. */
if ((err = nc_def_dim(out_fid, "t", t_siz, &out_t_dimid))) nc_err(err);
if ((err = nc_def_dim(out_fid, "y", y_siz, &out_y_dimid))) nc_err(err);
if ((err = nc_def_dim(out_fid, "x", x_siz, &out_x_dimid))) nc_err(err);
/* setting dimension ids for output variable */
out_var_dimids[0] = out_t_dimid;
out_var_dimids[1] = out_y_dimid;
out_var_dimids[2] = out_x_dimid;
/* create one var. */
if ((err = nc_def_var(out_fid, "mock", NC_FLOAT, odim, out_var_dimids, &out_var_id))) nc_err(err);
if ((err = nc_enddef(out_fid))) nc_err(err);
out_var_start[0] = start_idx;
out_var_start[1] = 0;
out_var_start[2] = 0;
out_var_count[0] = 1;
out_var_count[1] = y_siz;
out_var_count[2] = x_siz;
if ((err = nc_var_par_access(out_fid, out_var_id, 1))) nc_err(err);
/* write mock data (each mpi processes write small segments) */
printf("\n");
printf("mpi rank : %02d\n",mpi_rank);
printf("====================\n");
for (i=start_idx; i<end_idx; i++)
{
out_var_start[0] = i;
slab_idx = i * xy_siz;
printf("each index prog : %04d | slab idx : %10d\n",i,slab_idx);
if ((err = nc_put_vara_float(out_fid, out_var_id, out_var_start, out_var_count, &mock_arr[slab_idx]))) nc_err(err);
}
printf("\n");
/* close the netcdf file. */
if ((err = nc_close(out_fid))) nc_err(err);
/* shut down MPI. */
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
/*
 * Fill a flattened (t, y, x) array with the calling rank's number.
 *
 * in_x_siz, in_y_siz, in_t_siz: extents of the three dimensions.
 * in_mpi_rank:                  value stored (converted to float) in every element.
 * in_arr:                       destination; its first
 *                               in_x_siz * in_y_siz * in_t_siz elements are written.
 *
 * Nothing is written when any extent is zero or negative.
 */
void init_arr(int in_x_siz, int in_y_siz, int in_t_siz, int in_mpi_rank, float in_arr[])
{
    if (in_x_siz <= 0 || in_y_siz <= 0 || in_t_siz <= 0)
    {
        return;
    }

    const float rank_value = (float)in_mpi_rank;
    float *cursor = in_arr;
    float *const stop = in_arr + in_x_siz * in_y_siz * in_t_siz;

    /* Every (x, y, t) triple maps to one distinct flat index, so a linear sweep covers them all. */
    while (cursor != stop)
    {
        *cursor++ = rank_value;
    }
}
expected results (not using nc_put_vara_float())
mpi rank : 00
====================
each index prog : 0000 | slab idx : 0
each index prog : 0001 | slab idx : 400
each index prog : 0002 | slab idx : 800
each index prog : 0003 | slab idx : 1200
each index prog : 0004 | slab idx : 1600
each index prog : 0005 | slab idx : 2000
each index prog : 0006 | slab idx : 2400
each index prog : 0007 | slab idx : 2800
each index prog : 0008 | slab idx : 3200
each index prog : 0009 | slab idx : 3600
mpi rank : 01
====================
each index prog : 0010 | slab idx : 4000
each index prog : 0011 | slab idx : 4400
each index prog : 0012 | slab idx : 4800
each index prog : 0013 | slab idx : 5200
each index prog : 0014 | slab idx : 5600
each index prog : 0015 | slab idx : 6000
each index prog : 0016 | slab idx : 6400
each index prog : 0017 | slab idx : 6800
each index prog : 0018 | slab idx : 7200
each index prog : 0019 | slab idx : 7600
mpi rank : 02
====================
each index prog : 0020 | slab idx : 8000
each index prog : 0021 | slab idx : 8400
each index prog : 0022 | slab idx : 8800
each index prog : 0023 | slab idx : 9200
each index prog : 0024 | slab idx : 9600
each index prog : 0025 | slab idx : 10000
each index prog : 0026 | slab idx : 10400
each index prog : 0027 | slab idx : 10800
each index prog : 0028 | slab idx : 11200
each index prog : 0029 | slab idx : 11600
mpi rank : 03
====================
each index prog : 0030 | slab idx : 12000
each index prog : 0031 | slab idx : 12400
each index prog : 0032 | slab idx : 12800
each index prog : 0033 | slab idx : 13200
each index prog : 0034 | slab idx : 13600
each index prog : 0035 | slab idx : 14000
each index prog : 0036 | slab idx : 14400
each index prog : 0037 | slab idx : 14800
each index prog : 0038 | slab idx : 15200
each index prog : 0039 | slab idx : 15600
actual output (using nc_put_vara_float())
mpi rank : 00
====================
each index prog : 0000 | slab idx : 0
mpi rank : 01
====================
each index prog : 0010 | slab idx : 4000
mpi rank : 02
====================
each index prog : 0020 | slab idx : 8000
mpi rank : 03
====================
each index prog : 0030 | slab idx : 12000
each index prog : 0001 | slab idx : 400
each index prog : 0011 | slab idx : 4400
each index prog : 0021 | slab idx : 8400
each index prog : 0031 | slab idx : 12400
each index prog : 0022 | slab idx : 8800
each index prog : 0002 | slab idx : 800
each index prog : 0012 | slab idx : 4800
each index prog : 0032 | slab idx : 12800
each index prog : 0003 | slab idx : 1200
each index prog : 0033 | slab idx : 13200
each index prog : 0013 | slab idx : 5200
each index prog : 0023 | slab idx : 9200
each index prog : 0034 | slab idx : 13600
each index prog : 0014 | slab idx : 5600
each index prog : 0004 | slab idx : 1600
each index prog : 0024 | slab idx : 9600
each index prog : 0005 | slab idx : 2000
each index prog : 0035 | slab idx : 14000
each index prog : 0015 | slab idx : 6000
each index prog : 0025 | slab idx : 10000
each index prog : 0016 | slab idx : 6400
each index prog : 0006 | slab idx : 2400
each index prog : 0036 | slab idx : 14400
each index prog : 0026 | slab idx : 10400
each index prog : 0027 | slab idx : 10800
each index prog : 0037 | slab idx : 14800
each index prog : 0017 | slab idx : 6800
each index prog : 0007 | slab idx : 2800
each index prog : 0028 | slab idx : 11200
each index prog : 0038 | slab idx : 15200
each index prog : 0018 | slab idx : 7200
each index prog : 0008 | slab idx : 3200
each index prog : 0019 | slab idx : 7600
each index prog : 0029 | slab idx : 11600
each index prog : 0039 | slab idx : 15600
each index prog : 0009 | slab idx : 3600
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
