'Why I am getting different output in every run in openmp

I have N number of text files. I am trying to read from those files parallelly, so I have forked N threads and each thread gets one text file from those N files(Thread 0 gets file0.txt, thread 1 gets file1.txt, thread 2 gets file2.txt and so on ). The next step I am trying to do is read first 1000 lines from the files parallelly and put in a vector say V1, then I will sort V1. All the resources are private to each tread except V1. But after sorting I am getting different output every time. My code is given below. The value of N is 395. Thanks in advance.

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <vector>
#include <iostream>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include <algorithm>
#include <fstream>
#include <string.h>
#include <math.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/io.h>
#include <sys/mman.h>
#include <bits/stdc++.h>
#include <ctype.h>      
#include <algorithm>
#include <omp.h>

typedef struct Data
{

    int  POS;
    char Record[600];
};

using namespace std;

std::vector<Data> sortDataOnDevice(std::vector<Data>,int);

struct comparator_sort
{
    __device__

    bool operator() (const Data &x, const Data& y)
    {
        return x.POS < y.POS;
    }
};

int sortedFilePos;

#pragma omp threadprivate (sortedFilePos)

void externalSorting(void)
{
    string tempString;
    int count = 0;
    stringstream s;
    std::vector<string> result;
    std::vector<Data>   V1;
    std::vector<Data>  finalSorted;
    std::vector<Data> temp1;
    string line;
    string word;
    
    int fp1 = 0;
    omp_set_num_threads(395);
    outfile.open("/ExtrSort/Sorted.txt");
    int firstTrav = 0;
        #pragma omp parallel for private(tempString,line,word,firstTrav) shared(V1)
            for(int i=0;i<395;i++ )
            {
                    int vecIdx = 0;
                    int beg = 0, end = 0;
                    int chrNo;
                    int tid;
                    
                    int fileReadIdx = 0;
                    stringstream intToString;
                    intToString << i;
                    intToString >> tempString;
                    std::string name = "/ExtrSort/Chr_Iteration" + tempString + ".txt";
    

                    cout<<name<<"     "<<omp_get_thread_num()<<endl;
                    
                    std::ifstream sortedFile(name.c_str());

                    if(!sortedFile)
                        cout<<"Fie openning error"<<endl;

                    
                        for(int n=0;n<1000;n++)
                        {

                            int wordCount = 0;

                            int chrFlag = 0;


                            Data *sd = (Data*)malloc(sizeof(Data));
                            
                            getline(sortedFile, line);
                        
                            stringstream s(line);
                            while(s>>word)
                            {
                                wordCount++;
                                if (wordCount == 4)
                                {

                                    strcpy(sd->wholeRecord,line.c_str());
                                    s >> word;
                                    stringstream geek(word);
                                    geek>>sd->POS;
                                    geek.clear();

                                    if(n==999)
                                    {

                                        int fp = sortedFile.tellg();


                                        sortedFilePos = fp;

                                    }

                                    break;
                                }
                            }
                            #pragma omp critical//(fill)
                            {
                                V1.push_back(*sd);
                                free(sd);
                                sd = NULL;
                            }


                        }//for(int n=0;n<1000;n++)
            }
            cout<<file1.size()<<endl;

            finalSorted = sortDataOnDevice(V1,0);

            cout<<finalSorted[0].wholeRecord<<endl;

            V1.clear();

            outfile.close();
        
        
}

std::vector<Data> sortDataOnDevice(std::vector<SAMData> dataX, int composite)
{

    thrust::device_vector<Data> dDataX(dataX.size());
    thrust::copy(dataX.begin(),dataX.end(),dDataX.begin());



    cout<<"Copied"<<endl;

    thrust::stable_sort(dDataX.begin(),dDataX.end(),comparator_sort());

    cout<<"Sorted"<<endl;

    std::vector<Data> sortedDataHost(dDataX.size());
    thrust::copy(dDataX.begin(),dDataX.end(),sortedDataHost.begin());

    dataX.clear();
    dDataX.clear();

    cout<<"After sort"<<endl<<endl<<endl;

    return sortedDataHost;
    
}


Solution 1:[1]

I'm not sure what the original code looks like, because the sample presented has several dozen errors in it from missing variables, missing includes, and references to non-existent struct fields. Here's a simplified version with what I think is the fix in it. The OpenMP pragmas seem to be basically correct, but there's no check to see if the record actually got populated in the while loop or not before adding it to the vector. Due to the way uninitialized data behaves, note that the use of malloc while certainly ok in C++ means the object is created in an inconsistent uninitialized state, you are probably getting different slices of garbage data sent into the vector as a result.

The version below has these main differences:

  1. Data is declared correctly for C++ (typedef requires a name after the declaration, C++ provides the typedef by default).
  2. POS is initialized to -1 on construction of a Data struct, this syntax requires C++11 and the change in declaration of sd below.
  3. Rather than privatizing variables, I've moved their declarations to the appropriate scopes so they are naturally scoped to each thread. This is better for performance, and in my opinion for readability, but is not a requirement.
  4. sd is declared on the stack rather than as a pointer to a malloced buffer. If it must be allocated with malloc, then it must be initialized with placement new to get correct behavior.
  5. The various stringstreams have been removed in favor of directly using std::to_string on ints and directly reading values as integers where appropriate.
  6. The key change, the record is only added to the vector if POS has been set to a new value after the while loop.

Since this is an incomplete example, I can't be sure this will fix the problem, but the code should be easier to reason about with these changes applied.

#include <ctype.h>
#include <fstream>
#include <iostream>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include <sstream>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <vector>

struct Data {

  int POS = -1;
  string Record;
};

using namespace std;

std::vector<Data> sortDataOnDevice(std::vector<Data>, int);

struct comparator_sort {
  bool operator()(const Data& x, const Data& y) { return x.POS < y.POS; }
};

int sortedFilePos;

#pragma omp threadprivate(sortedFilePos)

void externalSorting(void)
{
  std::ofstream outfile;
  std::vector<string> result;
  std::vector<Data> V1;
  std::vector<Data> finalSorted;
  std::vector<Data> temp1;

  int fp1 = 0;
  omp_set_num_threads(395);
  outfile.open("/ExtrSort/Sorted.txt");
#pragma omp parallel for shared(V1) default(none)
  for (int i = 0; i < 395; i++) {

    std::string name = "/ExtrSort/Chr_Iteration" + std::to_string(i) + ".txt";

    cout << name << "     " << omp_get_thread_num() << endl;

    std::ifstream sortedFile(name);

    if (!sortedFile) {
      cout << "Fie openning error" << endl;
    }

    for (int n = 0; n < 1000; n++) {
      Data sd {}; // create and initialize on the stack
      int wordCount = 0;
      int chrFlag = 0;
      string line;
      getline(sortedFile, line);

      string word;
      istringstream s(line);
      while (s >> word) {
        wordCount++;
        if (wordCount == 4) {
          sd.Record = line;
          s >> sd.POS;
          if (n == 999) {
            int fp = sortedFile.tellg();
            sortedFilePos = fp;
          }
          break;
        }
      }
      if (POS != -1) { // KEY CHANGE
#pragma omp critical //(fill)
        {
          V1.push_back(sd);
        }
      }
    } // for(int n=0;n<1000;n++)
  }
  /* cout << file1.size() << endl; */

  finalSorted = sortDataOnDevice(V1, 0);

  cout << finalSorted[0].Record << endl;

  V1.clear();

  outfile.close();
}

If for some reason you need the original structure, the only required changes should be a check for whether the record is valid, and either replacing the malloc with new or after allocating the memory using placement new to initialize the record before using it.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1