Parallelizing three nested for loops in C++ with OpenMP


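I have the following code. Here, a, b, c, a1, b1 and c1 are three-dimensional vectors. a, b and c are independent of each other, and a1, b1 and c1 are independent of each other as well. I want to parallelize the calculation using OpenMP. However, when I run it with OpenMP, I get a "segmentation fault" error. Could you tell me what the problem is? Thank you in advance.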

#include <omp.h>
#include <math.h>
#include <cmath>
#include <vector>
#include <iostream>

using namespace std;

// Question listing: fills three nx*ny*nz float grids (a, b, c) and then derives
// three more (a1, b1, c1) from them, with the outermost loop of each nest
// handed to OpenMP.
//
// NOTE(review): this is the asker's original attempt and is kept as posted so
// that it still demonstrates the reported crash. The loop indices are declared
// once at function scope and `k` is listed in shared(...), so every thread
// increments the same `k`; a thread can pass the `k<nz` test and then index
// with a value another thread has already advanced, running off the end of
// the innermost vector. `i` is the parallelised loop's own iteration variable,
// so listing it as shared is at best ignored and may be rejected outright by
// the compiler.
int main ()
{
    int nx=801;              // number of grid in x direction
    int ny=501;
    int nz=401;
    float pi=3.14159265358979323846;
    unsigned int i,j,k;      // shared loop counters: the source of the race

    // Six nx*ny*nz grids, zero-initialised (about 0.64 GB of float data each
    // with 4-byte floats, before the nested-vector overhead).
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    cout<<"start"<<endl;

    // First nest: initialise a, b and c at every grid point.
    #pragma omp parallel for private (j) shared(a,b,c,i,k,nx,ny,nz)
    for (i=0;i<nx;i++)
        for (j=0;j<ny;j++)
            for (k=0;k<nz;k++)
            {
                a[i][j][k]=sin(2.0*pi/float(nx*ny*nz)*float(i*j*k));
                b[i][j][k]=cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c[i][j][k]=sin(2.0*pi/float(nx*ny*nz))*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    // Second nest: interior points only, because a1 reads c[i+1][j][k].
    #pragma omp parallel for private (j) shared(a1,b1,c1,a,b,c,i,k,nx,ny,nz)
    for (i=1;i<nx-1;i++)
        for (j=1;j<ny-1;j++)
            for (k=1;k<nz-1;k++)
            {
                a1[i][j][k]=c[i+1][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c1[i][j][k]=16.0*a[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    cout<<"finish"<<endl;
    return 0;
}

This code is easy to parallelise with OpenMP. However, you made a few mistakes in your own attempt, notably trying to declare i and k as shared whereas they should be private. Better still, don't declare these variables in advance; declare them inside the for loop statements. That way they automatically have the right scope, which prevents any mix-up.

Here is what this would give:

#include <omp.h>
#include <math.h>
#include <cmath>
#include <vector>
#include <iostream>

using namespace std;

// Corrected listing: same computation as the question, but every loop index is
// declared in its own for statement. A variable declared inside a parallel
// region is private to each thread, so no private/shared clauses are needed
// and the counters can no longer be clobbered by another thread.
int main ()
{
    int nx=801;              // number of grid in x direction
    int ny=501;
    int nz=401;
    float pi=3.14159265358979323846;

    // Six nx*ny*nz grids, zero-initialised. They are declared outside the
    // parallel regions and are therefore shared between threads by default.
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    cout<<"start"<<endl;

    // First nest: each i iteration writes only a[i], b[i] and c[i], so the
    // iterations can be distributed over threads without synchronisation.
    #pragma omp parallel for
    for (int i=0;i<nx;i++)
        for (int j=0;j<ny;j++)
            for (int k=0;k<nz;k++)
            {
                a[i][j][k]=sin(2.0*pi/float(nx*ny*nz)*float(i*j*k));
                b[i][j][k]=cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c[i][j][k]=sin(2.0*pi/float(nx*ny*nz))*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    // Second nest: interior points only. It reads c[i+1][j][k], which is safe
    // because the first parallel region has completed before this one starts.
    #pragma omp parallel for
    for (int i=1;i<nx-1;i++)
        for (int j=1;j<ny-1;j++)
            for (int k=1;k<nz-1;k++)
            {
                a1[i][j][k]=c[i+1][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c1[i][j][k]=16.0*a[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    cout<<"finish"<<endl;
    return 0;
}

Now, since you ask about parallelising the code, I guess you are interested in performance. So nothing should prevent you from implementing one or two basic performance optimisations, like this:

#include <omp.h>
#include <math.h>
#include <cmath>
#include <vector>
#include <iostream>

using namespace std;

// Optimised listing: one parallel region containing two work-shared loops,
// single-precision arithmetic throughout, loop-invariant factors hoisted out
// of the inner loops, and wall-clock timing of the parallel section.
int main ()
{
    int nx=801;              // number of grid in x direction
    int ny=501;
    int nz=401;
    float pi=3.14159265358979323846;

    // Six nx*ny*nz grids, zero-initialised and shared between threads.
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    // Factors that do not depend on i, j or k are computed once up front.
    const float pioversize = pi/(nx*ny*nz);
    const float sin2pioversize = sin(2.0f*pioversize);

    cout<<"start"<<endl;
    double tbeg = omp_get_wtime();

    // A single parallel region hosts both loops, so the thread team is set up
    // only once.
    #pragma omp parallel
    {
        // First nest: initialise a, b and c. The implicit barrier at the end
        // of this work-shared loop is required, because the next loop reads
        // c[i+1][j][k], which another thread may have written.
        #pragma omp for
        for (int i=0;i<nx;i++)
            for (int j=0;j<ny;j++)
            {
                float ijpioversize=i*j*pioversize;   // invariant over k
                for (int k=0;k<nz;k++)
                {
                    // Same cosine feeds b and c; evaluate it once.
                    const float cos5=cos(5.0f*ijpioversize*k);
                    a[i][j][k]=sin(2.0f*ijpioversize*k);
                    b[i][j][k]=cos5;
                    c[i][j][k]=sin2pioversize*cos5;
                }
            }

        // Second nest: interior points only.
        #pragma omp for
        for (int i=1;i<nx-1;i++)
            for (int j=1;j<ny-1;j++)
            {
                float ijpioversize=i*j*pioversize;   // invariant over k
                for (int k=1;k<nz-1;k++)
                {
                    // Same cosine feeds all three outputs; evaluate it once.
                    const float cos5=cos(5.0f*ijpioversize*k);
                    a1[i][j][k]=c[i+1][j][k]*cos5;
                    b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos5;
                    c1[i][j][k]=16.0f*a[i][j][k]*cos5;
                }
            }
    }

    double elapsed = omp_get_wtime() - tbeg;
    cout<<"finish in "<<elapsed<<" seconds"<<endl;

    return 0;
}

With this, the code should be faster.


Comments

Popular posts from this blog

c++ - llvm function pass ReplaceInstWithInst malloc -

java.lang.NoClassDefFoundError When Creating New Android Project -

Decoding a Python 2 `tempfile` with python-future -