Parallelize a triple nested for loop in C++ with OpenMP
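I have the following code. Here, a, b, c, a1, b1 and c1 are three-dimensional vectors. a, b and c are independent of one another, and a1, b1 and c1 are independent of one another as well. I want to parallelize the calculation using OpenMP. However, when I run it with OpenMP, I get a "segmentation fault" error. Could anyone tell me what the problem is? Thank you in advance.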
#include <omp.h>
#include<math.h>
#include<cmath>
#include<vector>
#include<iostream>
using namespace std;

// Question's original attempt: fills three nx*ny*nz float grids (a, b, c),
// then derives a1, b1, c1 from them on the interior points, with an OpenMP
// "parallel for" on the outermost loop of each nest.
//
// NOTE: this is the failing version that produces the reported
// "segmentation fault"; it is reproduced as posted so the discussion that
// follows still applies. The loop counters i, j, k are declared once at
// function scope, and the data-sharing clauses list i and k as shared.
// A shared inner counter k is advanced by all threads at once, so it can
// run past nz and index out of bounds. i and k should be private.
int main ()
{
    int nx=801; // number of grid points in x direction
    int ny=501; // number of grid points in y direction
    int nz=401; // number of grid points in z direction
    float pi=3.14159265358979323846;
    unsigned int i,j,k;

    // Six nx*ny*nz grids, zero-initialised.
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    cout<<"start"<<endl;

    // BUG (subject of the question): i and k must not be shared.
    #pragma omp parallel for private (j) shared(a,b,c,i,k,nx,ny,nz)
    for (i=0;i<nx;i++)
        for (j=0;j<ny;j++)
            for (k=0;k<nz;k++)
            {
                a[i][j][k]=sin(2.0*pi/float(nx*ny*nz)*float(i*j*k));
                b[i][j][k]=cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c[i][j][k]=sin(2.0*pi/float(nx*ny*nz))*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    // Interior points only: the stencil reads c[i+1][j][k].
    // BUG (subject of the question): i and k must not be shared.
    #pragma omp parallel for private (j) shared(a1,b1,c1,a,b,c,i,k,nx,ny,nz)
    for (i=1;i<nx-1;i++)
        for (j=1;j<ny-1;j++)
            for (k=1;k<nz-1;k++)
            {
                a1[i][j][k]=c[i+1][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c1[i][j][k]=16.0*a[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    cout<<"finish"<<endl;
    return 0;
}
This code is very easy to parallelise with OpenMP. However, you made a few mistakes in your own attempt, notably by trying to declare `i` and `k` as `shared` whereas they should be `private`. Better even, just don't declare these variables in advance, and declare them inside the `for` loop instead. This way, they automatically have the right scope, preventing you from mixing it up.
Here is what this would give:
#include <omp.h>
#include<math.h>
#include<cmath>
#include<vector>
#include<iostream>
using namespace std;

// Corrected version: fills three nx*ny*nz float grids (a, b, c), then derives
// a1, b1, c1 from them on the interior points. The outermost loop of each
// nest is distributed over threads with "#pragma omp parallel for".
//
// The loop indices are declared inside the for statements, so each thread
// gets its own i, j and k and no data-sharing clauses are needed.
int main ()
{
    int nx=801; // number of grid points in x direction
    int ny=501; // number of grid points in y direction
    int nz=401; // number of grid points in z direction
    float pi=3.14159265358979323846;

    // Six nx*ny*nz grids, zero-initialised.
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    cout<<"start"<<endl;

    // Each iteration writes only its own [i][j][k] element, so the i-range
    // can be split across threads without synchronisation.
    #pragma omp parallel for
    for (int i=0;i<nx;i++)
        for (int j=0;j<ny;j++)
            for (int k=0;k<nz;k++)
            {
                a[i][j][k]=sin(2.0*pi/float(nx*ny*nz)*float(i*j*k));
                b[i][j][k]=cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c[i][j][k]=sin(2.0*pi/float(nx*ny*nz))*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    // Interior points only: the stencil reads c[i+1][j][k]. a, b and c are
    // only read here; they were completely filled by the previous region.
    #pragma omp parallel for
    for (int i=1;i<nx-1;i++)
        for (int j=1;j<ny-1;j++)
            for (int k=1;k<nz-1;k++)
            {
                a1[i][j][k]=c[i+1][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
                c1[i][j][k]=16.0*a[i][j][k]*cos(5.0*pi/float(nx*ny*nz)*float(i*j*k));
            }

    cout<<"finish"<<endl;
    return 0;
}
Now, since you ask about parallelising the code, I guess you are interested in performance. So nothing should prevent you from implementing one or two basic performance optimisations, like this:
#include <omp.h>
#include<math.h>
#include<cmath>
#include<vector>
#include<iostream>
using namespace std;

// Optimised version: same computation as the plain "parallel for" variant,
// with loop-invariant factors hoisted, single-precision literals, one
// parallel region shared by both loop nests, and wall-clock timing of that
// region via omp_get_wtime().
int main ()
{
    int nx=801; // number of grid points in x direction
    int ny=501; // number of grid points in y direction
    int nz=401; // number of grid points in z direction
    float pi=3.14159265358979323846;

    // Six nx*ny*nz grids, zero-initialised.
    vector<vector<vector<float> > > a (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > a1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > b1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));
    vector<vector<vector<float> > > c1 (nx,vector<vector<float> >(ny,vector <float>(nz,0.0)));

    // Factors that do not depend on any loop index are computed once.
    const float pioversize = pi/(nx*ny*nz);
    const float sin2pioversize = sin(2.0f*pioversize);

    cout<<"start"<<endl;
    double tbeg = omp_get_wtime();

    // One parallel region for both loop nests, so the thread team is
    // created only once.
    #pragma omp parallel
    {
        #pragma omp for
        for (int i=0;i<nx;i++)
            for (int j=0;j<ny;j++)
            {
                // i*j*pi/(nx*ny*nz) is constant along k.
                float ijpioversize=i*j*pioversize;
                for (int k=0;k<nz;k++)
                {
                    a[i][j][k]=sin(2.0f*ijpioversize*k);
                    b[i][j][k]=cos(5.0f*ijpioversize*k);
                    c[i][j][k]=sin2pioversize*cos(5.0f*ijpioversize*k);
                }
            }
        // The implicit barrier at the end of the loop construct above (no
        // nowait clause) guarantees a, b and c are fully written before the
        // stencil below reads c[i+1][j][k] from another thread's range.

        #pragma omp for
        for (int i=1;i<nx-1;i++)
            for (int j=1;j<ny-1;j++)
            {
                float ijpioversize=i*j*pioversize;
                for (int k=1;k<nz-1;k++)
                {
                    a1[i][j][k]=c[i+1][j][k]*cos(5.0f*ijpioversize*k);
                    b1[i][j][k]=a[i][j][k]+b[i][j][k]+c[i][j][k]*cos(5.0f*ijpioversize*k);
                    c1[i][j][k]=16.0f*a[i][j][k]*cos(5.0f*ijpioversize*k);
                }
            }
    }

    double time = omp_get_wtime() - tbeg;
    cout<<"finish in "<<time<<" seconds"<<endl;
    return 0;
}
With this, the code should be faster.
Comments
Post a Comment