Skip to content

Commit

Permalink
suppressed ipass, PERFORM_CUTHILL_MCKEE, ACTUALLY_IMPLEMENT_PERM* and…
Browse files Browse the repository at this point in the history
… FURTHER_REDUCE_CACHE_MISSES, which were making the code too complex with no significant performance improvement. Also removed GENERATE_PARAVER_TRACES, which is no longer used.
  • Loading branch information
komatits committed Feb 19, 2014
1 parent cdb75e3 commit 5db0729
Show file tree
Hide file tree
Showing 18 changed files with 147 additions and 1,258 deletions.
21 changes: 0 additions & 21 deletions setup/constants.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,33 +33,12 @@
! maximum size of the X and Z directions of a JPEG image generated by the display routine
integer, parameter :: NX_NZ_IMAGE_MAX = 10000

! further reduce cache misses inner/outer in two passes in the case of an MPI simulation
! this flag is ignored in the case of a serial simulation
logical, parameter :: FURTHER_REDUCE_CACHE_MISSES = .true.

! for inverse Cuthill-McKee (1969) permutation
logical, parameter :: INVERSE = .true.
logical, parameter :: FACE = .false.
integer, parameter :: NGNOD_QUADRANGLE = 4
! perform classical or multi-level Cuthill-McKee ordering
logical, parameter :: CMcK_MULTI = .false.
! maximum size of multi-level Cuthill-McKee ordering
integer, parameter :: LIMIT_MULTI_CUTHILL = 50

! implement Cuthill-McKee or replace with identity permutation
logical, parameter :: ACTUALLY_IMPLEMENT_PERM_OUT = .false.
logical, parameter :: ACTUALLY_IMPLEMENT_PERM_INN = .false.
logical, parameter :: ACTUALLY_IMPLEMENT_PERM_WHOLE = .true.

! create file DATA/model_velocity.dat_output or not (can be huge and slow, thus off by default)
logical, parameter :: OUTPUT_MODEL_VELOCITY_FILE = .false.

! in PostScript files about mesh quality, show all elements above this threshold
double precision, parameter :: THRESHOLD_POSTSCRIPT = 95.d0 / 100.d0

! add MPI barriers and suppress seismograms if we generate traces of the run for analysis with "ParaVer"
logical, parameter :: GENERATE_PARAVER_TRACES = .false.

! option to display only part of the mesh and not the whole mesh,
! for instance to analyze Cuthill-McKee mesh partitioning etc.
! Possible values are:
Expand Down
6 changes: 0 additions & 6 deletions src/meshfem2D/read_parameter_file.F90
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,6 @@ module parameter_file
! non linear display to enhance small amplitudes in color images
double precision :: POWER_DISPLAY_COLOR

! perform inverse Cuthill-McKee (1969) permutation for mesh numbering
logical :: PERFORM_CUTHILL_MCKEE

! output seismograms in Seismic Unix format (adjoint traces will be read in the same format)
logical :: SU_FORMAT

Expand Down Expand Up @@ -201,9 +198,6 @@ subroutine read_parameter_file()
call read_value_integer_p(partitioning_method, 'mesher.partitioning_method')
if(err_occurred() /= 0) stop 'error reading parameter 5a in Par_file'

call read_value_logical_p(PERFORM_CUTHILL_MCKEE, 'mesher.PERFORM_CUTHILL_MCKEE')
if(err_occurred() /= 0) stop 'error reading parameter 5b in Par_file'

call read_value_integer_p(ngnod, 'mesher.ngnod')
if(err_occurred() /= 0) stop 'error reading parameter 6 in Par_file'

Expand Down
3 changes: 0 additions & 3 deletions src/meshfem2D/save_databases.f90
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,6 @@ subroutine save_databases(nspec,num_material,region_pml_external_mesh, &
write(15,*) 'POWER_DISPLAY_COLOR'
write(15,*) POWER_DISPLAY_COLOR

write(15,*) 'PERFORM_CUTHILL_MCKEE'
write(15,*) PERFORM_CUTHILL_MCKEE

write(15,*) 'SU_FORMAT'
write(15,*) SU_FORMAT

Expand Down
4 changes: 0 additions & 4 deletions src/specfem2D/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ OBJS_SPECFEM2D = \
$O/force_ftz.o \
$O/get_global.o \
$O/get_MPI.o \
$O/get_perm_cuthill_mckee.o \
$O/get_poroelastic_velocities.o \
$O/gll_library.o \
$O/gmat01.o \
Expand Down Expand Up @@ -309,9 +308,6 @@ $O/get_global.o: ${S}/get_global.f90 ${SETUP}/constants.h
$O/get_MPI.o: ${S}/get_MPI.F90 ${SETUP}/constants.h
${F90} $(FLAGS_CHECK) -c -o $O/get_MPI.o ${S}/get_MPI.F90

$O/get_perm_cuthill_mckee.o: ${S}/get_perm_cuthill_mckee.f90 ${SETUP}/constants.h
${F90} $(FLAGS_CHECK) -c -o $O/get_perm_cuthill_mckee.o ${S}/get_perm_cuthill_mckee.f90

$O/get_poroelastic_velocities.o: ${S}/get_poroelastic_velocities.f90 ${SETUP}/constants.h
${F90} $(FLAGS_CHECK) -c -o $O/get_poroelastic_velocities.o ${S}/get_poroelastic_velocities.f90

Expand Down
12 changes: 3 additions & 9 deletions src/specfem2D/createnum_fast.f90
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@
!
!========================================================================

subroutine createnum_fast(knods,ibool,shape,coorg,nglob,npgeo,nspec,ngnod,myrank,ipass)
subroutine createnum_fast(knods,ibool,shape,coorg,nglob,npgeo,nspec,ngnod,myrank)

! same as subroutine "createnum_slow" but with a faster algorithm

implicit none

include "constants.h"

integer nglob,npgeo,nspec,ngnod,myrank,ipass
integer nglob,npgeo,nspec,ngnod,myrank
integer knods(ngnod,nspec),ibool(NGLLX,NGLLZ,nspec)
double precision shape(ngnod,NGLLX,NGLLX)
double precision coorg(NDIM,npgeo)
Expand All @@ -70,7 +70,7 @@ subroutine createnum_fast(knods,ibool,shape,coorg,nglob,npgeo,nspec,ngnod,myrank


!---- create global mesh numbering
if(myrank == 0 .and. ipass == 1) then
if(myrank == 0) then
write(IOUT,*)
write(IOUT,*)
write(IOUT,*) 'Generating global mesh numbering (fast version)...'
Expand Down Expand Up @@ -231,12 +231,6 @@ subroutine createnum_fast(knods,ibool,shape,coorg,nglob,npgeo,nspec,ngnod,myrank
! check the numbering obtained
if(minval(ibool) /= 1 .or. maxval(ibool) /= nglob) call exit_MPI('Error while generating global numbering')

! if(myrank == 0 .and. ipass == 1) then
! write(IOUT,*)
! write(IOUT,*) 'Total number of points of the global mesh on slice 0: ',nglob
! write(IOUT,*)
! endif

end subroutine createnum_fast


Expand Down
15 changes: 3 additions & 12 deletions src/specfem2D/createnum_slow.f90
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@
!
!========================================================================

subroutine createnum_slow(knods,ibool,nglob,nspec,ngnod,myrank,ipass)
subroutine createnum_slow(knods,ibool,nglob,nspec,ngnod,myrank)

! generate the global numbering

implicit none

include "constants.h"

integer nglob,nspec,ngnod,myrank,ipass
integer nglob,nspec,ngnod,myrank

integer knods(ngnod,nspec),ibool(NGLLX,NGLLZ,nspec)

Expand All @@ -63,7 +63,7 @@ subroutine createnum_slow(knods,ibool,nglob,nspec,ngnod,myrank,ipass)


!---- create global mesh numbering
if(myrank == 0 .and. ipass == 1) then
if(myrank == 0) then
write(IOUT,*)
write(IOUT,*) 'Generating global mesh numbering (slow version)...'
write(IOUT,*)
Expand Down Expand Up @@ -311,14 +311,5 @@ subroutine createnum_slow(knods,ibool,nglob,nspec,ngnod,myrank,ipass)
! check the consistency of the generated numbering
if(minval(ibool) /= 1 .or. maxval(ibool) /= nglob) call exit_MPI('Error while generating global numbering')

! if(myrank == 0 .and. ipass == 1) then
! write(IOUT,*) 'Total number of points of the global mesh on slice 0: ',nglob,' distributed as follows:'
! write(IOUT,*)
! write(IOUT,*) 'Number of interior points: ',nglob-npedge-npcorn
! write(IOUT,*) 'Number of edge points (without corners): ',npedge
! write(IOUT,*) 'Number of corner points: ',npcorn
! write(IOUT,*)
! endif

end subroutine createnum_slow

12 changes: 5 additions & 7 deletions src/specfem2D/get_MPI.F90
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ subroutine get_MPI(nspec,ibool,knods,ngnod,nglob,elastic,poroelastic, &
inum_interfaces_poroelastic, &
ninterface_acoustic, ninterface_elastic, ninterface_poroelastic, &
mask_ispec_inner_outer, &
myrank,ipass,coord)
myrank,coord)

! sets up the MPI interface for communication between partitions

Expand Down Expand Up @@ -85,7 +85,7 @@ subroutine get_MPI(nspec,ibool,knods,ngnod,nglob,elastic,poroelastic, &

logical, dimension(nspec), intent(inout) :: mask_ispec_inner_outer

integer :: myrank,ipass
integer :: myrank
double precision, dimension(NDIM,nglob) :: coord

!local parameters
Expand Down Expand Up @@ -231,7 +231,7 @@ subroutine get_MPI(nspec,ibool,knods,ngnod,nglob,elastic,poroelastic, &
! outputs total number of MPI interface points
call MPI_REDUCE(num_points2, num_points1, 1, MPI_INTEGER, &
MPI_SUM, 0, MPI_COMM_WORLD, ier)
if( myrank == 0 .and. ipass == 1 ) then
if( myrank == 0 ) then
write(IOUT,*) 'total MPI interface points: ',num_points1
endif

Expand Down Expand Up @@ -276,7 +276,7 @@ subroutine get_MPI(nspec,ibool,knods,ngnod,nglob,elastic,poroelastic, &
call MPI_REDUCE(inum, num_points1, 1, MPI_INTEGER, &
MPI_SUM, 0, MPI_COMM_WORLD, ier)

if( myrank == 0 .and. ipass == 1 ) then
if( myrank == 0 ) then
write(IOUT,*) ' acoustic interface points: ',num_points1
endif

Expand Down Expand Up @@ -310,9 +310,7 @@ subroutine get_MPI(nspec,ibool,knods,ngnod,nglob,elastic,poroelastic, &
MPI_SUM, 0, MPI_COMM_WORLD, ier)

if( myrank == 0 ) then
if( ipass == 1 ) then
write(IOUT,*) ' assembly acoustic MPI interface points:',num_points2
endif
write(IOUT,*) ' assembly acoustic MPI interface points:',num_points2

! the two point counts (num_points1 and num_points2) do not need to match, for some reason..
!if( num_points2 /= num_points1 ) then
Expand Down
47 changes: 2 additions & 45 deletions src/specfem2D/get_global.f90
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,12 @@
!
!========================================================================


subroutine get_global(nspec_outer,nspec,nglob,ibool)
subroutine get_global(nspec,nglob,ibool)

implicit none
include "constants.h"

integer :: nspec_outer,nspec,nglob
integer :: nspec,nglob

integer, dimension(NGLLX,NGLLZ,nspec) :: ibool

Expand All @@ -69,46 +68,6 @@ subroutine get_global(nspec_outer,nspec,nglob,ibool)

inumber = 0

if(.not. ACTUALLY_IMPLEMENT_PERM_WHOLE) then

! first reduce cache misses in outer elements, since they are taken first
! loop over spectral elements
do ispec = 1,nspec_outer
do j=1,NGLLZ
do i=1,NGLLX
if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
! create a new point
inumber = inumber + 1
ibool(i,j,ispec) = inumber
mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
else
! use an existing point created previously
ibool(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
endif
enddo
enddo
enddo

! then reduce cache misses in inner elements, since they are taken second
! loop over spectral elements
do ispec = nspec_outer+1,nspec
do j=1,NGLLZ
do i=1,NGLLX
if(mask_ibool(copy_ibool_ori(i,j,ispec)) == -1) then
! create a new point
inumber = inumber + 1
ibool(i,j,ispec) = inumber
mask_ibool(copy_ibool_ori(i,j,ispec)) = inumber
else
! use an existing point created previously
ibool(i,j,ispec) = mask_ibool(copy_ibool_ori(i,j,ispec))
endif
enddo
enddo
enddo

else ! if ACTUALLY_IMPLEMENT_PERM_WHOLE

! reduce cache misses in all the elements
! loop over spectral elements
do ispec = 1,nspec
Expand All @@ -127,8 +86,6 @@ subroutine get_global(nspec_outer,nspec,nglob,ibool)
enddo
enddo

endif

deallocate(mask_ibool,copy_ibool_ori)

end subroutine get_global
Expand Down
Loading

0 comments on commit 5db0729

Please sign in to comment.