WRF 4.1, node: Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz, 32 cores, Intel MPI
icc/ifort 19.0.5.281 20190815
AVX512:
# WRF configure.wrf architecture settings: Intel ifort/icc driven through the
# MPI compiler wrappers, distributed-memory build, OpenMP switched off.
DESCRIPTION = INTEL ($SFC/$SCC): Xeon (SNB with AVX mods)
DMPARALLEL = 1
# The OpenMP flags below are commented out, so all three variables are empty.
OMPCPP = # -D_OPENMP
OMP = # -qopenmp -fpp -auto
OMPCC = # -qopenmp -fpp -auto
# Serial compilers; the MPI wrappers are told to use them via -f90= / -cc=.
SFC = ifort
SCC = icc
CCOMP = icc
DM_FC = mpif90 -f90=$(SFC)
DM_CC = mpicc -cc=$(SCC)
FC = $(DM_FC)
CC = $(DM_CC) -DFSEEKO64_OK
LD = $(FC)
RWORDSIZE = $(NATIVE_RWORDSIZE)
# -real-size takes a width in bits: the shell evaluates 8 * RWORDSIZE.
PROMOTION = -real-size `expr 8 \* $(RWORDSIZE)` -i4
ARCH_LOCAL = -DNONSTANDARD_SYSTEM_FUNC -DCHUNK=64 -DXEON_OPTIMIZED_WSM5 -DOPTIMIZE_CFL_TEST -DWRF_USE_CLM $(NETCDF4_IO_OPTS)
OPTNOSIMD =
# Instruction-set target appended last to every flag set below.
OPTAVX = -xCOMMON-AVX512
# NOTE(review): -xHost and $(OPTAVX) both name a code-generation target in the
# same command line; the rightmost one (-xCOMMON-AVX512) is expected to win.
# Confirm against the compiler driver's "overriding" warning in the build log.
CFLAGS_LOCAL = -w -O3 -ip -xHost -fp-model fast=2 -no-prec-div -no-prec-sqrt -ftz -no-multibyte-chars $(OPTAVX)
LDFLAGS_LOCAL = -ip -xHost -fp-model fast=2 -no-prec-div -no-prec-sqrt -ftz -align all -fno-alias -fno-common $(OPTAVX)
CPLUSPLUSLIB =
ESMF_LDFLAG = $(CPLUSPLUSLIB)
FCOPTIM = -O3 $(OPTAVX)
# Reduced-optimization files are built with the full optimization flags here.
FCREDUCEDOPT = $(FCOPTIM)
FCNOOPT = -O0 -fno-inline -no-ip
# Everything after the first '#' is a comment, so FCDEBUG expands to nothing.
FCDEBUG = # -g $(FCNOOPT) -traceback # -fpe0 -check noarg_temp_created,bounds,format,output_conversion,pointers,uninit -ftrapuv -unroll0 -u
FORMAT_FIXED = -FI
FORMAT_FREE = -FR
FCSUFFIX =
BYTESWAPIO = -convert big_endian
RECORDLENGTH = -assume byterecl
#FCBASEOPTS_NO_G = -w $(OMP) -auto -ftz -fno-alias -fp-model fast=1 -no-prec-div -no-prec-sqrt $(FORMAT_FREE) $(BYTESWAPIO) -auto -align array64byte #-vec-report6
# Exactly one -fp-model precision keyword is passed. An earlier revision of this
# line also carried "-fp-model precise" ahead of "-fp-model fast=2"; those
# keywords are mutually exclusive and the rightmost takes effect, so the dead
# "precise" flag was dropped. The effective floating-point model stays fast=2.
FCBASEOPTS_NO_G = -ip -w -ftz -align all -fno-alias $(FORMAT_FREE) $(BYTESWAPIO) -xHost -fp-model fast=2 -no-heap-arrays -no-prec-div -no-prec-sqrt -fno-common $(OPTAVX)
FCBASEOPTS = $(FCBASEOPTS_NO_G) $(FCDEBUG)
MODULE_SRCH_FLAG =
TRADFLAG = -traditional-cpp $(NETCDF4_IO_OPTS)
CPP = /lib/cpp -P -nostdinc
AR = ar
ARFLAGS = ru
M4 = m4
RANLIB = ranlib
RLFLAGS =
CC_TOOLS = $(SCC)
AVX:
./configure , Choice 20
DATA:
JAN00_GFS_FNL.tar.gz
namelist.input:
&time_control
run_days = 0,
run_hours = 2,
run_minutes = 0,
run_seconds = 0,
start_year = 2000, 2007, 2007,
start_month = 01, 08, 08,
start_day = 24, 15, 15,
start_hour = 12, 00, 00,
end_year = 2000, 2007, 2007,
end_month = 01, 08, 08,
end_day = 25, 16, 16,
end_hour = 12, 00, 00,
interval_seconds = 21600
input_from_file = .true.,.true.,.true.,
history_interval = 180, 60, 60,
frames_per_outfile = 1000, 1000, 1000,
restart = .false.,
restart_interval = 7200,
io_form_history = 2
io_form_restart = 2
io_form_input = 2
io_form_boundary = 2
/
&domains
time_step = 15,
time_step_fract_num = 0,
time_step_fract_den = 1,
max_dom = 1,
e_we = 421, 334, 94,
e_sn = 241, 304, 91,
e_vert = 33, 33, 33,
p_top_requested = 5000,
num_metgrid_levels = 27,
num_metgrid_soil_levels = 2,
dx = 2500, 10000, 3333.33,
dy = 2500, 10000, 3333.33,
grid_id = 1, 2, 3,
parent_id = 0, 1, 2,
i_parent_start = 1, 31, 30,
j_parent_start = 1, 17, 30,
parent_grid_ratio = 1, 3, 3,
parent_time_step_ratio = 1, 3, 3,
feedback = 1,
smooth_option = 0
I found that the CPU frequency is relatively low when running the AVX512 build.
With Turbo turned off: 2500-2700 MHz. With Turbo turned on: 2700-3200 MHz.
icc/ifort 19.0.5.281 20190815
AVX512:
# WRF configure.wrf architecture settings: Intel ifort/icc driven through the
# MPI compiler wrappers, distributed-memory build, OpenMP switched off.
DESCRIPTION = INTEL ($SFC/$SCC): Xeon (SNB with AVX mods)
DMPARALLEL = 1
# The OpenMP flags below are commented out, so all three variables are empty.
OMPCPP = # -D_OPENMP
OMP = # -qopenmp -fpp -auto
OMPCC = # -qopenmp -fpp -auto
# Serial compilers; the MPI wrappers are told to use them via -f90= / -cc=.
SFC = ifort
SCC = icc
CCOMP = icc
DM_FC = mpif90 -f90=$(SFC)
DM_CC = mpicc -cc=$(SCC)
FC = $(DM_FC)
CC = $(DM_CC) -DFSEEKO64_OK
LD = $(FC)
RWORDSIZE = $(NATIVE_RWORDSIZE)
# -real-size takes a width in bits: the shell evaluates 8 * RWORDSIZE.
PROMOTION = -real-size `expr 8 \* $(RWORDSIZE)` -i4
ARCH_LOCAL = -DNONSTANDARD_SYSTEM_FUNC -DCHUNK=64 -DXEON_OPTIMIZED_WSM5 -DOPTIMIZE_CFL_TEST -DWRF_USE_CLM $(NETCDF4_IO_OPTS)
OPTNOSIMD =
# Instruction-set target appended last to every flag set below.
OPTAVX = -xCOMMON-AVX512
# NOTE(review): -xHost and $(OPTAVX) both name a code-generation target in the
# same command line; the rightmost one (-xCOMMON-AVX512) is expected to win.
# Confirm against the compiler driver's "overriding" warning in the build log.
CFLAGS_LOCAL = -w -O3 -ip -xHost -fp-model fast=2 -no-prec-div -no-prec-sqrt -ftz -no-multibyte-chars $(OPTAVX)
LDFLAGS_LOCAL = -ip -xHost -fp-model fast=2 -no-prec-div -no-prec-sqrt -ftz -align all -fno-alias -fno-common $(OPTAVX)
CPLUSPLUSLIB =
ESMF_LDFLAG = $(CPLUSPLUSLIB)
FCOPTIM = -O3 $(OPTAVX)
# Reduced-optimization files are built with the full optimization flags here.
FCREDUCEDOPT = $(FCOPTIM)
FCNOOPT = -O0 -fno-inline -no-ip
# Everything after the first '#' is a comment, so FCDEBUG expands to nothing.
FCDEBUG = # -g $(FCNOOPT) -traceback # -fpe0 -check noarg_temp_created,bounds,format,output_conversion,pointers,uninit -ftrapuv -unroll0 -u
FORMAT_FIXED = -FI
FORMAT_FREE = -FR
FCSUFFIX =
BYTESWAPIO = -convert big_endian
RECORDLENGTH = -assume byterecl
#FCBASEOPTS_NO_G = -w $(OMP) -auto -ftz -fno-alias -fp-model fast=1 -no-prec-div -no-prec-sqrt $(FORMAT_FREE) $(BYTESWAPIO) -auto -align array64byte #-vec-report6
# Exactly one -fp-model precision keyword is passed. An earlier revision of this
# line also carried "-fp-model precise" ahead of "-fp-model fast=2"; those
# keywords are mutually exclusive and the rightmost takes effect, so the dead
# "precise" flag was dropped. The effective floating-point model stays fast=2.
FCBASEOPTS_NO_G = -ip -w -ftz -align all -fno-alias $(FORMAT_FREE) $(BYTESWAPIO) -xHost -fp-model fast=2 -no-heap-arrays -no-prec-div -no-prec-sqrt -fno-common $(OPTAVX)
FCBASEOPTS = $(FCBASEOPTS_NO_G) $(FCDEBUG)
MODULE_SRCH_FLAG =
TRADFLAG = -traditional-cpp $(NETCDF4_IO_OPTS)
CPP = /lib/cpp -P -nostdinc
AR = ar
ARFLAGS = ru
M4 = m4
RANLIB = ranlib
RLFLAGS =
CC_TOOLS = $(SCC)
AVX:
./configure , Choice 20
DATA:
JAN00_GFS_FNL.tar.gz
namelist.input:
&time_control
run_days = 0,
run_hours = 2,
run_minutes = 0,
run_seconds = 0,
start_year = 2000, 2007, 2007,
start_month = 01, 08, 08,
start_day = 24, 15, 15,
start_hour = 12, 00, 00,
end_year = 2000, 2007, 2007,
end_month = 01, 08, 08,
end_day = 25, 16, 16,
end_hour = 12, 00, 00,
interval_seconds = 21600
input_from_file = .true.,.true.,.true.,
history_interval = 180, 60, 60,
frames_per_outfile = 1000, 1000, 1000,
restart = .false.,
restart_interval = 7200,
io_form_history = 2
io_form_restart = 2
io_form_input = 2
io_form_boundary = 2
/
&domains
time_step = 15,
time_step_fract_num = 0,
time_step_fract_den = 1,
max_dom = 1,
e_we = 421, 334, 94,
e_sn = 241, 304, 91,
e_vert = 33, 33, 33,
p_top_requested = 5000,
num_metgrid_levels = 27,
num_metgrid_soil_levels = 2,
dx = 2500, 10000, 3333.33,
dy = 2500, 10000, 3333.33,
grid_id = 1, 2, 3,
parent_id = 0, 1, 2,
i_parent_start = 1, 31, 30,
j_parent_start = 1, 17, 30,
parent_grid_ratio = 1, 3, 3,
parent_time_step_ratio = 1, 3, 3,
feedback = 1,
smooth_option = 0
I found that the CPU frequency is relatively low when running the AVX512 build.
With Turbo turned off: 2500-2700 MHz. With Turbo turned on: 2700-3200 MHz.