Skip to content

Commit a4893ad

Browse files
authored
Update MPI-Examples. Add instructions for running on AAC7, update to the new OSU benchmark version, and add a one-line patch to work around a teardown race condition between hipDeviceReset() and MPI_Finalize(). Fix typo in CMake of GhostExchange Ver6. (#142)
1 parent 9f387bd commit a4893ad

2 files changed

Lines changed: 28 additions & 27 deletions

File tree

MPI-examples/GhostExchange/GhostExchange_ArrayAssign/Ver6/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ endif()
7575

7676
add_executable(GhostExchange GhostExchange.cc timer.c timer.h)
7777

78-
set_target_properties(GhostExchange PROPERTIES INCLUDE_DIRECTORIES "${MPI_CXX_INCLUDE_PATH};$ENV{ROCM_PATH}/include/rcotracer")
78+
set_target_properties(GhostExchange PROPERTIES INCLUDE_DIRECTORIES "${MPI_CXX_INCLUDE_PATH};$ENV{ROCM_PATH}/include/roctracer")
7979
set_target_properties(GhostExchange PROPERTIES COMPILE_FLAGS "${MPI_CXX_COMPILE_FLAGS}")
8080
target_link_libraries(GhostExchange PRIVATE "${MPI_CXX_LIBRARIES};-L$ENV{ROCM_PATH}/lib -lroctx64")
8181

MPI-examples/README.md

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33

44
README.md for `HPCTrainingExamples/MPI-examples` in the Training Examples repository.
55

6+
> [!NOTE]
7+
> If you are on **AAC7**, replace the generic `module load openmpi rocm amdclang` steps with these modules to follow the rest of the exercise:
8+
> ```
9+
> module unload openmpi rocm
10+
> module load rocm-therock/23.1.0 openmpi/5.0.10-ucc1.6.0-ucx1.19.1-xpmem-2.7.4-rocm-therock-23.1.0
11+
> export CC=$(which amdclang); export CXX=$(which amdclang++); export FC=$(which amdflang)
12+
> ```
13+
614
## Point-to-point and collective
715
816
**NOTE**: these exercises have been tested on MI210 and MI300A accelerators using a container environment.
@@ -11,7 +19,7 @@ To see details on the container environment (such as operating system and module
1119
Allocate at least two GPUs and set up your environment
1220
1321
```
14-
module load openmpi rocm
22+
module load rocm openmpi # For modules on AAC7, see note at beginning
1523
export OMPI_CXX=hipcc
1624
```
1725
@@ -27,47 +35,41 @@ mpirun -n 2 -mca pml ucx ./pt2pt
2735
2836
## OSU Benchmark
2937
30-
Get the OSU micro-benchmark tarball and extract it
38+
Set up your module environment
3139
```
32-
mkdir OMB
33-
cd OMB
34-
wget https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.3.tar.gz
35-
tar -xvf osu-micro-benchmarks-7.3.tar.gz
40+
module load rocm openmpi # For modules on AAC7, see note at beginning
3641
```
37-
38-
Create a build directory and cd to osu-micro-benchmarks-7.3
42+
Then, get the OSU micro-benchmark tarball and extract it
3943
```
40-
mkdir build
44+
mkdir OMB
45+
cd OMB
46+
wget https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz
47+
tar -xvf osu-micro-benchmarks-7.5.tar.gz
4148
cd osu-micro-benchmarks-7.5
42-
module load rocm openmpi
4349
```
4450
45-
Build and install OSU micro-benchmarks
51+
Build and install the OSU micro-benchmarks
4652
```
53+
mkdir -p ../build
54+
# Work around a teardown race between hipDeviceReset() and MPI_Finalize() by replacing hipDeviceReset with hipDeviceSynchronize
55+
sed -i 's/hipDeviceReset/hipDeviceSynchronize/g' c/util/osu_util_mpi.c
4756
./configure --prefix=`pwd`/../build/ \
4857
CC=`which mpicc` \
4958
CXX=`which mpicxx` \
5059
CPPFLAGS=-D__HIP_PLATFORM_AMD__=1 \
5160
--enable-rocm \
5261
--with-rocm=${ROCM_PATH}
53-
make -j12
62+
make
5463
make install
5564
```
56-
If you get the error "cannot include hip/hip_runtime_api.h", grep for __HIP_PLATFORM_HCC__ and replace it with __HIP_PLATFORM_AMD__ in configure.ac and configure files.
5765
58-
Check if osu microbenchmark is actually built
66+
Check if the OSU micro-benchmarks are actually built
5967
```
6068
ls -l ../build/libexec/osu-micro-benchmarks/mpi/
61-
6269
```
63-
if you see files collective, one-sided, pt2pt, and startup, your build is successful.
70+
If you see the folders `collective/`, `one-sided/`, `pt2pt/`, and `startup/`, your build is successful.
6471
65-
Allocate 2 GPUs, and make those visible
66-
```
67-
export HIP_VISIBLE_DEVICES=0,1
68-
```
69-
70-
Make sure GPU-Aware communication is enabled and run the benchmark
72+
Allocate 2 GPUs, make sure GPU-aware communication is enabled, and run the benchmark
7173
```
7274
mpirun -n 2 -mca pml ucx ../build/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bw \
7375
-m $((16*1024*1024)) D D
@@ -85,12 +87,12 @@ The Ghost Exchange example is a simplified instance of what we believe a real sc
8587
8688
For what follows, we focus on the 2D [OpenMP version set](https://github.com/amd/HPCTrainingExamples/tree/main/MPI-examples/GhostExchange/GhostExchange_ArrayAssign), which begins with a CPU only version that can be compiled and run as below:
8789
```
88-
module load amdclang openmpi
90+
module load rocm openmpi amdclang # For modules on AAC7, see note at beginning
8991
git clone https://github.com/amd/HPCTrainingExamples.git
9092
cd HPCTrainingExamples/MPI-examples/GhostExchange/GhostExchange_ArrayAssign/Orig
9193
mkdir build && cd build
9294
cmake ..
93-
make -j
95+
make
9496
mpirun -n 8 --mca pml ucx ./GhostExchange -x 4 -y 2 -i 20000 -j 20000 -h 2 -t -c -I 1000
9597
```
9698
We can improve this performance by using process placement so that we are using all the memory
@@ -174,8 +176,7 @@ Two more versions are available in the dedicated [directory](https://github.com/
174176
To run RCCL test, follow these steps:
175177
176178
```
177-
module load rocm
178-
module load openmpi
179+
module load rocm openmpi # For modules on AAC7, see note at beginning
179180
git clone https://github.com/ROCm/rccl-tests.git
180181
cd rccl-tests/
181182
make MPI=1 MPI_HOME=$MPI_PATH HIP_HOME=$ROCM_PATH

0 commit comments

Comments
 (0)