Diffstat (limited to 'release_docs/INSTALL_parallel')
-rw-r--r--   release_docs/INSTALL_parallel   382
1 files changed, 382 insertions, 0 deletions
diff --git a/release_docs/INSTALL_parallel b/release_docs/INSTALL_parallel
new file mode 100644
index 0000000..5a8b603
--- /dev/null
+++ b/release_docs/INSTALL_parallel
@@ -0,0 +1,382 @@

        Installation instructions for Parallel HDF5
        -------------------------------------------

0. Use Build Scripts
--------------------
The HDF Group is accumulating build scripts to handle building parallel HDF5
on various platforms (Cray, IBM, SGI, etc.). These scripts are maintained and
updated continuously for current and future systems. The reader is strongly
encouraged to consult the repository at

    https://github.com/HDFGroup/build_hdf5

for building parallel HDF5 on these systems. All contributions, additions,
and fixes to the repository are welcome and encouraged.


1. Overview
-----------
This file contains instructions for the installation of parallel HDF5 (PHDF5).
It is assumed that you are familiar with the general installation steps
described in the INSTALL file. Get familiar with that file before trying
the parallel HDF5 installation.

The remainder of this section explains the requirements for running PHDF5.
Section 2 gives quick instructions for some well-known systems. Section 3
explains the details of the installation steps. Section 4 gives some details
on running the parallel test suites.


1.1. Requirements
-----------------
PHDF5 requires an MPI compiler with MPI-IO support and a POSIX compliant
(Ref. 1) parallel file system. If you are not sure about these, first consult
your system support staff for information on how to compile an MPI program,
how to run an MPI application, and how to access the parallel file system.
Sample MPI-IO C and Fortran programs are provided in Appendix A, "Sample
programs". You can use them to run simple tests of your MPI compilers and
the parallel file system.


1.2. Further Help
-----------------
If you still have difficulties installing PHDF5 on your system, please send
mail to

    help@hdfgroup.org

In your mail, please include the output of "uname -a". If you have run the
"configure" command, attach the output of the command and the content of
the file "config.log".


2. Quick Instructions for known systems
---------------------------------------
The following shows the particular steps needed to configure parallel HDF5
on a few machines we have tested. If your platform is not shown, or the
steps do not work for it, please go to the next section for more detailed
explanations.


2.1. Known parallel compilers
-----------------------------
HDF5 knows several parallel compilers: mpicc, hcc, mpcc, mpcc_r. To build
parallel HDF5 with one of them, set CC to that compiler and run configure:

    $ CC=/usr/local/mpi/bin/mpicc ./configure --enable-parallel --prefix=<install-directory>
    $ make              # build the library
    $ make check        # verify the correctness
                        # Read the Details section about parallel tests.
    $ make install


2.2. Linux 2.4 and greater
--------------------------
Be sure that your installation of MPICH was configured with the following
configuration command-line option:

    -cflags="-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64"

This allows for files larger than 2 GB on Linux systems and is only available
with Linux kernels 2.4 and greater.
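
If you are unsure whether your MPI compiler wrapper already carries these
flags, a quick check such as the following may help. This is only a sketch:
it assumes an MPICH-style mpicc that supports the -show option; other
wrappers may use a different option or none at all.

    # Print the underlying compile command line and look for the
    # large-file macro listed above.
    $ mpicc -show | grep -o '_FILE_OFFSET_BITS=64'

If nothing is printed, consider reconfiguring MPICH with the -cflags option
shown above before building parallel HDF5.
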

2.3. Hopper (Cray XE6) (for v1.8 and later)
-------------------------------------------
The following steps are for building HDF5 for the Hopper compute nodes. They
would probably work for other Cray systems but have not been verified.

Obtain a copy of the source from the HDF ftp server:
http://www.hdfgroup.org/ftp/HDF5/current/src/
(the link might change, so always double check the HDF Group website)

    $ wget http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-x.x.x.tar.gz

and unpack the tarball.

The entire build process should be done on a MOM node in an interactive
allocation and on a file system accessible by all compute nodes. Request an
interactive allocation with qsub:

    qsub -I -q debug -l mppwidth=8

- Create a build directory build-hdf5:

    mkdir build-hdf5; cd build-hdf5/

- Configure HDF5:

    RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared

  RUNSERIAL and RUNPARALLEL tell the library how it should launch programs
  that are part of the build procedure.

- Compile HDF5:

    gmake

- Check HDF5:

    gmake check

- Install HDF5:

    gmake install

The build will be in build-hdf5/hdf5/ (or whatever you specify in --prefix).
To compile other HDF5 applications, use the wrappers created by the build
(build-hdf5/hdf5/bin/h5pcc or h5fc).


3. Detailed explanation
-----------------------

3.1. Installation steps (uni/multiple process modes)
----------------------------------------------------
During the configure step, you must run in uni-process mode. If multiple
processes run configure simultaneously, they will incur errors.

In the build step (make), whether it can run correctly in multiple-process
mode depends on your make command. If you are not sure, run it in
uni-process mode.

In the test step (make check), if your system lets you control the number of
processes of the MPI application, you can just use "make check". But if your
system (e.g., IBM SP) has a fixed number of processes for each batch run, you
need to do the serial tests with "make check-s", requesting 1 process, and
then do the parallel tests with "make check-p", requesting n processes.

Lastly, "make install" should be run in uni-process mode.


3.2. Configure details
----------------------
The HDF5 library can be configured to use MPI and MPI-IO for parallelism on
a distributed multi-processor system. The easiest way to do this is to have
a properly installed parallel compiler (e.g., MPICH's mpicc or IBM's mpcc_r)
and supply the compiler name as the value of the CC environment variable.
For example:

    $ CC=mpcc_r ./configure --enable-parallel
    $ CC=/usr/local/mpi/bin/mpicc ./configure --enable-parallel

If a parallel library is being built, then configure attempts to determine how
to run a parallel application on one processor and on many processors. If the
compiler is `mpicc' and the user hasn't specified values for RUNSERIAL and
RUNPARALLEL, then configure chooses `mpiexec' from the same directory as
`mpicc':

    RUNSERIAL:    /usr/local/mpi/bin/mpiexec -np 1
    RUNPARALLEL:  /usr/local/mpi/bin/mpiexec -np $${NPROCS:=6}

The `$${NPROCS:=6}' will be substituted with the value of the NPROCS
environment variable at the time `make check' is run (or the value 6).
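
For example, to run the parallel tests with 8 processes instead of the
default 6, something like the following should work (a sketch only; the
launcher actually used is whatever configure detected, or whatever you
supplied in RUNPARALLEL):

    $ NPROCS=8 make check        # serial + parallel tests
    $ NPROCS=8 make check-p      # parallel tests only (see section 3.1)
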

4. Parallel test suite
----------------------
The testpar/ directory contains tests for Parallel HDF5 and MPI-IO. Here are
some notes about some of the tests.

t_mpi tests the basic functionality of some MPI-IO features used by Parallel
HDF5. It usually exits with a non-zero code if a required MPI-IO feature does
not succeed as expected. One exception is the testing of accessing files
larger than 2 GB. If the underlying file system or the MPI-IO library fails
to handle file sizes larger than 2 GB, the test will print informational
messages stating the failure but will not exit with a non-zero code. Failure
to support file sizes greater than 2 GB is not a fatal error for HDF5,
because HDF5 can use other file drivers, such as families of files, to
bypass the file size limit.

t_cache makes many small I/O requests and may not run well on a slow file
system such as an NFS disk. If it takes a long time to run, try setting the
environment variable HDF5_PARAPREFIX to a file system more suitable for
MPI-IO requests before running t_cache.

By default, the parallel tests use the current directory as the test
directory. This can be changed with the environment variable HDF5_PARAPREFIX.
For example, if the tests should use the directory /PFS/user/me, do

    HDF5_PARAPREFIX=/PFS/user/me
    export HDF5_PARAPREFIX
    make check

(In some batch job systems, you may need to hard-set HDF5_PARAPREFIX in shell
initialization files like .profile, .cshrc, etc.)


Reference
---------
1. POSIX Compliant. A good explanation is by Donald Lewin:
       After a write() to a regular file has successfully returned, any
       successful read() from each byte position on the file that was
       modified by that write() will return the data that was written by the
       write(). A subsequent write() to the same byte will overwrite the file
       data. If a read() of a file data can be proven by any means [e.g.,
       MPI_Barrier()] to occur after a write() of that data, it must reflect
       that write(), even if the calls are made by a different process.
   Lewin, D. (1994). "POSIX Programmer's Guide" (pp. 513-514). O'Reilly &
   Associates.


Appendix A. Sample programs
---------------------------
Here are sample MPI-IO C and Fortran programs. You may use them to run simple
tests of your MPI compilers and the parallel file system. The MPI commands
used here are mpicc, mpif90, and mpiexec. Replace them with the commands of
your system.

The programs assume they run in the parallel file system and therefore create
the test data file in the current directory. If the parallel file system is
somewhere else, you need to run the sample programs there or edit the
programs to use a different file name.

Example of compiling and running:

    % mpicc Sample_mpio.c -o c.out
    % mpiexec -np 4 c.out

    % mpif90 Sample_mpio.f90 -o f.out
    % mpiexec -np 4 f.out
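
These samples exercise only MPI-IO. Once parallel HDF5 itself has been
installed, HDF5 applications are normally compiled with the wrappers created
by the build (see section 2.3). For example (a sketch: my_phdf5_app.c is a
placeholder for your own source file, and the wrapper path depends on your
--prefix):

    % /path/to/hdf5/bin/h5pcc my_phdf5_app.c -o app.out
    % mpiexec -np 4 app.out
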

==> Sample_mpio.c <==
/* Simple MPI-IO program testing if a parallel file can be created.
 * Default filename can be specified via first program argument.
 * Each process writes something, then reads all data back.
 */

#include <stdio.h>
#include <unistd.h>      /* for gethostname() */
#include <mpi.h>
#ifndef MPI_FILE_NULL    /* MPIO may be defined in mpi.h already */
#   include <mpio.h>
#endif

#define DIMSIZE  10      /* dimension size, avoid powers of 2. */
#define PRINTID  printf("Proc %d: ", mpi_rank)

int
main(int ac, char **av)
{
    char        hostname[128];
    int         mpi_size, mpi_rank;
    MPI_File    fh;
    char       *filename = "./mpitest.data";
    char        mpi_err_str[MPI_MAX_ERROR_STRING];
    int         mpi_err_strlen;
    int         mpi_err;
    char        writedata[DIMSIZE], readdata[DIMSIZE];
    char        expect_val;
    int         i, irank;
    int         nerrors = 0;            /* number of errors */
    MPI_Offset  mpi_off;
    MPI_Status  mpi_stat;

    MPI_Init(&ac, &av);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    /* get file name if provided */
    if (ac > 1) {
        filename = *++av;
    }
    if (mpi_rank == 0) {
        printf("Testing simple MPIO program with %d processes accessing file %s\n",
               mpi_size, filename);
        printf("    (Filename can be specified via program argument)\n");
    }

    /* show the hostname so that we can tell where the processes are running */
    if (gethostname(hostname, 128) < 0) {
        PRINTID;
        printf("gethostname failed\n");
        return 1;
    }
    PRINTID;
    printf("hostname=%s\n", hostname);

    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
                                 MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
                                 MPI_INFO_NULL, &fh))
            != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        PRINTID;
        printf("MPI_File_open failed (%s)\n", mpi_err_str);
        return 1;
    }

    /* each process writes some data */
    for (i = 0; i < DIMSIZE; i++)
        writedata[i] = mpi_rank * DIMSIZE + i;
    mpi_off = mpi_rank * DIMSIZE;
    if ((mpi_err = MPI_File_write_at(fh, mpi_off, writedata, DIMSIZE, MPI_BYTE,
                                     &mpi_stat))
            != MPI_SUCCESS) {
        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
        PRINTID;
        printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n",
               (long) mpi_off, (int) DIMSIZE, mpi_err_str);
        return 1;
    }

    /* make sure all processes have done writing. */
    MPI_Barrier(MPI_COMM_WORLD);

    /* each process reads all data back and verifies it. */
    for (irank = 0; irank < mpi_size; irank++) {
        mpi_off = irank * DIMSIZE;
        if ((mpi_err = MPI_File_read_at(fh, mpi_off, readdata, DIMSIZE, MPI_BYTE,
                                        &mpi_stat))
                != MPI_SUCCESS) {
            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
            PRINTID;
            printf("MPI_File_read_at offset(%ld), bytes (%d), failed (%s)\n",
                   (long) mpi_off, (int) DIMSIZE, mpi_err_str);
            return 1;
        }
        for (i = 0; i < DIMSIZE; i++) {
            expect_val = irank * DIMSIZE + i;
            if (readdata[i] != expect_val) {
                PRINTID;
                printf("read data[%d:%d] got %d, expect %d\n", irank, i,
                       readdata[i], expect_val);
                nerrors++;
            }
        }
    }
    if (nerrors)
        return 1;

    MPI_File_close(&fh);

    PRINTID;
    printf("all tests passed\n");

    MPI_Finalize();
    return 0;
}

==> Sample_mpio.f90 <==
!
! The following example demonstrates how to create and close a parallel
! file using MPI-IO calls.
!
! USE MPI is the proper way to bring in MPI definitions, but many
! MPI Fortran compilers support the pseudo-standard of INCLUDE.
! So, HDF5 uses the INCLUDE statement instead.
!

      PROGRAM MPIOEXAMPLE

      ! USE MPI

      IMPLICIT NONE

      INCLUDE 'mpif.h'

      CHARACTER(LEN=80), PARAMETER :: filename = "filef.h5"  ! File name
      INTEGER :: ierror   ! Error flag
      INTEGER :: fh       ! File handle
      INTEGER :: amode    ! File access mode

      call MPI_INIT(ierror)
      amode = MPI_MODE_RDWR + MPI_MODE_CREATE + MPI_MODE_DELETE_ON_CLOSE
      call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, fh, ierror)
      print *, "Trying to create ", filename
      if ( ierror .eq. MPI_SUCCESS ) then
          print *, "MPI_FILE_OPEN succeeded"
          call MPI_FILE_CLOSE(fh, ierror)
      else
          print *, "MPI_FILE_OPEN failed"
      endif

      call MPI_FINALIZE(ierror)
      END PROGRAM