From 6b28adf8bc5a86da3257dfdaedce030bdb8d013b Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 12 Jan 2024 18:56:49 +0000 Subject: [PATCH 01/15] update to 4.0.0 release Signed-off-by: Dalton Bohning --- ...4d536643475269d37211e283b49ebd6732d7.patch | 19644 ---------------- Makefile | 1 - ior.spec | 11 +- 3 files changed, 6 insertions(+), 19650 deletions(-) delete mode 100644 3.3.0..d3574d536643475269d37211e283b49ebd6732d7.patch diff --git a/3.3.0..d3574d536643475269d37211e283b49ebd6732d7.patch b/3.3.0..d3574d536643475269d37211e283b49ebd6732d7.patch deleted file mode 100644 index 418dbeb..0000000 --- a/3.3.0..d3574d536643475269d37211e283b49ebd6732d7.patch +++ /dev/null @@ -1,19644 +0,0 @@ -diff --git a/META b/META -index 0ba3315..4e3ed35 100755 ---- a/META -+++ b/META -@@ -1,3 +1,3 @@ - Package: ior --Version: 3.3.0 -+Version: 3.4.0+dev - Release: 0 -diff --git a/Makefile.am b/Makefile.am -index d874a90..d6465a8 100755 ---- a/Makefile.am -+++ b/Makefile.am -@@ -10,4 +10,5 @@ ACLOCAL_AMFLAGS = -I config - # `make dist` and `make test` for simple test binaries that do not require any - # special environment. - #TESTS = testing/basic-tests.sh --#DISTCLEANFILES = -r test test_out -+ -+DISTCLEANFILES = ./src/build.conf -diff --git a/NEWS b/NEWS -index 3e370d8..00b98ff 100644 ---- a/NEWS -+++ b/NEWS -@@ -1,3 +1,13 @@ -+Version 3.4.0+dev -+-------------------------------------------------------------------------------- -+ -+New major features: -+ -+New minor features: -+ -+Bugfixes: -+ -+ - Version 3.3.0 - -------------------------------------------------------------------------------- - -@@ -101,7 +111,7 @@ Known issues: - because `-u`/`-c`/`-p` cannot be specified (issue #98) - - `writeCheck` cannot be enabled for write-only tests using some AIORIs such as - MPI-IO (pull request #89) -- -+ - - Version 3.0.2 - -------------------------------------------------------------------------------- -@@ -129,7 +139,7 @@ Version 2.10.3 - Contributed by demyn@users.sourceforge.net - - Ported to Windows. Required changes related to 'long' types, which on Windows - are always 32-bits, even on 64-bit systems. Missing system headers and -- functions acount for most of the remaining changes. -+ functions account for most of the remaining changes. - New files for Windows: - - IOR/ior.vcproj - Visual C project file - - IOR/src/C/win/getopt.{h,c} - GNU getopt() support -@@ -189,7 +199,7 @@ Version 2.9.5 - - Added notification for "Using reorderTasks '-C' (expecting block, not cyclic, - task assignment)" - - Corrected bug with read performance with stonewalling (was using full size, -- stat'ed file instead of bytes transfered). -+ stat'ed file instead of bytes transferred). - - Version 2.9.4 - -------------------------------------------------------------------------------- -diff --git a/README.md b/README.md -index c1c73a8..081752b 100755 ---- a/README.md -+++ b/README.md -@@ -1,8 +1,8 @@ --# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior) -+# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=main)](https://travis-ci.org/hpc/ior) - - This repository contains the IOR and mdtest parallel I/O benchmarks. The --[official IOR/mdtest documention][] can be found in the `docs/` subdirectory or --on Read the Docs. -+[official IOR/mdtest documentation][] can be found in the `docs/` subdirectory -+or on Read the Docs. - - ## Building - -@@ -28,4 +28,4 @@ on Read the Docs. - distributions at once. - - [official IOR release]: https://github.com/hpc/ior/releases --[official IOR/mdtest documention]: http://ior.readthedocs.org/ -+[official IOR/mdtest documentation]: http://ior.readthedocs.org/ -diff --git a/configure.ac b/configure.ac -index b34f819..6cd1d7b 100755 ---- a/configure.ac -+++ b/configure.ac -@@ -70,9 +70,57 @@ AS_IF([test "$ac_cv_header_gpfs_h" = "yes" -o "$ac_cv_header_gpfs_fcntl_h" = "ye - AC_SEARCH_LIBS([gpfs_fcntl], [gpfs], [], - [AC_MSG_ERROR([Library containing gpfs_fcntl symbols not found]) - ]) -+ AC_CHECK_TYPES([gpfsFineGrainWriteSharing_t], [], [], [[#include ]]) - ]) - ]) - -+# Check for CUDA -+AC_ARG_WITH([cuda], -+ [AS_HELP_STRING([--with-cuda], -+ [support configurable CUDA @<:@default=check@:>@])], -+ [], [with_cuda=check]) -+ -+AS_IF([test "x$with_cuda" != xno], [ -+ LDFLAGS="$LDFLAGS -L$with_cuda/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cuda/lib64" -+ CPPFLAGS="$CPPFLAGS -I$with_cuda/include" -+ -+ AC_CHECK_HEADERS([cuda_runtime.h], [AC_DEFINE([HAVE_CUDA], [], [CUDA GPU API found])], [ -+ if test "x$with_cuda" != xcheck; then -+ AC_MSG_FAILURE([--with-cuda was given, not found]) -+ fi -+ ]) -+AS_IF([test "$ac_cv_header_cuda_runtime_h" = "yes"], [ -+ AC_SEARCH_LIBS([cudaMalloc], [cudart cudart_static], [], -+ [AC_MSG_ERROR([Library containing cudaMalloc symbol not found])]) -+ ]) -+]) -+AM_CONDITIONAL([HAVE_CUDA], [test x$with_cuda = xyes]) -+AM_COND_IF([HAVE_CUDA],[AC_DEFINE([HAVE_CUDA], [], [CUDA GPU API found])]) -+ -+# Check for GPUDirect -+AC_ARG_WITH([gpuDirect], -+ [AS_HELP_STRING([--with-gpuDirect], -+ [support configurable GPUDirect @<:@default=check@:>@])], -+ [], [with_gpuDirect=check]) -+ -+AS_IF([test "x$with_gpuDirect" != xno], [ -+ LDFLAGS="$LDFLAGS -L$with_gpuDirect/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_gpuDirect/lib64" -+ CPPFLAGS="$CPPFLAGS -I$with_gpuDirect/include" -+ -+ AC_CHECK_HEADERS([cufile.h], [AC_DEFINE([HAVE_GPU_DIRECT], [], [GPUDirect API found])], [ -+ if test "x$with_gpuDirect" != xcheck; then -+ AC_MSG_FAILURE([--with-gpuDirect was given, not found]) -+ fi -+ ]) -+AS_IF([test "$ac_cv_header_cufile_h" = "yes"], [ -+ AC_SEARCH_LIBS([cuFileDriverOpen], [cufile], [], -+ [AC_MSG_ERROR([Library containing cuFileDriverOpen symbol not found])]) -+ ]) -+]) -+AM_CONDITIONAL([HAVE_GPU_DIRECT], [test x$with_gpuDirect = xyes]) -+AM_COND_IF([HAVE_GPU_DIRECT],[AC_DEFINE([HAVE_GPU_DIRECT], [], [GPUDirect API found])]) -+ -+ - # Check for system capabilities - AC_SYS_LARGEFILE - -@@ -84,7 +132,8 @@ AC_ARG_WITH([lustre], - [support configurable Lustre striping values @<:@default=check@:>@])], - [], [with_lustre=check]) - AS_IF([test "x$with_lustre" = xyes ], [ -- AC_CHECK_HEADERS([linux/lustre/lustre_user.h lustre/lustre_user.h], break, [ -+ AC_CHECK_HEADERS([linux/lustre/lustre_user.h lustre/lustre_user.h], -+ [AC_DEFINE([HAVE_LUSTRE_USER], [], [Lustre user API available in some shape or form])], [ - if test "x$with_lustre" != xcheck -a \ - "x$ac_cv_header_linux_lustre_lustre_user_h" = "xno" -a \ - "x$ac_cv_header_lustre_lustre_user_h" = "xno" ; then -@@ -160,8 +209,10 @@ AC_ARG_WITH([ncmpi], - [], - [with_ncmpi=no]) - AM_CONDITIONAL([USE_NCMPI_AIORI], [test x$with_ncmpi = xyes]) --AM_COND_IF([USE_NCMPI_AIORI],[ -- AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI]) -+AS_IF([test "x$with_ncmpi" = xyes ], [ -+ AC_CHECK_HEADERS([pnetcdf.h], [AC_DEFINE([USE_NCMPI_AIORI], [], [PNetCDF available])], [ -+ AC_MSG_FAILURE([--with-ncmpi was given but pnetcdf.h not found]) -+ ]) - ]) - - # MMAP IO support -@@ -186,6 +237,34 @@ AM_COND_IF([USE_POSIX_AIORI],[ - AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) - ]) - -+# PMDK IO support -+AC_ARG_WITH([pmdk], -+ [AS_HELP_STRING([--with-pmdk], -+ [support IO with PMDK backend @<:@default=no@:>@])], -+ [], -+ [with_pmdk=no]) -+AM_CONDITIONAL([USE_PMDK_AIORI], [test x$with_pmdk = xyes]) -+AS_IF([test "x$with_pmdk" != xno], [ -+ AC_DEFINE([USE_PMDK_AIORI], [], [Build PMDK backend AIORI]) -+ AC_CHECK_HEADERS(libpmem.h,, [unset PMDK]) -+ AC_SEARCH_LIBS([pmem_map_file], [pmdk], -+ [AC_MSG_ERROR([Library containing pmdk symbols not found])]) -+]) -+ -+# LINUX AIO support -+AC_ARG_WITH([aio], -+ [AS_HELP_STRING([--with-aio], -+ [support Linux AIO @<:@default=no@:>@])], -+ [], -+ [with_aio=no]) -+AM_CONDITIONAL([USE_AIO_AIORI], [test x$with_aio = xyes]) -+AS_IF([test "x$with_aio" != xno], [ -+ AC_DEFINE([USE_AIO_AIORI], [], [Build AIO backend]) -+ AC_CHECK_HEADERS(libaio.h,, [unset AIO]) -+ AC_SEARCH_LIBS([aio], [io_setup], [AC_MSG_ERROR([Library containing AIO symbol io_setup not found])]) -+]) -+ -+ - # RADOS support - AC_ARG_WITH([rados], - [AS_HELP_STRING([--with-rados], -@@ -211,40 +290,25 @@ AM_COND_IF([USE_CEPHFS_AIORI],[ - AC_DEFINE([USE_CEPHFS_AIORI], [], [Build CEPHFS backend AIORI]) - ]) - --# DAOS Backends (DAOS and DFS) IO support require DAOS and CART/GURT --AC_ARG_WITH([cart], -- [AS_HELP_STRING([--with-cart], -- [support IO with DAOS backends @<:@default=no@:>@])], -- [], [with_cart=no]) -- --AS_IF([test "x$with_cart" != xno], [ -- CART="yes" -- LDFLAGS="$LDFLAGS -L$with_cart/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib64" -- LDFLAGS="$LDFLAGS -L$with_cart/lib -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib" -- CPPFLAGS="$CPPFLAGS -I$with_cart/include/" -- AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) -- AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset CART]) --]) -- -+# DAOS-FS Backend (DFS) - AC_ARG_WITH([daos], - [AS_HELP_STRING([--with-daos], -- [support IO with DAOS backends @<:@default=no@:>@])], -+ [support IO with DAOS backend @<:@default=no@:>@])], - [], [with_daos=no]) -- - AS_IF([test "x$with_daos" != xno], [ - DAOS="yes" - LDFLAGS="$LDFLAGS -L$with_daos/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_daos/lib64" - CPPFLAGS="$CPPFLAGS -I$with_daos/include" -- AC_CHECK_HEADERS(daos_types.h,, [unset DAOS]) -+ AC_CHECK_HEADERS(gurt/common.h,, [unset DAOS]) -+ AC_CHECK_HEADERS(daos.h,, [unset DAOS]) -+ AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset DAOS]) - AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) -- AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) - AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) - AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]) - ]) -- - AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes]) - AM_COND_IF([USE_DAOS_AIORI],[ -- AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends AIORI]) -+ AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS-FS backend AIORI]) - ]) - - # Gfarm support -@@ -293,19 +357,54 @@ AM_COND_IF([AWS4C_DIR],[ - ]) - - -+ -+# Amazon S3 support using the libs3 API -+AC_ARG_WITH([S3-libs3], -+ [AS_HELP_STRING([--with-S3-libs3], -+ [support IO with Amazon libS3 @<:@default=no@:>@])], -+ [], -+ [with_S3_libs3=no]) -+AM_CONDITIONAL([USE_S3_LIBS3_AIORI], [test x$with_S3_libs3 = xyes]) -+AM_COND_IF([USE_S3_LIBS3_AIORI],[ -+ AC_DEFINE([USE_S3_LIBS3_AIORI], [], [Build Amazon-S3 backend AIORI using libs3]) -+]) -+ -+err=0 -+AS_IF([test "x$with_S3_libs3" != xno], [ -+ AC_MSG_NOTICE([beginning of S3-related checks]) -+ ORIG_CPPFLAGS=$CPPFLAGS -+ ORIG_LDFLAGS=$LDFLAGS -+ -+ AC_CHECK_HEADERS([libs3.h], [], [err=1]) -+ -+ # Autotools thinks searching for a library means I want it added to LIBS -+ ORIG_LIBS=$LIBS -+ AC_CHECK_LIB([s3], [S3_initialize], [], [err=1]) -+ LIBS=$ORIG_LIBS -+ -+ AC_MSG_NOTICE([end of S3-related checks]) -+ if test "$err" == 1; then -+ AC_MSG_FAILURE([S3 support is missing. dnl Make sure you have access to libs3. dnl]) -+ fi -+ -+ # restore user's values -+ CPPFLAGS=$ORIG_CPPFLAGS -+ LDFLAGS=$ORIG_LDFLAGS -+]) -+ - # Amazon S3 support [see also: --with-aws4c] --AC_ARG_WITH([S3], -- [AS_HELP_STRING([--with-S3], -+AC_ARG_WITH([S3-4c], -+ [AS_HELP_STRING([--with-S3-4c], - [support IO with Amazon S3 backend @<:@default=no@:>@])], - [], -- [with_S3=no]) --AM_CONDITIONAL([USE_S3_AIORI], [test x$with_S3 = xyes]) --AM_COND_IF([USE_S3_AIORI],[ -- AC_DEFINE([USE_S3_AIORI], [], [Build Amazon-S3 backend AIORI]) -+ [with_S3_4c=no]) -+AM_CONDITIONAL([USE_S3_4C_AIORI], [test x$with_S3_4c = xyes]) -+AM_COND_IF([USE_S3_4C_AIORI],[ -+ AC_DEFINE([USE_S3_4C_AIORI], [], [Build Amazon-S3 backend AIORI using lib4c]) - ]) - - err=0 --AS_IF([test "x$with_S3" != xno], [ -+AS_IF([test "x$with_S3_4c" != xno], [ - AC_MSG_NOTICE([beginning of S3-related checks]) - - # save user's values, while we use AC_CHECK_HEADERS with $AWS4C_DIR -@@ -337,6 +436,30 @@ Consider --with-aws4c=, CPPFLAGS, LDFLAGS, etc]) - LDFLAGS=$ORIG_LDFLAGS - ]) - -+# Check for existence of the function to detect the CPU socket ID (for multi-socket systems) -+AC_COMPILE_IFELSE( -+ [AC_LANG_SOURCE([[ -+ int main(){ -+ unsigned long a,d,c; -+ __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); -+ return 0; -+ } -+ ]])], -+ AC_DEFINE([HAVE_RDTSCP_ASM], [], [Has ASM to detect CPU socket ID])) -+ -+AC_COMPILE_IFELSE( -+ [AC_LANG_SOURCE([[ -+ #define _GNU_SOURCE -+ #include -+ #include -+ unsigned long GetProcessorAndCore(int *chip, int *core){ -+ return syscall(SYS_getcpu, core, chip, NULL); -+ } -+ int main(){ -+ } -+ ]])], -+ AC_DEFINE([HAVE_GETCPU_SYSCALL], [], [Has syscall to detect CPU socket ID])) -+ - - # Enable building "IOR", in all capitals - AC_ARG_ENABLE([caps], -diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE -index 3d6b4e4..02f90cc 100755 ---- a/doc/USER_GUIDE -+++ b/doc/USER_GUIDE -@@ -47,7 +47,7 @@ Two ways to run IOR: - E.g., to execute: IOR -W -f script - This defaults all tests in 'script' to use write data checking. - -- * The Command line supports to specify additional parameters for the choosen API. -+ * The Command line supports to specify additional parameters for the chosen API. - For example, username and password for the storage. - Available options are listed in the help text after selecting the API when running with -h. - For example, 'IOR -a DUMMY -h' shows the supported options for the DUMMY backend. -@@ -89,7 +89,7 @@ These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'. - -n noFill -- no fill in HDF5 file creation - -N N numTasks -- number of tasks that should participate in the test - -o S testFile -- full name for test -- -O S string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32) -+ -O S string of IOR directives (e.g. -O checkRead=1,GPUid=2) - -p preallocate -- preallocate file size - -P useSharedFilePointer -- use shared file pointer [not working] - -q quitOnError -- during file error-checking, abort on error -@@ -164,7 +164,7 @@ GENERAL: - - * numTasks - number of tasks that should participate in the test - [0] -- NOTE: 0 denotes all tasks -+ NOTE: -1 denotes all tasks - - * interTestDelay - this is the time in seconds to delay before - beginning a write or read in a series of tests [0] -@@ -347,28 +347,54 @@ MPIIO-, HDF5-, AND NCMPI-ONLY: - - LUSTRE-SPECIFIC: - ================ -- * lustreStripeCount - set the lustre stripe count for the test file(s) [0] -+ * POSIX-ONLY: -+ * --posix.lustre.stripecount - set the Lustre stripe count for the test file(s) [0] - -- * lustreStripeSize - set the lustre stripe size for the test file(s) [0] -+ * --posix.lustre.stripesize - set the Lustre stripe size for the test file(s) [0] - -- * lustreStartOST - set the starting OST for the test file(s) [-1] -+ * --posix.lustre.startost - set the starting OST for the test file(s) [-1] - -- * lustreIgnoreLocks - disable lustre range locking [0] -+ * --posix.lustre.ignorelocks - disable Lustre range locking [0] - --GPFS-SPECIFIC: -+ * MPIIO-, HDF5-, AND NCMPI-ONLY: -+ * ROMIO-based IO (see https://github.com/pmodels/mpich/blob/048879f1234419abb035aacbaf655880c8f77dba/src/mpi/romio/adio/ad_lustre/ad_lustre_open.c#L58): -+ * requires setting the environment variable ROMIO_FSTYPE_FORCE=LUSTRE: (or similar for specific MPIs) to enable ROMIO's Lustre ADIO -+ -+ * IOR_HINT__MPI__striping_factor - set the Lustre stripe count for the test file(s) [-1] -+ -+ * IOR_HINT__MPI__striping_unit - set the Lustre stripe size for the test file(s) [0] -+ -+ * IOR_HINT__MPI__romio_lustre_start_iodevice - set the starting OST for the test file(s) [-1] -+ -+ * OMPIO-based IO (see https://github.com/open-mpi/ompi/blob/6d237e85d730ed946c9f45fcd3e19b78a243203e/ompi/mca/fs/lustre/fs_lustre_component.c#L75) -+ * not setting either of the environment variables below causes a fatal "Floating point exception: Integer divide-by-zero" error -+ -+ * execution with either of the environment variables causes this message "ior: setstripe error for 'testfile': stripe already set" which can safely be ignored as OMPIO tries to modify the stripe settings twice although the first time succeeds -+ -+ * OMPI_MCA_fs_lustre_stripe_width / IOR_HINT__MPI__stripe_width - set the Lustre stripe count for the test file(s) [0] -+ -+ * OMPI_MCA_fs_lustre_stripe_size / IOR_HINT__MPI__stripe_size - set the Lustre stripe size for the test file(s) [0] -+ -+GPFS-SPECIFIC (POSIX-ONLY): - ================ -- * gpfsHintAccess - use gpfs_fcntl hints to pre-declare accesses -+ * --posix.gpfs.hintaccess - use gpfs_fcntl hints to pre-declare accesses -+ -+ * --posix.gpfs.releasetoken - immediately after opening or creating file, release -+ all locks. Might help mitigate lock-revocation -+ traffic when many processes write/read to same file. -+ -+ * --posix.gpfs.finegrainwritesharing - This hint optimizes the performance of small strided -+ writes to a shared file from a parallel application - -- * gpfsReleaseToken - immediately after opening or creating file, release -- all locks. Might help mitigate lock-revocation -- traffic when many proceses write/read to same file. -+ * --posix.gpfs.finegrainreadsharing - This hint optimizes the performance of small strided -+ reads from a shared file from a parallel application - --BeeGFS-SPECIFIC (POSIX only): -+BeeGFS-SPECIFIC (POSIX-ONLY): - ================ -- * beegfsNumTargets - set the number of storage targets to use -+ * --posix.beegfs.NumTargets - set the number of storage targets to use - -- * beegfsChunkSize - set the striping chunk size. Must be a power of two, -- and greater than 64kiB, (e.g.: 256k, 1M, ...) -+ * --posix.beegfs.ChunkSize - set the striping chunk size. Must be a power of two, -+ and greater than 64kiB, (e.g.: 256k, 1M, ...) - - - *********************** -@@ -499,7 +525,7 @@ zip, gzip, and bzip. - - 3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed). - To avoid compression a transfer size of greater than the bzip block size is required -- (default = 900KB). I suggest a transfer size of greather than 1MB to avoid bzip2 compression. -+ (default = 900KB). I suggest a transfer size of greater than 1MB to avoid bzip2 compression. - - Be aware of the block size your compression algorithm will look at, and adjust the transfer size - accordingly. -@@ -660,7 +686,7 @@ HOW DO I USE HINTS? - 'setenv IOR_HINT__MPI__ ' - - --HOW DO I EXPLICITY SET THE FILE DATA SIGNATURE? -+HOW DO I EXPLICITLY SET THE FILE DATA SIGNATURE? - - The data signature for a transfer contains the MPI task number, transfer- - buffer offset, and also timestamp for the start of iteration. As IOR works -diff --git a/doc/mdtest.1 b/doc/mdtest.1 -index 3cfc082..27d4d7b 100644 ---- a/doc/mdtest.1 -+++ b/doc/mdtest.1 -@@ -28,7 +28,7 @@ Use ``collective creates'', meaning task 0 does all the creates. - Only perform the create phase of the tests. - .TP - .I "-d" testdir[@testdir2] --The directory in which the tests will run. For multiple pathes, must use fully-qualified pathnames. -+The directory in which the tests will run. For multiple paths, must use fully-qualified pathnames. - [default: working directory of mdtest]. - .TP - .I "-D" -@@ -78,6 +78,9 @@ Stride # between neighbor tasks for file/dir stat, 0 = local - .I "-p" seconds - Pre-iteration delay (in seconds). - .TP -+.I "-P" -+Print both the file creation rate and the elapsed time. -+.TP - .I "-r" - Only perform the remove phase of the tests. - .TP -@@ -121,6 +124,19 @@ Set verbosity value - Set the number of Bytes to write to each file after it is created - [default: 0]. - .TP -+.I "-W" seconds -+Specify the stonewall time in seconds. When the stonewall timer has elapsed, -+the rank with the highest number of creates sets -+.I number_of_items -+for the other ranks, so that all ranks create the same number of files. -+.TP -+.I "-x" filename -+Filename to use for stonewall synchronization between processes. -+.TP -+.I "Y" -+Call the sync command after each phase, which is included in the -+timing. Note that it causes all IO to be flushed from the nodes. -+.TP - .I "-z" tree_depth - The depth of the hierarchical directory tree [default: 0]. - .SH EXAMPLES -diff --git a/src/Makefile.am b/src/Makefile.am -index 3786560..037433c 100755 ---- a/src/Makefile.am -+++ b/src/Makefile.am -@@ -1,20 +1,25 @@ - SUBDIRS = . test - --bin_PROGRAMS = ior mdtest -+bin_PROGRAMS = ior mdtest md-workbench - if USE_CAPS --bin_PROGRAMS += IOR MDTEST -+bin_PROGRAMS += IOR MDTEST MD-WORKBENCH - endif - --noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h -+noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h aiori-POSIX.h md-workbench.h - - lib_LIBRARIES = libaiori.a --libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c -+libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c md-workbench.c - - extraSOURCES = aiori.c aiori-DUMMY.c - extraLDADD = - extraLDFLAGS = - extraCPPFLAGS = - -+md_workbench_SOURCES = md-workbench-main.c -+md_workbench_LDFLAGS = -+md_workbench_LDADD = libaiori.a -+md_workbench_CPPFLAGS = -+ - ior_SOURCES = ior-main.c - ior_LDFLAGS = - ior_LDADD = libaiori.a -@@ -36,6 +41,14 @@ extraLDFLAGS += -L/opt/hadoop-2.2.0/lib/native - extraLDADD += -lhdfs - endif - -+if HAVE_CUDA -+extraLDADD += -lcudart -+endif -+ -+if HAVE_GPU_DIRECT -+extraLDADD += -lcufile -+endif -+ - if USE_HDF5_AIORI - extraSOURCES += aiori-HDF5.c - extraLDADD += -lhdf5 -lz -@@ -65,6 +78,16 @@ if USE_POSIX_AIORI - extraSOURCES += aiori-POSIX.c - endif - -+if USE_AIO_AIORI -+extraSOURCES += aiori-aio.c -+extraLDADD += -laio -+endif -+ -+if USE_PMDK_AIORI -+extraSOURCES += aiori-PMDK.c -+extraLDADD += -lpmem -+endif -+ - if USE_RADOS_AIORI - extraSOURCES += aiori-RADOS.c - extraLDADD += -lrados -@@ -77,7 +100,8 @@ endif - - - if USE_DAOS_AIORI --extraSOURCES += aiori-DAOS.c aiori-DFS.c -+extraSOURCES += aiori-DFS.c -+extraLDADD += -lgurt -ldaos_common -ldaos -ldfs -luuid - endif - - if USE_GFARM_AIORI -@@ -85,8 +109,8 @@ extraSOURCES += aiori-Gfarm.c - extraLDADD += -lgfarm - endif - --if USE_S3_AIORI --extraSOURCES += aiori-S3.c -+if USE_S3_4C_AIORI -+extraSOURCES += aiori-S3-4c.c - if AWS4C_DIR - extraCPPFLAGS += $(AWS4C_CPPFLAGS) - extraLDFLAGS += $(AWS4C_LDFLAGS) -@@ -95,6 +119,12 @@ extraLDADD += -lcurl - extraLDADD += -lxml2 - extraLDADD += -laws4c - extraLDADD += -laws4c_extra -+extraLDADD += -lcrypto -+endif -+ -+if USE_S3_LIBS3_AIORI -+extraSOURCES += aiori-S3-libs3.c -+extraLDADD += -ls3 - endif - - if WITH_LUSTRE -@@ -111,6 +141,16 @@ mdtest_LDFLAGS += $(extraLDFLAGS) - mdtest_LDADD += $(extraLDADD) - mdtest_CPPFLAGS += $(extraCPPFLAGS) - -+md_workbench_SOURCES += $(extraSOURCES) -+md_workbench_LDFLAGS += $(extraLDFLAGS) -+md_workbench_LDADD += $(extraLDADD) -+md_workbench_CPPFLAGS += $(extraCPPFLAGS) -+ -+MD_WORKBENCH_SOURCES = $(md_workbench_SOURCES) -+MD_WORKBENCH_LDFLAGS = $(md_workbench_LDFLAGS) -+MD_WORKBENCH_LDADD = $(md_workbench_LDADD) -+MD_WORKBENCH_CPPFLAGS = $(md_workbench_CPPFLAGS) -+ - IOR_SOURCES = $(ior_SOURCES) - IOR_LDFLAGS = $(ior_LDFLAGS) - IOR_LDADD = $(ior_LDADD) -@@ -123,3 +163,10 @@ MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS) - - libaiori_a_SOURCES += $(extraSOURCES) - libaiori_a_CPPFLAGS = $(extraCPPFLAGS) -+ -+# Generate a config file with the build flags to allow the reuse of library -+.PHONY: build.conf -+all-local: build.conf -+build.conf: -+ @echo LDFLAGS=$(LDFLAGS) $(extraLDFLAGS) $(extraLDADD) $(LIBS) > build.conf -+ @echo CFLAGS=$(CFLAGS) $(extraCPPFLAGS) >> build.conf -diff --git a/src/aiori-CEPHFS.c b/src/aiori-CEPHFS.c -index 27f12db..23cc56d 100755 ---- a/src/aiori-CEPHFS.c -+++ b/src/aiori-CEPHFS.c -@@ -18,6 +18,7 @@ - # include "config.h" - #endif - -+#include - #include - #include - #include -@@ -43,18 +44,24 @@ struct cephfs_options{ - char * user; - char * conf; - char * prefix; -+ char * remote_prefix; -+ int olazy; - }; - - static struct cephfs_options o = { - .user = NULL, - .conf = NULL, - .prefix = NULL, -+ .remote_prefix = NULL, -+ .olazy = 0, - }; - - static option_help options [] = { -- {0, "cephfs.user", "Username for the ceph cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.user}, -- {0, "cephfs.conf", "Config file for the ceph cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.conf}, -- {0, "cephfs.prefix", "mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, -+ {0, "cephfs.user", "Username for the ceph cluster", OPTION_OPTIONAL_ARGUMENT, 's', & o.user}, -+ {0, "cephfs.conf", "Config file for the ceph cluster", OPTION_OPTIONAL_ARGUMENT, 's', & o.conf}, -+ {0, "cephfs.prefix", "Mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, -+ {0, "cephfs.remote_prefix", "Remote mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.remote_prefix}, -+ {0, "cephfs.olazy", "Enable Lazy I/O", OPTION_FLAG, 'd', & o.olazy}, - LAST_OPTION - }; - -@@ -63,22 +70,25 @@ static struct ceph_mount_info *cmount; - /**************************** P R O T O T Y P E S *****************************/ - static void CEPHFS_Init(); - static void CEPHFS_Final(); --static void *CEPHFS_Create(char *, IOR_param_t *); --static void *CEPHFS_Open(char *, IOR_param_t *); --static IOR_offset_t CEPHFS_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void CEPHFS_Close(void *, IOR_param_t *); --static void CEPHFS_Delete(char *, IOR_param_t *); --static void CEPHFS_Fsync(void *, IOR_param_t *); --static IOR_offset_t CEPHFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static int CEPHFS_StatFS(const char *, ior_aiori_statfs_t *, IOR_param_t *); --static int CEPHFS_MkDir(const char *, mode_t, IOR_param_t *); --static int CEPHFS_RmDir(const char *, IOR_param_t *); --static int CEPHFS_Access(const char *, int, IOR_param_t *); --static int CEPHFS_Stat(const char *, struct stat *, IOR_param_t *); --static void CEPHFS_Sync(IOR_param_t *); -+void CEPHFS_xfer_hints(aiori_xfer_hint_t * params); -+static aiori_fd_t *CEPHFS_Create(char *path, int flags, aiori_mod_opt_t *options); -+static aiori_fd_t *CEPHFS_Open(char *path, int flags, aiori_mod_opt_t *options); -+static IOR_offset_t CEPHFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *options); -+static void CEPHFS_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void CEPHFS_Delete(char *path, aiori_mod_opt_t *); -+static void CEPHFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static IOR_offset_t CEPHFS_GetFileSize(aiori_mod_opt_t *, char *); -+static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat, aiori_mod_opt_t *options); -+static int CEPHFS_MkDir(const char *path, mode_t mode, aiori_mod_opt_t *options); -+static int CEPHFS_RmDir(const char *path, aiori_mod_opt_t *options); -+static int CEPHFS_Access(const char *path, int mode, aiori_mod_opt_t *options); -+static int CEPHFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t *options); -+static void CEPHFS_Sync(aiori_mod_opt_t *); - static option_help * CEPHFS_options(); - -+static aiori_xfer_hint_t * hints = NULL; -+ - /************************** D E C L A R A T I O N S ***************************/ - ior_aiori_t cephfs_aiori = { - .name = "CEPHFS", -@@ -90,7 +100,9 @@ ior_aiori_t cephfs_aiori = { - .xfer = CEPHFS_Xfer, - .close = CEPHFS_Close, - .delete = CEPHFS_Delete, -+ .get_options = CEPHFS_options, - .get_version = aiori_get_version, -+ .xfer_hints = CEPHFS_xfer_hints, - .fsync = CEPHFS_Fsync, - .get_file_size = CEPHFS_GetFileSize, - .statfs = CEPHFS_StatFS, -@@ -99,7 +111,6 @@ ior_aiori_t cephfs_aiori = { - .access = CEPHFS_Access, - .stat = CEPHFS_Stat, - .sync = CEPHFS_Sync, -- .get_options = CEPHFS_options, - }; - - #define CEPHFS_ERR(__err_str, __ret) do { \ -@@ -108,6 +119,12 @@ ior_aiori_t cephfs_aiori = { - } while(0) - - /***************************** F U N C T I O N S ******************************/ -+ -+void CEPHFS_xfer_hints(aiori_xfer_hint_t * params) -+{ -+ hints = params; -+} -+ - static const char* pfix(const char* path) { - const char* npath = path; - const char* prefix = o.prefix; -@@ -125,11 +142,16 @@ static option_help * CEPHFS_options(){ - - static void CEPHFS_Init() - { -+ char *remote_prefix = "/"; -+ - /* Short circuit if the options haven't been filled yet. */ - if (!o.user || !o.conf || !o.prefix) { - WARN("CEPHFS_Init() called before options have been populated!"); - return; - } -+ if (o.remote_prefix != NULL) { -+ remote_prefix = o.remote_prefix; -+ } - - /* Short circuit if the mount handle already exists */ - if (cmount) { -@@ -150,7 +172,7 @@ static void CEPHFS_Init() - } - - /* mount the handle */ -- ret = ceph_mount(cmount, "/"); -+ ret = ceph_mount(cmount, remote_prefix); - if (ret) { - CEPHFS_ERR("unable to mount cephfs", ret); - ceph_shutdown(cmount); -@@ -184,55 +206,61 @@ static void CEPHFS_Final() - cmount = NULL; - } - --static void *CEPHFS_Create(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *CEPHFS_Create(char *path, int flags, aiori_mod_opt_t *options) - { -- return CEPHFS_Open(testFileName, param); -+ return CEPHFS_Open(path, flags | IOR_CREAT, options); - } - --static void *CEPHFS_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *CEPHFS_Open(char *path, int flags, aiori_mod_opt_t *options) - { -- const char *file = pfix(testFileName); -+ const char *file = pfix(path); - int* fd; - fd = (int *)malloc(sizeof(int)); - - mode_t mode = 0664; -- int flags = (int) 0; -+ int ceph_flags = (int) 0; - - /* set IOR file flags to CephFS flags */ - /* -- file open flags -- */ -- if (param->openFlags & IOR_RDONLY) { -- flags |= CEPH_O_RDONLY; -+ if (flags & IOR_RDONLY) { -+ ceph_flags |= CEPH_O_RDONLY; - } -- if (param->openFlags & IOR_WRONLY) { -- flags |= CEPH_O_WRONLY; -+ if (flags & IOR_WRONLY) { -+ ceph_flags |= CEPH_O_WRONLY; - } -- if (param->openFlags & IOR_RDWR) { -- flags |= CEPH_O_RDWR; -+ if (flags & IOR_RDWR) { -+ ceph_flags |= CEPH_O_RDWR; - } -- if (param->openFlags & IOR_APPEND) { -- fprintf(stdout, "File append not implemented in CephFS\n"); -+ if (flags & IOR_APPEND) { -+ CEPHFS_ERR("File append not implemented in CephFS", EINVAL); - } -- if (param->openFlags & IOR_CREAT) { -- flags |= CEPH_O_CREAT; -+ if (flags & IOR_CREAT) { -+ ceph_flags |= CEPH_O_CREAT; - } -- if (param->openFlags & IOR_EXCL) { -- flags |= CEPH_O_EXCL; -+ if (flags & IOR_EXCL) { -+ ceph_flags |= CEPH_O_EXCL; - } -- if (param->openFlags & IOR_TRUNC) { -- flags |= CEPH_O_TRUNC; -+ if (flags & IOR_TRUNC) { -+ ceph_flags |= CEPH_O_TRUNC; - } -- if (param->openFlags & IOR_DIRECT) { -- fprintf(stdout, "O_DIRECT not implemented in CephFS\n"); -+ if (flags & IOR_DIRECT) { -+ CEPHFS_ERR("O_DIRECT not implemented in CephFS", EINVAL); - } -- *fd = ceph_open(cmount, file, flags, mode); -+ *fd = ceph_open(cmount, file, ceph_flags, mode); - if (*fd < 0) { - CEPHFS_ERR("ceph_open failed", *fd); - } -+ if (o.olazy == TRUE) { -+ int ret = ceph_lazyio(cmount, *fd, 1); -+ if (ret != 0) { -+ WARN("Error enabling lazy mode"); -+ } -+ } - return (void *) fd; - } - --static IOR_offset_t CEPHFS_Xfer(int access, void *file, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t CEPHFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *options) - { - uint64_t size = (uint64_t) length; - char *buf = (char *) buffer; -@@ -241,19 +269,19 @@ static IOR_offset_t CEPHFS_Xfer(int access, void *file, IOR_size_t * buffer, - - if (access == WRITE) - { -- ret = ceph_write(cmount, fd, buf, size, param->offset); -+ ret = ceph_write(cmount, fd, buf, size, offset); - if (ret < 0) { - CEPHFS_ERR("unable to write file to CephFS", ret); - } else if (ret < size) { - CEPHFS_ERR("short write to CephFS", ret); - } -- if (param->fsyncPerWrite == TRUE) { -- CEPHFS_Fsync(&fd, param); -+ if (hints->fsyncPerWrite == TRUE) { -+ CEPHFS_Fsync(file, options); - } - } - else /* READ */ - { -- ret = ceph_read(cmount, fd, buf, size, param->offset); -+ ret = ceph_read(cmount, fd, buf, size, offset); - if (ret < 0) { - CEPHFS_ERR("unable to read file from CephFS", ret); - } else if (ret < size) { -@@ -264,7 +292,7 @@ static IOR_offset_t CEPHFS_Xfer(int access, void *file, IOR_size_t * buffer, - return length; - } - --static void CEPHFS_Fsync(void *file, IOR_param_t * param) -+static void CEPHFS_Fsync(aiori_fd_t *file, aiori_mod_opt_t *options) - { - int fd = *(int *) file; - int ret = ceph_fsync(cmount, fd, 0); -@@ -273,7 +301,7 @@ static void CEPHFS_Fsync(void *file, IOR_param_t * param) - } - } - --static void CEPHFS_Close(void *file, IOR_param_t * param) -+static void CEPHFS_Close(aiori_fd_t *file, aiori_mod_opt_t *options) - { - int fd = *(int *) file; - int ret = ceph_close(cmount, fd); -@@ -284,28 +312,27 @@ static void CEPHFS_Close(void *file, IOR_param_t * param) - return; - } - --static void CEPHFS_Delete(char *testFileName, IOR_param_t * param) -+static void CEPHFS_Delete(char *path, aiori_mod_opt_t *options) - { -- int ret = ceph_unlink(cmount, pfix(testFileName)); -+ int ret = ceph_unlink(cmount, pfix(path)); - if (ret < 0) { - CEPHFS_ERR("ceph_unlink failed", ret); - } - return; - } - --static IOR_offset_t CEPHFS_GetFileSize(IOR_param_t * param, MPI_Comm testComm, -- char *testFileName) -+static IOR_offset_t CEPHFS_GetFileSize(aiori_mod_opt_t *options, char *path) - { - struct stat stat_buf; - IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - -- int ret = ceph_stat(cmount, pfix(testFileName), &stat_buf); -+ int ret = ceph_stat(cmount, pfix(path), &stat_buf); - if (ret < 0) { - CEPHFS_ERR("ceph_stat failed", ret); - } - aggFileSizeFromStat = stat_buf.st_size; - -- if (param->filePerProc == TRUE) { -+ if (hints->filePerProc == TRUE) { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); -@@ -327,11 +354,9 @@ static IOR_offset_t CEPHFS_GetFileSize(IOR_param_t * param, MPI_Comm testComm, - } - - return (aggFileSizeFromStat); -- - } - --static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, -- IOR_param_t *param) -+static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, aiori_mod_opt_t *options) - { - #if defined(HAVE_STATVFS) - struct statvfs statfs_buf; -@@ -354,28 +379,28 @@ static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, - #endif - } - --static int CEPHFS_MkDir(const char *path, mode_t mode, IOR_param_t *param) -+static int CEPHFS_MkDir(const char *path, mode_t mode, aiori_mod_opt_t *options) - { - return ceph_mkdir(cmount, pfix(path), mode); - } - --static int CEPHFS_RmDir(const char *path, IOR_param_t *param) -+static int CEPHFS_RmDir(const char *path, aiori_mod_opt_t *options) - { - return ceph_rmdir(cmount, pfix(path)); - } - --static int CEPHFS_Access(const char *testFileName, int mode, IOR_param_t *param) -+static int CEPHFS_Access(const char *path, int mode, aiori_mod_opt_t *options) - { - struct stat buf; -- return ceph_stat(cmount, pfix(testFileName), &buf); -+ return ceph_stat(cmount, pfix(path), &buf); - } - --static int CEPHFS_Stat(const char *testFileName, struct stat *buf, IOR_param_t *param) -+static int CEPHFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t *options) - { -- return ceph_stat(cmount, pfix(testFileName), buf); -+ return ceph_stat(cmount, pfix(path), buf); - } - --static void CEPHFS_Sync(IOR_param_t *param) -+static void CEPHFS_Sync(aiori_mod_opt_t *options) - { - int ret = ceph_sync_fs(cmount); - if (ret < 0) { -diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c -deleted file mode 100644 -index 8fa1578..0000000 ---- a/src/aiori-DAOS.c -+++ /dev/null -@@ -1,542 +0,0 @@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --/* -- * Copyright (C) 2018-2020 Intel Corporation -- * See the file COPYRIGHT for a complete copyright notice and license. -- */ -- --/* -- * This file implements the abstract I/O interface for DAOS Array API. -- */ -- --#define _BSD_SOURCE -- --#ifdef HAVE_CONFIG_H --#include "config.h" --#endif -- --#include --#include --#include --#include --#include --#include --#include -- --#include --#include -- --#include "ior.h" --#include "aiori.h" --#include "iordef.h" -- --/************************** O P T I O N S *****************************/ --struct daos_options{ -- char *pool; -- char *svcl; -- char *group; -- char *cont; -- int chunk_size; -- int destroy; -- char *oclass; --}; -- --static struct daos_options o = { -- .pool = NULL, -- .svcl = NULL, -- .group = NULL, -- .cont = NULL, -- .chunk_size = 1048576, -- .destroy = 0, -- .oclass = NULL, --}; -- --static option_help options [] = { -- {0, "daos.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.pool}, -- {0, "daos.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', &o.svcl}, -- {0, "daos.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.group}, -- {0, "daos.cont", "container uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.cont}, -- {0, "daos.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, -- {0, "daos.destroy", "Destroy Container", OPTION_FLAG, 'd', &o.destroy}, -- {0, "daos.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, -- LAST_OPTION --}; -- --/**************************** P R O T O T Y P E S *****************************/ -- --static void DAOS_Init(); --static void DAOS_Fini(); --static void *DAOS_Create(char *, IOR_param_t *); --static void *DAOS_Open(char *, IOR_param_t *); --static int DAOS_Access(const char *, int, IOR_param_t *); --static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void DAOS_Close(void *, IOR_param_t *); --static void DAOS_Delete(char *, IOR_param_t *); --static char* DAOS_GetVersion(); --static void DAOS_Fsync(void *, IOR_param_t *); --static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static option_help * DAOS_options(); -- --/************************** D E C L A R A T I O N S ***************************/ -- --ior_aiori_t daos_aiori = { -- .name = "DAOS", -- .create = DAOS_Create, -- .open = DAOS_Open, -- .access = DAOS_Access, -- .xfer = DAOS_Xfer, -- .close = DAOS_Close, -- .delete = DAOS_Delete, -- .get_version = DAOS_GetVersion, -- .fsync = DAOS_Fsync, -- .get_file_size = DAOS_GetFileSize, -- .initialize = DAOS_Init, -- .finalize = DAOS_Fini, -- .get_options = DAOS_options, -- .statfs = aiori_posix_statfs, -- .mkdir = aiori_posix_mkdir, -- .rmdir = aiori_posix_rmdir, -- .stat = aiori_posix_stat, --}; -- --#define IOR_DAOS_MUR_SEED 0xDEAD10CC -- --enum handleType { -- POOL_HANDLE, -- CONT_HANDLE, -- ARRAY_HANDLE --}; -- --static daos_handle_t poh; --static daos_handle_t coh; --static daos_handle_t aoh; --static daos_oclass_id_t objectClass = OC_SX; --static bool daos_initialized = false; -- --/***************************** F U N C T I O N S ******************************/ -- --/* For DAOS methods. */ --#define DCHECK(rc, format, ...) \ --do { \ -- int _rc = (rc); \ -- \ -- if (_rc < 0) { \ -- fprintf(stderr, "ior ERROR (%s:%d): %d: %d: " \ -- format"\n", __FILE__, __LINE__, rank, _rc, \ -- ##__VA_ARGS__); \ -- fflush(stdout); \ -- MPI_Abort(MPI_COMM_WORLD, -1); \ -- } \ --} while (0) -- --#define INFO(level, format, ...) \ --do { \ -- if (verbose >= level) \ -- printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ --} while (0) -- --/* For generic errors like invalid command line options. */ --#define GERR(format, ...) \ --do { \ -- fprintf(stderr, format"\n", ##__VA_ARGS__); \ -- MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ --} while (0) -- --/* Distribute process 0's pool or container handle to others. */ --static void --HandleDistribute(daos_handle_t *handle, enum handleType type) --{ -- d_iov_t global; -- int rc; -- -- global.iov_buf = NULL; -- global.iov_buf_len = 0; -- global.iov_len = 0; -- -- if (rank == 0) { -- /* Get the global handle size. */ -- if (type == POOL_HANDLE) -- rc = daos_pool_local2global(*handle, &global); -- else if (type == CONT_HANDLE) -- rc = daos_cont_local2global(*handle, &global); -- else -- rc = daos_array_local2global(*handle, &global); -- DCHECK(rc, "Failed to get global handle size"); -- } -- -- MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, -- MPI_COMM_WORLD), -- "Failed to bcast global handle buffer size"); -- -- global.iov_len = global.iov_buf_len; -- global.iov_buf = malloc(global.iov_buf_len); -- if (global.iov_buf == NULL) -- ERR("Failed to allocate global handle buffer"); -- -- if (rank == 0) { -- if (type == POOL_HANDLE) -- rc = daos_pool_local2global(*handle, &global); -- else if (type == CONT_HANDLE) -- rc = daos_cont_local2global(*handle, &global); -- else -- rc = daos_array_local2global(*handle, &global); -- DCHECK(rc, "Failed to create global handle"); -- } -- -- MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, -- MPI_COMM_WORLD), -- "Failed to bcast global pool handle"); -- -- if (rank != 0) { -- if (type == POOL_HANDLE) -- rc = daos_pool_global2local(global, handle); -- else if (type == CONT_HANDLE) -- rc = daos_cont_global2local(poh, global, handle); -- else -- rc = daos_array_global2local(coh, global, 0, handle); -- DCHECK(rc, "Failed to get local handle"); -- } -- -- free(global.iov_buf); --} -- --static option_help * --DAOS_options() --{ -- return options; --} -- --static void --DAOS_Init() --{ -- int rc; -- -- if (daos_initialized) -- return; -- -- if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) -- return; -- -- if (o.oclass) { -- objectClass = daos_oclass_name2id(o.oclass); -- if (objectClass == OC_UNKNOWN) -- GERR("Invalid DAOS Object class %s\n", o.oclass); -- } -- -- rc = daos_init(); -- if (rc) -- DCHECK(rc, "Failed to initialize daos"); -- -- if (rank == 0) { -- uuid_t uuid; -- d_rank_list_t *svcl = NULL; -- static daos_pool_info_t po_info; -- static daos_cont_info_t co_info; -- -- INFO(VERBOSE_1, "Connecting to pool %s", o.pool); -- -- rc = uuid_parse(o.pool, uuid); -- DCHECK(rc, "Failed to parse 'pool': %s", o.pool); -- -- svcl = daos_rank_list_parse(o.svcl, ":"); -- if (svcl == NULL) -- ERR("Failed to allocate svcl"); -- -- rc = daos_pool_connect(uuid, o.group, svcl, DAOS_PC_RW, -- &poh, &po_info, NULL); -- d_rank_list_free(svcl); -- DCHECK(rc, "Failed to connect to pool %s", o.pool); -- -- INFO(VERBOSE_1, "Create/Open Container %s", o.cont); -- -- uuid_clear(uuid); -- rc = uuid_parse(o.cont, uuid); -- DCHECK(rc, "Failed to parse 'cont': %s", o.cont); -- -- rc = daos_cont_open(poh, uuid, DAOS_COO_RW, &coh, &co_info, -- NULL); -- /* If NOEXIST we create it */ -- if (rc == -DER_NONEXIST) { -- INFO(VERBOSE_2, "Creating DAOS Container...\n"); -- rc = daos_cont_create(poh, uuid, NULL, NULL); -- if (rc == 0) -- rc = daos_cont_open(poh, uuid, DAOS_COO_RW, -- &coh, &co_info, NULL); -- } -- DCHECK(rc, "Failed to create container"); -- } -- -- HandleDistribute(&poh, POOL_HANDLE); -- HandleDistribute(&coh, CONT_HANDLE); -- aoh.cookie = 0; -- -- daos_initialized = true; --} -- --static void --DAOS_Fini() --{ -- int rc; -- -- if (!daos_initialized) -- return; -- -- MPI_Barrier(MPI_COMM_WORLD); -- rc = daos_cont_close(coh, NULL); -- if (rc) { -- DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); -- MPI_Abort(MPI_COMM_WORLD, -1); -- } -- MPI_Barrier(MPI_COMM_WORLD); -- -- if (o.destroy) { -- if (rank == 0) { -- uuid_t uuid; -- double t1, t2; -- -- INFO(VERBOSE_1, "Destroying DAOS Container %s", o.cont); -- uuid_parse(o.cont, uuid); -- t1 = MPI_Wtime(); -- rc = daos_cont_destroy(poh, uuid, 1, NULL); -- t2 = MPI_Wtime(); -- if (rc == 0) -- INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); -- } -- -- MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); -- if (rc) { -- if (rank == 0) -- DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); -- MPI_Abort(MPI_COMM_WORLD, -1); -- } -- } -- -- if (rank == 0) -- INFO(VERBOSE_1, "Disconnecting from DAOS POOL.."); -- -- rc = daos_pool_disconnect(poh, NULL); -- DCHECK(rc, "Failed to disconnect from pool %s", o.pool); -- -- MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); -- if (rank == 0) -- INFO(VERBOSE_1, "Finalizing DAOS.."); -- -- rc = daos_fini(); -- DCHECK(rc, "Failed to finalize daos"); -- -- daos_initialized = false; --} -- --static void --gen_oid(const char *name, daos_obj_id_t *oid) --{ -- -- oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); -- oid->hi = 0; -- -- daos_array_generate_id(oid, objectClass, true, 0); --} -- --static void * --DAOS_Create(char *testFileName, IOR_param_t *param) --{ -- daos_obj_id_t oid; -- int rc; -- -- /** Convert file name into object ID */ -- gen_oid(testFileName, &oid); -- -- /** Create the array */ -- if (param->filePerProc || rank == 0) { -- rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o.chunk_size, -- &aoh, NULL); -- DCHECK(rc, "Failed to create array object\n"); -- } -- -- /** Distribute the array handle if not FPP */ -- if (!param->filePerProc) -- HandleDistribute(&aoh, ARRAY_HANDLE); -- -- return &aoh; --} -- --static int --DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) --{ -- daos_obj_id_t oid; -- daos_size_t cell_size, chunk_size; -- int rc; -- -- /** Convert file name into object ID */ -- gen_oid(testFileName, &oid); -- -- rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, -- &cell_size, &chunk_size, &aoh, NULL); -- if (rc) -- return rc; -- -- if (cell_size != 1) -- GERR("Invalid DAOS Array object.\n"); -- -- rc = daos_array_close(aoh, NULL); -- aoh.cookie = 0; -- return rc; --} -- --static void * --DAOS_Open(char *testFileName, IOR_param_t *param) --{ -- daos_obj_id_t oid; -- -- /** Convert file name into object ID */ -- gen_oid(testFileName, &oid); -- -- /** Open the array */ -- if (param->filePerProc || rank == 0) { -- daos_size_t cell_size, chunk_size; -- int rc; -- -- rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, -- &cell_size, &chunk_size, &aoh, NULL); -- DCHECK(rc, "Failed to create array object\n"); -- -- if (cell_size != 1) -- GERR("Invalid DAOS Array object.\n"); -- } -- -- /** Distribute the array handle if not FPP */ -- if (!param->filePerProc) -- HandleDistribute(&aoh, ARRAY_HANDLE); -- -- return &aoh; --} -- --static IOR_offset_t --DAOS_Xfer(int access, void *file, IOR_size_t *buffer, -- IOR_offset_t length, IOR_param_t *param) --{ -- daos_array_iod_t iod; -- daos_range_t rg; -- d_sg_list_t sgl; -- d_iov_t iov; -- int rc; -- -- /** set array location */ -- iod.arr_nr = 1; -- rg.rg_len = length; -- rg.rg_idx = param->offset; -- iod.arr_rgs = &rg; -- -- /** set memory location */ -- sgl.sg_nr = 1; -- d_iov_set(&iov, buffer, length); -- sgl.sg_iovs = &iov; -- -- if (access == WRITE) { -- rc = daos_array_write(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); -- DCHECK(rc, "daos_array_write() failed (%d).", rc); -- } else { -- rc = daos_array_read(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); -- DCHECK(rc, "daos_array_read() failed (%d).", rc); -- } -- -- return length; --} -- --static void --DAOS_Close(void *file, IOR_param_t *param) --{ -- int rc; -- -- if (!daos_initialized) -- GERR("DAOS is not initialized!"); -- -- rc = daos_array_close(aoh, NULL); -- DCHECK(rc, "daos_array_close() failed (%d).", rc); -- -- aoh.cookie = 0; --} -- --static void --DAOS_Delete(char *testFileName, IOR_param_t *param) --{ -- daos_obj_id_t oid; -- daos_size_t cell_size, chunk_size; -- int rc; -- -- if (!daos_initialized) -- GERR("DAOS is not initialized!"); -- -- /** Convert file name into object ID */ -- gen_oid(testFileName, &oid); -- -- /** open the array to verify it exists */ -- rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, -- &cell_size, &chunk_size, &aoh, NULL); -- DCHECK(rc, "daos_array_open() failed (%d).", rc); -- -- if (cell_size != 1) -- GERR("Invalid DAOS Array object.\n"); -- -- rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); -- DCHECK(rc, "daos_array_destroy() failed (%d).", rc); -- -- rc = daos_array_close(aoh, NULL); -- DCHECK(rc, "daos_array_close() failed (%d).", rc); -- aoh.cookie = 0; --} -- --static char * --DAOS_GetVersion() --{ -- static char ver[1024] = {}; -- -- sprintf(ver, "%s", "DAOS"); -- return ver; --} -- --static void --DAOS_Fsync(void *file, IOR_param_t *param) --{ -- return; --} -- --static IOR_offset_t --DAOS_GetFileSize(IOR_param_t *param, MPI_Comm testComm, char *testFileName) --{ -- daos_obj_id_t oid; -- daos_size_t size; -- int rc; -- -- if (!daos_initialized) -- GERR("DAOS is not initialized!"); -- -- /** Convert file name into object ID */ -- gen_oid(testFileName, &oid); -- -- /** open the array to verify it exists */ -- if (param->filePerProc || rank == 0) { -- daos_size_t cell_size, chunk_size; -- -- rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, -- &cell_size, &chunk_size, &aoh, NULL); -- DCHECK(rc, "daos_array_open() failed (%d).", rc); -- -- if (cell_size != 1) -- GERR("Invalid DAOS Array object.\n"); -- -- rc = daos_array_get_size(aoh, DAOS_TX_NONE, &size, NULL); -- DCHECK(rc, "daos_array_get_size() failed (%d).", rc); -- -- rc = daos_array_close(aoh, NULL); -- DCHECK(rc, "daos_array_close() failed (%d).", rc); -- aoh.cookie = 0; -- } -- -- if (!param->filePerProc) -- MPI_Bcast(&size, 1, MPI_LONG, 0, MPI_COMM_WORLD); -- -- return size; --} -diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c -index e7b1d6b..23741e1 100755 ---- a/src/aiori-DFS.c -+++ b/src/aiori-DFS.c -@@ -27,22 +27,30 @@ - #include - #include - -+#include - #include - #include - #include - #include - --#include "ior.h" --#include "iordef.h" - #include "aiori.h" - #include "utilities.h" -+#include "iordef.h" -+ -+#if defined(DAOS_API_VERSION_MAJOR) && defined(DAOS_API_VERSION_MINOR) -+#define CHECK_DAOS_API_VERSION(major, minor) \ -+ ((DAOS_API_VERSION_MAJOR > (major)) \ -+ || (DAOS_API_VERSION_MAJOR == (major) && DAOS_API_VERSION_MINOR >= (minor))) -+#else -+#define CHECK_DAOS_API_VERSION(major, minor) 0 -+#endif - --dfs_t *dfs; -+static dfs_t *dfs; - static daos_handle_t poh, coh; --static daos_oclass_id_t objectClass = OC_SX; --static daos_oclass_id_t dir_oclass = OC_SX; --static struct d_hash_table *dir_hash; --static bool dfs_init; -+static daos_oclass_id_t objectClass; -+static daos_oclass_id_t dir_oclass; -+static struct d_hash_table *aiori_dfs_hash = NULL; -+static int dfs_init_count; - - struct aiori_dir_hdl { - d_list_t entry; -@@ -57,9 +65,8 @@ enum handleType { - }; - - /************************** O P T I O N S *****************************/ --struct dfs_options{ -+typedef struct { - char *pool; -- char *svcl; - char *group; - char *cont; - int chunk_size; -@@ -67,57 +74,67 @@ struct dfs_options{ - char *dir_oclass; - char *prefix; - int destroy; --}; -+} DFS_options_t; - --static struct dfs_options o = { -- .pool = NULL, -- .svcl = NULL, -- .group = NULL, -- .cont = NULL, -- .chunk_size = 1048576, -- .oclass = NULL, -- .dir_oclass = NULL, -- .prefix = NULL, -- .destroy = 0, --}; -+static option_help * DFS_options(aiori_mod_opt_t ** init_backend_options, -+ aiori_mod_opt_t * init_values){ -+ DFS_options_t * o = malloc(sizeof(DFS_options_t)); - --static option_help options [] = { -- {0, "dfs.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.pool}, -- {0, "dfs.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', & o.svcl}, -- {0, "dfs.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, -- {0, "dfs.cont", "DFS container uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.cont}, -- {0, "dfs.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, -- {0, "dfs.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, -- {0, "dfs.dir_oclass", "directory object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.dir_oclass}, -- {0, "dfs.prefix", "mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, -- {0, "dfs.destroy", "Destroy DFS Container", OPTION_FLAG, 'd', &o.destroy}, -- LAST_OPTION --}; -+ if (init_values != NULL) { -+ memcpy(o, init_values, sizeof(DFS_options_t)); -+ } else { -+ memset(o, 0, sizeof(DFS_options_t)); -+ } -+ -+ *init_backend_options = (aiori_mod_opt_t *) o; -+ -+ option_help h [] = { -+ {0, "dfs.pool", "Pool label or uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->pool}, -+ {0, "dfs.group", "DAOS system name", OPTION_OPTIONAL_ARGUMENT, 's', &o->group}, -+ {0, "dfs.cont", "Container label or uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->cont}, -+ {0, "dfs.chunk_size", "File chunk size in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', &o->chunk_size}, -+ {0, "dfs.oclass", "File object class", OPTION_OPTIONAL_ARGUMENT, 's', &o->oclass}, -+ {0, "dfs.dir_oclass", "Directory object class", OPTION_OPTIONAL_ARGUMENT, 's', -+ &o->dir_oclass}, -+ {0, "dfs.prefix", "Mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', &o->prefix}, -+ {0, "dfs.destroy", "Destroy DFS container on finalize", OPTION_FLAG, 'd', &o->destroy}, -+ LAST_OPTION -+ }; -+ -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} - - /**************************** P R O T O T Y P E S *****************************/ --static void *DFS_Create(char *, IOR_param_t *); --static void *DFS_Open(char *, IOR_param_t *); --static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void DFS_Close(void *, IOR_param_t *); --static void DFS_Delete(char *, IOR_param_t *); -+static void DFS_Init(aiori_mod_opt_t *); -+static void DFS_Finalize(aiori_mod_opt_t *); -+static aiori_fd_t *DFS_Create(char *, int, aiori_mod_opt_t *); -+static aiori_fd_t *DFS_Open(char *, int, aiori_mod_opt_t *); -+static IOR_offset_t DFS_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, -+ IOR_offset_t, aiori_mod_opt_t *); -+static void DFS_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void DFS_Delete(char *, aiori_mod_opt_t *); - static char* DFS_GetVersion(); --static void DFS_Fsync(void *, IOR_param_t *); --static void DFS_Sync(IOR_param_t *); --static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *); --static int DFS_Stat (const char *, struct stat *, IOR_param_t *); --static int DFS_Mkdir (const char *, mode_t, IOR_param_t *); --static int DFS_Rmdir (const char *, IOR_param_t *); --static int DFS_Access (const char *, int, IOR_param_t *); --static void DFS_Init(); --static void DFS_Finalize(); -+static void DFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static void DFS_Sync(aiori_mod_opt_t *); -+static IOR_offset_t DFS_GetFileSize(aiori_mod_opt_t *, char *); -+static int DFS_Statfs (const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *); -+static int DFS_Stat (const char *, struct stat *, aiori_mod_opt_t *); -+static int DFS_Mkdir (const char *, mode_t, aiori_mod_opt_t *); -+static int DFS_Rename(const char *, const char *, aiori_mod_opt_t *); -+static int DFS_Rmdir (const char *, aiori_mod_opt_t *); -+static int DFS_Access (const char *, int, aiori_mod_opt_t *); - static option_help * DFS_options(); -+static void DFS_init_xfer_options(aiori_xfer_hint_t *); -+static int DFS_check_params(aiori_mod_opt_t *); - - /************************** D E C L A R A T I O N S ***************************/ - - ior_aiori_t dfs_aiori = { - .name = "DFS", -+ .initialize = DFS_Init, -+ .finalize = DFS_Finalize, - .create = DFS_Create, - .open = DFS_Open, - .xfer = DFS_Xfer, -@@ -127,14 +144,15 @@ ior_aiori_t dfs_aiori = { - .fsync = DFS_Fsync, - .sync = DFS_Sync, - .get_file_size = DFS_GetFileSize, -+ .xfer_hints = DFS_init_xfer_options, - .statfs = DFS_Statfs, - .mkdir = DFS_Mkdir, -+ .rename = DFS_Rename, - .rmdir = DFS_Rmdir, - .access = DFS_Access, - .stat = DFS_Stat, -- .initialize = DFS_Init, -- .finalize = DFS_Finalize, - .get_options = DFS_options, -+ .check_params = DFS_check_params, - .enable_mdtest = true, - }; - -@@ -145,12 +163,12 @@ ior_aiori_t dfs_aiori = { - do { \ - int _rc = (rc); \ - \ -- if (_rc != 0) { \ -+ if (_rc != 0) { \ - fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ - format"\n", __FILE__, __LINE__, rank, _rc, \ - ##__VA_ARGS__); \ - fflush(stderr); \ -- exit(-1); \ -+ goto out; \ - } \ - } while (0) - -@@ -160,12 +178,33 @@ do { \ - printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ - } while (0) - --#define GERR(format, ...) \ -+#define DERR(format, ...) \ - do { \ - fprintf(stderr, format"\n", ##__VA_ARGS__); \ -- MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ -+ fflush(stderr); \ -+ rc = -1; \ -+ goto out; \ - } while (0) - -+static aiori_xfer_hint_t * hints = NULL; -+ -+void DFS_init_xfer_options(aiori_xfer_hint_t * params) -+{ -+ hints = params; -+} -+ -+static int DFS_check_params(aiori_mod_opt_t * options){ -+ DFS_options_t *o = (DFS_options_t *) options; -+ -+ if (o->pool == NULL || o->cont == NULL) -+ ERR("Invalid pool or container options\n"); -+ -+ if (testComm == MPI_COMM_NULL) -+ testComm = MPI_COMM_WORLD; -+ -+ return 0; -+} -+ - static inline struct aiori_dir_hdl * - hdl_obj(d_list_t *rlink) - { -@@ -186,18 +225,33 @@ rec_free(struct d_hash_table *htable, d_list_t *rlink) - { - struct aiori_dir_hdl *hdl = hdl_obj(rlink); - -- assert(d_hash_rec_unlinked(&hdl->entry)); - dfs_release(hdl->oh); - free(hdl); - } - -+static bool -+rec_decref(struct d_hash_table *htable, d_list_t *rlink) -+{ -+ return true; -+} -+ -+static uint32_t -+rec_hash(struct d_hash_table *htable, d_list_t *rlink) -+{ -+ struct aiori_dir_hdl *hdl = hdl_obj(rlink); -+ -+ return d_hash_string_u32(hdl->name, strlen(hdl->name)); -+} -+ - static d_hash_table_ops_t hdl_hash_ops = { - .hop_key_cmp = key_cmp, -- .hop_rec_free = rec_free -+ .hop_rec_decref = rec_decref, -+ .hop_rec_free = rec_free, -+ .hop_rec_hash = rec_hash - }; - - /* Distribute process 0's pool or container handle to others. */ --static void -+static int - HandleDistribute(enum handleType type) - { - d_iov_t global; -@@ -219,8 +273,7 @@ HandleDistribute(enum handleType type) - DCHECK(rc, "Failed to get global handle size"); - } - -- MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, -- MPI_COMM_WORLD), -+ MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, testComm), - "Failed to bcast global handle buffer size"); - - global.iov_len = global.iov_buf_len; -@@ -238,8 +291,7 @@ HandleDistribute(enum handleType type) - DCHECK(rc, "Failed to create global handle"); - } - -- MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, -- MPI_COMM_WORLD), -+ MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, testComm), - "Failed to bcast global pool handle"); - - if (rank != 0) { -@@ -252,7 +304,10 @@ HandleDistribute(enum handleType type) - DCHECK(rc, "Failed to get local handle"); - } - -- free(global.iov_buf); -+out: -+ if (global.iov_buf) -+ free(global.iov_buf); -+ return rc; - } - - static int -@@ -267,14 +322,6 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) - if (path == NULL || _obj_name == NULL || _cont_name == NULL) - return -EINVAL; - -- if (strcmp(path, "/") == 0) { -- *_cont_name = strdup("/"); -- if (*_cont_name == NULL) -- return -ENOMEM; -- *_obj_name = NULL; -- return 0; -- } -- - f1 = strdup(path); - if (f1 == NULL) { - rc = -ENOMEM; -@@ -290,53 +337,35 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) - fname = basename(f1); - cont_name = dirname(f2); - -- if (cont_name[0] == '.' || cont_name[0] != '/') { -- char cwd[1024]; -+ if (cont_name[0] != '/') { -+ char *ptr; -+ char buf[PATH_MAX]; - -- if (getcwd(cwd, 1024) == NULL) { -- rc = -ENOMEM; -+ ptr = realpath(cont_name, buf); -+ if (ptr == NULL) { -+ rc = errno; - goto out; - } - -- if (strcmp(cont_name, ".") == 0) { -- cont_name = strdup(cwd); -- if (cont_name == NULL) { -- rc = -ENOMEM; -- goto out; -- } -- } else { -- char *new_dir = calloc(strlen(cwd) + strlen(cont_name) -- + 1, sizeof(char)); -- if (new_dir == NULL) { -- rc = -ENOMEM; -- goto out; -- } -- -- strcpy(new_dir, cwd); -- if (cont_name[0] == '.') { -- strcat(new_dir, &cont_name[1]); -- } else { -- strcat(new_dir, "/"); -- strcat(new_dir, cont_name); -- } -- cont_name = new_dir; -- } -- *_cont_name = cont_name; -- } else { -- *_cont_name = strdup(cont_name); -- if (*_cont_name == NULL) { -- rc = -ENOMEM; -+ cont_name = strdup(ptr); -+ if (cont_name == NULL) { -+ rc = ENOMEM; - goto out; - } -- } -+ *_cont_name = cont_name; -+ } else { -+ *_cont_name = strdup(cont_name); -+ if (*_cont_name == NULL) { -+ rc = ENOMEM; -+ goto out; -+ } -+ } - -- *_obj_name = strdup(fname); -- if (*_obj_name == NULL) { -- free(*_cont_name); -- *_cont_name = NULL; -- rc = -ENOMEM; -+ *_obj_name = strdup(fname); -+ if (*_obj_name == NULL) { -+ rc = ENOMEM; - goto out; -- } -+ } - - out: - if (f1) -@@ -346,285 +375,403 @@ out: - return rc; - } - -+static int -+share_file_handle(dfs_obj_t **file, MPI_Comm comm) -+{ -+ d_iov_t global; -+ int rc; -+ -+ global.iov_buf = NULL; -+ global.iov_buf_len = 0; -+ global.iov_len = 0; -+ -+ if (rank == 0) { -+ rc = dfs_obj_local2global(dfs, *file, &global); -+ DCHECK(rc, "Failed to get global handle size"); -+ } -+ -+ MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, testComm), -+ "Failed to bcast global handle buffer size"); -+ -+ global.iov_len = global.iov_buf_len; -+ global.iov_buf = malloc(global.iov_buf_len); -+ if (global.iov_buf == NULL) -+ ERR("Failed to allocate global handle buffer"); -+ -+ if (rank == 0) { -+ rc = dfs_obj_local2global(dfs, *file, &global); -+ DCHECK(rc, "Failed to create global handle"); -+ } -+ -+ MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, testComm), -+ "Failed to bcast global pool handle"); -+ -+ if (rank != 0) { -+ rc = dfs_obj_global2local(dfs, 0, global, file); -+ DCHECK(rc, "Failed to get local handle"); -+ } -+ -+out: -+ if (global.iov_buf) -+ free(global.iov_buf); -+ return rc; -+} -+ - static dfs_obj_t * - lookup_insert_dir(const char *name, mode_t *mode) - { - struct aiori_dir_hdl *hdl; -+ dfs_obj_t *oh; - d_list_t *rlink; -+ size_t len = strlen(name); - int rc; - -- rlink = d_hash_rec_find(dir_hash, name, strlen(name)); -+ rlink = d_hash_rec_find(aiori_dfs_hash, name, len); - if (rlink != NULL) { - hdl = hdl_obj(rlink); - return hdl->oh; - } - -+ rc = dfs_lookup(dfs, name, O_RDWR, &oh, mode, NULL); -+ if (rc) -+ return NULL; -+ -+ if (mode && !S_ISDIR(*mode)) -+ return oh; -+ - hdl = calloc(1, sizeof(struct aiori_dir_hdl)); - if (hdl == NULL) -- GERR("failed to alloc dir handle"); -+ return NULL; - -- strncpy(hdl->name, name, PATH_MAX-1); -- hdl->name[PATH_MAX-1] = '\0'; -+ strncpy(hdl->name, name, len); -+ hdl->oh = oh; - -- rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, mode, NULL); -- if (rc) -+ rc = d_hash_rec_insert(aiori_dfs_hash, hdl->name, len, &hdl->entry, false); -+ if (rc) { -+ fprintf(stderr, "Failed to insert dir handle in hashtable\n"); -+ dfs_release(hdl->oh); -+ free(hdl); - return NULL; -- if (mode && S_ISREG(*mode)) -- return hdl->oh; -- -- rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), -- &hdl->entry, true); -- DCHECK(rc, "Failed to insert dir handle in hashtable"); -+ } - - return hdl->oh; - } - --static option_help * DFS_options(){ -- return options; --} -- - static void --DFS_Init() { -+DFS_Init(aiori_mod_opt_t * options) -+{ -+ DFS_options_t *o = (DFS_options_t *)options; -+ bool pool_connect, cont_create, cont_open, dfs_mounted; -+ uuid_t co_uuid; - int rc; - -- /** in case we are already initialized, return */ -- if (dfs_init) -+ dfs_init_count++; -+ if (dfs_init_count > 1) { -+ pool_connect = cont_create = cont_open = dfs_mounted = true; -+ /** chunk size and oclass can change between different runs */ -+ if (o->oclass) { -+ objectClass = daos_oclass_name2id(o->oclass); -+ if (objectClass == OC_UNKNOWN) -+ DERR("Invalid DAOS object class: %s\n", o->oclass); -+ } -+ if (o->dir_oclass) { -+ dir_oclass = daos_oclass_name2id(o->dir_oclass); -+ if (dir_oclass == OC_UNKNOWN) -+ DERR("Invalid DAOS directory object class: %s\n", o->dir_oclass); -+ } - return; -+ } - - /** shouldn't be fatal since it can be called with POSIX backend selection */ -- if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) -+ if (o->pool == NULL || o->cont == NULL) { -+ dfs_init_count--; - return; -+ } -+ -+ pool_connect = cont_create = cont_open = dfs_mounted = false; - - rc = daos_init(); - DCHECK(rc, "Failed to initialize daos"); - -- if (o.oclass) { -- objectClass = daos_oclass_name2id(o.oclass); -- if (objectClass == OC_UNKNOWN) -- GERR("Invalid DAOS object class %s\n", o.oclass); -- } -+ if (o->oclass) { -+ objectClass = daos_oclass_name2id(o->oclass); -+ if (objectClass == OC_UNKNOWN) -+ DERR("Invalid DAOS object class: %s\n", o->oclass); -+ } - -- if (o.dir_oclass) { -- dir_oclass = daos_oclass_name2id(o.dir_oclass); -- if (dir_oclass == OC_UNKNOWN) -- GERR("Invalid DAOS directory object class %s\n", o.dir_oclass); -- } -+ if (o->dir_oclass) { -+ dir_oclass = daos_oclass_name2id(o->dir_oclass); -+ if (dir_oclass == OC_UNKNOWN) -+ DERR("Invalid DAOS directory object class: %s\n", o->dir_oclass); -+ } - -- rc = d_hash_table_create(0, 16, NULL, &hdl_hash_ops, &dir_hash); -+ rc = d_hash_table_create(D_HASH_FT_EPHEMERAL | D_HASH_FT_NOLOCK | D_HASH_FT_LRU, -+ 4, NULL, &hdl_hash_ops, &aiori_dfs_hash); - DCHECK(rc, "Failed to initialize dir hashtable"); - - if (rank == 0) { -- uuid_t pool_uuid, co_uuid; -- d_rank_list_t *svcl = NULL; - daos_pool_info_t pool_info; - daos_cont_info_t co_info; - -- rc = uuid_parse(o.pool, pool_uuid); -- DCHECK(rc, "Failed to parse 'Pool uuid': %s", o.pool); -+ INFO(VERBOSE_1, "DFS Pool = %s", o->pool); -+ INFO(VERBOSE_1, "DFS Container = %s", o->cont); - -- rc = uuid_parse(o.cont, co_uuid); -- DCHECK(rc, "Failed to parse 'Cont uuid': %s", o.cont); -+#if CHECK_DAOS_API_VERSION(1, 4) -+ rc = daos_pool_connect(o->pool, o->group, DAOS_PC_RW, &poh, &pool_info, NULL); -+ DCHECK(rc, "Failed to connect to pool %s", o->pool); -+ pool_connect = true; - -- svcl = daos_rank_list_parse(o.svcl, ":"); -- if (svcl == NULL) -- ERR("Failed to allocate svcl"); -+ rc = daos_cont_open(poh, o->cont, DAOS_COO_RW, &coh, &co_info, NULL); -+#else -+ uuid_t pool_uuid; - -- INFO(VERBOSE_1, "Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); -- INFO(VERBOSE_1, "DFS Container namespace uuid = %s\n", o.cont); -+ rc = uuid_parse(o->pool, pool_uuid); -+ DCHECK(rc, "Failed to parse 'Pool uuid': %s", o->pool); -+ rc = uuid_parse(o->cont, co_uuid); -+ DCHECK(rc, "Failed to parse 'Cont uuid': %s", o->cont); - -- /** Connect to DAOS pool */ -- rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, -- &poh, &pool_info, NULL); -- d_rank_list_free(svcl); -- DCHECK(rc, "Failed to connect to pool"); -+ rc = daos_pool_connect(pool_uuid, o->group, DAOS_PC_RW, &poh, &pool_info, NULL); -+ DCHECK(rc, "Failed to connect to pool %s", o->pool); -+ pool_connect = true; - -- rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, -- NULL); -+ rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); -+#endif - /* If NOEXIST we create it */ - if (rc == -DER_NONEXIST) { - INFO(VERBOSE_1, "Creating DFS Container ...\n"); -- -+#if CHECK_DAOS_API_VERSION(1, 4) -+ if (uuid_parse(o->cont, co_uuid) != 0) -+ /** user passes in label */ -+ rc = dfs_cont_create_with_label(poh, o->cont, NULL, &co_uuid, &coh, NULL); -+ else -+ /** user passes in uuid */ -+#endif - rc = dfs_cont_create(poh, co_uuid, NULL, &coh, NULL); - if (rc) - DCHECK(rc, "Failed to create container"); -+ cont_create = true; - } else if (rc) { - DCHECK(rc, "Failed to create container"); - } -+ cont_open = true; - - rc = dfs_mount(poh, coh, O_RDWR, &dfs); - DCHECK(rc, "Failed to mount DFS namespace"); -+ dfs_mounted = true; - } - - HandleDistribute(POOL_HANDLE); -+ pool_connect = true; - HandleDistribute(CONT_HANDLE); -+ cont_open = true; - HandleDistribute(DFS_HANDLE); -+ dfs_mounted = true; - -- if (o.prefix) { -- rc = dfs_set_prefix(dfs, o.prefix); -+ if (o->prefix) { -+ rc = dfs_set_prefix(dfs, o->prefix); - DCHECK(rc, "Failed to set DFS Prefix"); - } -- dfs_init = true; -+ -+out: -+ if (rc) { -+ if (dfs_mounted) -+ dfs_umount(dfs); -+ if (cont_open) -+ daos_cont_close(coh, NULL); -+ if (cont_create && rank == 0) { -+#if CHECK_DAOS_API_VERSION(1, 4) -+ daos_cont_destroy(poh, o->cont, 1, NULL); -+#else -+ daos_cont_destroy(poh, co_uuid, 1, NULL); -+#endif -+ } -+ if (pool_connect) -+ daos_pool_disconnect(poh, NULL); -+ if (aiori_dfs_hash) -+ d_hash_table_destroy(aiori_dfs_hash, false); -+ daos_fini(); -+ dfs_init_count--; -+ ERR("Failed to initialize DAOS DFS driver"); -+ } - } - - static void --DFS_Finalize() -+DFS_Finalize(aiori_mod_opt_t *options) - { -+ DFS_options_t *o = (DFS_options_t *)options; - int rc; - -- MPI_Barrier(MPI_COMM_WORLD); -- d_hash_table_destroy(dir_hash, true /* force */); -+ objectClass = 0; -+ dir_oclass = 0; -+ -+ dfs_init_count --; -+ if (dfs_init_count != 0) -+ return; -+ -+ MPI_Barrier(testComm); -+ -+ while (1) { -+ d_list_t *rlink = NULL; -+ -+ rlink = d_hash_rec_first(aiori_dfs_hash); -+ if (rlink == NULL) -+ break; -+ d_hash_rec_decref(aiori_dfs_hash, rlink); -+ } -+ -+ rc = d_hash_table_destroy(aiori_dfs_hash, false); -+ DCHECK(rc, "Failed to destroy DFS hash"); -+ MPI_Barrier(testComm); - - rc = dfs_umount(dfs); - DCHECK(rc, "Failed to umount DFS namespace"); -- MPI_Barrier(MPI_COMM_WORLD); -+ MPI_Barrier(testComm); - - rc = daos_cont_close(coh, NULL); -- DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); -- MPI_Barrier(MPI_COMM_WORLD); -+ DCHECK(rc, "Failed to close container %s (%d)", o->cont, rc); -+ MPI_Barrier(testComm); - -- if (o.destroy) { -+ if (o->destroy) { - if (rank == 0) { -+ INFO(VERBOSE_1, "Destroying DFS Container: %s", o->cont); -+#if CHECK_DAOS_API_VERSION(1, 4) -+ daos_cont_destroy(poh, o->cont, 1, NULL); -+#else - uuid_t uuid; -- double t1, t2; -- -- INFO(VERBOSE_1, "Destorying DFS Container: %s\n", o.cont); -- uuid_parse(o.cont, uuid); -- t1 = MPI_Wtime(); -+ uuid_parse(o->cont, uuid); - rc = daos_cont_destroy(poh, uuid, 1, NULL); -- t2 = MPI_Wtime(); -- if (rc == 0) -- INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); -+#endif -+ DCHECK(rc, "Failed to destroy container %s", o->cont); - } - -- MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); -+ MPI_Bcast(&rc, 1, MPI_INT, 0, testComm); - if (rc) { - if (rank == 0) -- DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); -- MPI_Abort(MPI_COMM_WORLD, -1); -+ DCHECK(rc, "Failed to destroy container %s (%d)", o->cont, rc); - } - } - - if (rank == 0) -- INFO(VERBOSE_1, "Disconnecting from DAOS POOL\n"); -+ INFO(VERBOSE_1, "Disconnecting from DAOS POOL"); - - rc = daos_pool_disconnect(poh, NULL); - DCHECK(rc, "Failed to disconnect from pool"); - -- MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); -+ MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - - if (rank == 0) -- INFO(VERBOSE_1, "Finalizing DAOS..\n"); -+ INFO(VERBOSE_1, "Finalizing DAOS.."); - - rc = daos_fini(); - DCHECK(rc, "Failed to finalize DAOS"); - -+out: - /** reset tunables */ -- o.pool = NULL; -- o.svcl = NULL; -- o.group = NULL; -- o.cont = NULL; -- o.chunk_size = 1048576; -- o.oclass = NULL; -- o.dir_oclass = NULL; -- o.prefix = NULL; -- o.destroy = 0; -- objectClass = OC_SX; -- dir_oclass = OC_SX; -- dfs_init = false; -+ o->pool = NULL; -+ o->group = NULL; -+ o->cont = NULL; -+ o->chunk_size = 0; -+ o->oclass = NULL; -+ o->dir_oclass = NULL; -+ o->prefix = NULL; -+ o->destroy = 0; - } - - /* -- * Creat and open a file through the DFS interface. -+ * Create and open a file through the DFS interface. - */ --static void * --DFS_Create(char *testFileName, IOR_param_t *param) -+static aiori_fd_t * -+DFS_Create(char *testFileName, int flags, aiori_mod_opt_t *param) - { -+ DFS_options_t *o = (DFS_options_t*) param; - char *name = NULL, *dir_name = NULL; - dfs_obj_t *obj = NULL, *parent = NULL; -- mode_t mode = 0644; -+ mode_t mode = 0664; - int fd_oflag = 0; - int rc; - -- assert(param); -- - rc = parse_filename(testFileName, &name, &dir_name); - DCHECK(rc, "Failed to parse path %s", testFileName); - assert(dir_name); - assert(name); - -- parent = lookup_insert_dir(dir_name, NULL); -- if (parent == NULL) -- GERR("Failed to lookup parent dir"); -- -- mode = S_IFREG | param->mode; -- if (param->filePerProc || rank == 0) { -+ mode = S_IFREG | mode; -+ if (hints->filePerProc || rank == 0) { - fd_oflag |= O_CREAT | O_RDWR | O_EXCL; - -+ parent = lookup_insert_dir(dir_name, NULL); -+ if (parent == NULL) -+ DERR("Failed to lookup parent: %s", dir_name); -+ - rc = dfs_open(dfs, parent, name, mode, fd_oflag, -- objectClass, o.chunk_size, NULL, &obj); -+ objectClass, o->chunk_size, NULL, &obj); - DCHECK(rc, "dfs_open() of %s Failed", name); - } -- if (!param->filePerProc) { -- MPI_Barrier(MPI_COMM_WORLD); -- if (rank != 0) { -- fd_oflag |= O_RDWR; -- rc = dfs_open(dfs, parent, name, mode, fd_oflag, -- objectClass, o.chunk_size, NULL, &obj); -- DCHECK(rc, "dfs_open() of %s Failed", name); -- } -+ -+ if (!hints->filePerProc) { -+ rc = share_file_handle(&obj, testComm); -+ DCHECK(rc, "global open of %s Failed", name); - } - -+out: - if (name) - free(name); - if (dir_name) - free(dir_name); -- -- return ((void *)obj); -+ return (aiori_fd_t *)(obj); - } - - /* - * Open a file through the DFS interface. - */ --static void * --DFS_Open(char *testFileName, IOR_param_t *param) -+static aiori_fd_t * -+DFS_Open(char *testFileName, int flags, aiori_mod_opt_t *param) - { -+ DFS_options_t *o = (DFS_options_t*) param; - char *name = NULL, *dir_name = NULL; - dfs_obj_t *obj = NULL, *parent = NULL; -- mode_t mode; -- int rc; -+ mode_t mode = 0664; - int fd_oflag = 0; -+ int rc; - - fd_oflag |= O_RDWR; -- mode = S_IFREG | param->mode; -+ mode = S_IFREG | flags; - - rc = parse_filename(testFileName, &name, &dir_name); - DCHECK(rc, "Failed to parse path %s", testFileName); -- - assert(dir_name); - assert(name); - -- parent = lookup_insert_dir(dir_name, NULL); -- if (parent == NULL) -- GERR("Failed to lookup parent dir"); -+ if (hints->filePerProc || rank == 0) { -+ parent = lookup_insert_dir(dir_name, NULL); -+ if (parent == NULL) -+ DERR("Failed to lookup parent: %s", dir_name); - -- rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, -- o.chunk_size, NULL, &obj); -- DCHECK(rc, "dfs_open() of %s Failed", name); -+ rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, -+ o->chunk_size, NULL, &obj); -+ DCHECK(rc, "dfs_open() of %s Failed", name); -+ } - -+ if (!hints->filePerProc) { -+ rc = share_file_handle(&obj, testComm); -+ DCHECK(rc, "global open of %s Failed", name); -+ } -+ -+out: - if (name) - free(name); - if (dir_name) - free(dir_name); - -- return ((void *)obj); -+ return (aiori_fd_t *)(obj); - } - - /* - * Write or read access to file using the DFS interface. - */ - static IOR_offset_t --DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, -- IOR_param_t *param) -+DFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, IOR_offset_t length, -+ IOR_offset_t off, aiori_mod_opt_t *param) - { - int xferRetries = 0; - long long remaining = (long long)length; -@@ -647,21 +794,24 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, - - /* write/read file */ - if (access == WRITE) { -- rc = dfs_write(dfs, obj, &sgl, param->offset, NULL); -- if (rc) { -- fprintf(stderr, "dfs_write() failed (%d)", rc); -- return -1; -- } -+ rc = dfs_write(dfs, obj, &sgl, off, NULL); -+ if (rc) -+ ERRF("dfs_write(%p, %lld) failed (%d): %s\n", -+ (void*)ptr, remaining, rc, strerror(rc)); - ret = remaining; - } else { -- rc = dfs_read(dfs, obj, &sgl, param->offset, &ret, NULL); -- if (rc || ret == 0) -- fprintf(stderr, "dfs_read() failed(%d)", rc); -+ rc = dfs_read(dfs, obj, &sgl, off, &ret, NULL); -+ if (rc) -+ ERRF("dfs_read(%p, %lld) failed (%d): %s\n", -+ (void*)ptr, remaining, rc, strerror(rc)); -+ if (ret == 0) -+ ERRF("dfs_read(%p, %lld) returned EOF prematurely", -+ (void*)ptr, remaining); - } - - if (ret < remaining) { -- if (param->singleXferAttempt == TRUE) -- exit(-1); -+ if (hints->singleXferAttempt == TRUE) -+ exit(EXIT_FAILURE); - if (xferRetries > MAX_RETRY) - ERR("too many retries -- aborting"); - } -@@ -680,7 +830,7 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, - * Perform fsync(). - */ - static void --DFS_Fsync(void *fd, IOR_param_t * param) -+DFS_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) - { - /* no cache in DFS, so this is a no-op currently */ - dfs_sync(dfs); -@@ -691,7 +841,7 @@ DFS_Fsync(void *fd, IOR_param_t * param) - * Perform sync() on the dfs mount. - */ - static void --DFS_Sync(IOR_param_t * param) -+DFS_Sync(aiori_mod_opt_t * param) - { - /* no cache in DFS, so this is a no-op currently */ - dfs_sync(dfs); -@@ -702,7 +852,7 @@ DFS_Sync(IOR_param_t * param) - * Close a file through the DFS interface. - */ - static void --DFS_Close(void *fd, IOR_param_t * param) -+DFS_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) - { - dfs_release((dfs_obj_t *)fd); - } -@@ -711,7 +861,7 @@ DFS_Close(void *fd, IOR_param_t * param) - * Delete a file through the DFS interface. - */ - static void --DFS_Delete(char *testFileName, IOR_param_t * param) -+DFS_Delete(char *testFileName, aiori_mod_opt_t * param) - { - char *name = NULL, *dir_name = NULL; - dfs_obj_t *parent = NULL; -@@ -725,11 +875,11 @@ DFS_Delete(char *testFileName, IOR_param_t * param) - - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) -- GERR("Failed to lookup parent dir"); -+ DERR("Failed to lookup parent: %s", dir_name); - - rc = dfs_remove(dfs, parent, name, false, NULL); -- DCHECK(rc, "dfs_remove() of %s Failed", name); -- -+ DCHECK(rc, "Failed to remove path %s", testFileName); -+out: - if (name) - free(name); - if (dir_name) -@@ -748,56 +898,67 @@ static char* DFS_GetVersion() - * Use DFS stat() to return aggregate file size. - */ - static IOR_offset_t --DFS_GetFileSize(IOR_param_t * test, MPI_Comm comm, char *testFileName) -+DFS_GetFileSize(aiori_mod_opt_t * test, char *testFileName) - { - dfs_obj_t *obj; -- daos_size_t fsize, tmpMin, tmpMax, tmpSum; -+ MPI_Comm comm; -+ daos_size_t fsize; - int rc; - -- rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); -- if (rc) { -- fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); -- return -1; -+ if (hints->filePerProc == TRUE) { -+ comm = MPI_COMM_SELF; -+ } else { -+ comm = testComm; - } - -- rc = dfs_get_size(dfs, obj, &fsize); -- if (rc) -- return -1; -+ if (hints->filePerProc || rank == 0) { -+ rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); -+ if (rc) { -+ fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); -+ return -1; -+ } - -- dfs_release(obj); -+ rc = dfs_get_size(dfs, obj, &fsize); -+ dfs_release(obj); -+ if (rc) -+ return -1; -+ } - -- if (test->filePerProc == TRUE) { -- MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1, -- MPI_LONG_LONG_INT, MPI_SUM, comm), -- "cannot total data moved"); -- fsize = tmpSum; -- } else { -- MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1, -- MPI_LONG_LONG_INT, MPI_MIN, comm), -- "cannot total data moved"); -- MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1, -- MPI_LONG_LONG_INT, MPI_MAX, comm), -- "cannot total data moved"); -- if (tmpMin != tmpMax) { -- if (rank == 0) { -- WARN("inconsistent file size by different tasks"); -- } -- /* incorrect, but now consistent across tasks */ -- fsize = tmpMin; -- } -+ if (!hints->filePerProc) { -+ rc = MPI_Bcast(&fsize, 1, MPI_UINT64_T, 0, comm); -+ if (rc) -+ return rc; - } - - return (fsize); - } - - static int --DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, IOR_param_t * param) -+DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, aiori_mod_opt_t * param) - { -- return 0; -+ daos_pool_info_t info = {.pi_bits = DPI_SPACE}; -+ int rc; -+ -+ rc = daos_pool_query(poh, NULL, &info, NULL, NULL); -+ DCHECK(rc, "Failed to query pool"); -+ -+ sfs->f_blocks = info.pi_space.ps_space.s_total[DAOS_MEDIA_SCM] -+ + info.pi_space.ps_space.s_total[DAOS_MEDIA_NVME]; -+ sfs->f_bfree = info.pi_space.ps_space.s_free[DAOS_MEDIA_SCM] -+ + info.pi_space.ps_space.s_free[DAOS_MEDIA_NVME]; -+ sfs->f_bsize = 1; -+ sfs->f_files = -1; -+ sfs->f_ffree = -1; -+ sfs->f_bavail = sfs->f_bfree; -+ -+out: -+ if (rc) -+ rc = -1; -+ return rc; - } - - static int --DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) -+DFS_Mkdir(const char *path, mode_t mode, aiori_mod_opt_t * param) - { - dfs_obj_t *parent = NULL; - char *name = NULL, *dir_name = NULL; -@@ -812,20 +973,64 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) - - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) -- GERR("Failed to lookup parent dir"); -+ DERR("Failed to lookup parent: %s", dir_name); - - rc = dfs_mkdir(dfs, parent, name, mode, dir_oclass); -- DCHECK(rc, "dfs_mkdir() of %s Failed", name); - -+out: - if (name) - free(name); - if (dir_name) - free(dir_name); -+ if (rc) -+ rc = -1; - return rc; - } - - static int --DFS_Rmdir(const char *path, IOR_param_t * param) -+DFS_Rename(const char *oldfile, const char *newfile, aiori_mod_opt_t * param) -+{ -+ dfs_obj_t *old_parent = NULL, *new_parent = NULL; -+ char *old_name = NULL, *old_dir_name = NULL; -+ char *new_name = NULL, *new_dir_name = NULL; -+ int rc; -+ -+ rc = parse_filename(oldfile, &old_name, &old_dir_name); -+ DCHECK(rc, "Failed to parse path %s", oldfile); -+ assert(old_dir_name); -+ assert(old_name); -+ -+ rc = parse_filename(newfile, &new_name, &new_dir_name); -+ DCHECK(rc, "Failed to parse path %s", newfile); -+ assert(new_dir_name); -+ assert(new_name); -+ -+ old_parent = lookup_insert_dir(old_dir_name, NULL); -+ if (old_parent == NULL) -+ DERR("Failed to lookup parent: %s", old_dir_name); -+ -+ new_parent = lookup_insert_dir(new_dir_name, NULL); -+ if (new_parent == NULL) -+ DERR("Failed to lookup parent: %s", new_dir_name); -+ -+ rc = dfs_move(dfs, old_parent, old_name, new_parent, new_name, NULL); -+ -+out: -+ if (old_name) -+ free(old_name); -+ if (old_dir_name) -+ free(old_dir_name); -+ if (new_name) -+ free(new_name); -+ if (new_dir_name) -+ free(new_dir_name); -+ if (rc) -+ return -1; -+ return rc; -+} -+ -+static int -+DFS_Rmdir(const char *path, aiori_mod_opt_t * param) - { - dfs_obj_t *parent = NULL; - char *name = NULL, *dir_name = NULL; -@@ -839,11 +1044,11 @@ DFS_Rmdir(const char *path, IOR_param_t * param) - - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) -- GERR("Failed to lookup parent dir"); -+ DERR("Failed to lookup parent: %s", dir_name); - - rc = dfs_remove(dfs, parent, name, false, NULL); -- DCHECK(rc, "dfs_remove() of %s Failed", name); - -+out: - if (name) - free(name); - if (dir_name) -@@ -854,24 +1059,42 @@ DFS_Rmdir(const char *path, IOR_param_t * param) - } - - static int --DFS_Access(const char *path, int mode, IOR_param_t * param) -+DFS_Access(const char *path, int mode, aiori_mod_opt_t * param) - { -+ dfs_obj_t *parent = NULL; - dfs_obj_t *obj = NULL; -- mode_t fmode; -+ char *name = NULL, *dir_name = NULL; -+ int rc; - -- obj = lookup_insert_dir(path, &fmode); -- if (obj == NULL) -- return -1; -+ rc = parse_filename(path, &name, &dir_name); -+ DCHECK(rc, "Failed to parse path %s", path); - -- /** just close if it's a file */ -- if (S_ISREG(fmode)) -- dfs_release(obj); -+ assert(dir_name); -+ assert(name); - -- return 0; -+ parent = lookup_insert_dir(dir_name, NULL); -+ if (parent == NULL) -+ DERR("Failed to lookup parent: %s", dir_name); -+ -+ if (strcmp(name, "/") == 0) { -+ free(name); -+ name = NULL; -+ } -+ -+ rc = dfs_access(dfs, parent, name, mode); -+ -+out: -+ if (name) -+ free(name); -+ if (dir_name) -+ free(dir_name); -+ if (rc) -+ return -1; -+ return rc; - } - - static int --DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) -+DFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t * param) - { - dfs_obj_t *parent = NULL; - char *name = NULL, *dir_name = NULL; -@@ -885,11 +1108,11 @@ DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) - - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) -- GERR("Failed to lookup parent dir"); -+ DERR("Failed to lookup parent: %s", dir_name); - - rc = dfs_stat(dfs, parent, name, buf); -- DCHECK(rc, "dfs_stat() of Failed (%d)", rc); - -+out: - if (name) - free(name); - if (dir_name) -diff --git a/src/aiori-DUMMY.c b/src/aiori-DUMMY.c -index f368c79..8a4d814 100755 ---- a/src/aiori-DUMMY.c -+++ b/src/aiori-DUMMY.c -@@ -19,13 +19,15 @@ - /************************** O P T I O N S *****************************/ - typedef struct { - uint64_t delay_creates; -+ uint64_t delay_close; -+ uint64_t delay_sync; - uint64_t delay_xfer; - int delay_rank_0_only; - } dummy_options_t; - - static char * current = (char*) 1; - --static option_help * DUMMY_options(void ** init_backend_options, void * init_values){ -+static option_help * DUMMY_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ - dummy_options_t * o = malloc(sizeof(dummy_options_t)); - if (init_values != NULL){ - memcpy(o, init_values, sizeof(dummy_options_t)); -@@ -33,10 +35,12 @@ static option_help * DUMMY_options(void ** init_backend_options, void * init_val - memset(o, 0, sizeof(dummy_options_t)); - } - -- *init_backend_options = o; -+ *init_backend_options = (aiori_mod_opt_t*) o; - - option_help h [] = { - {0, "dummy.delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_creates}, -+ {0, "dummy.delay-close", "Delay per close in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_close}, -+ {0, "dummy.delay-sync", "Delay for sync in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_sync}, - {0, "dummy.delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_xfer}, - {0, "dummy.delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o->delay_rank_0_only}, - LAST_OPTION -@@ -46,30 +50,38 @@ static option_help * DUMMY_options(void ** init_backend_options, void * init_val - return help; - } - --static void *DUMMY_Create(char *testFileName, IOR_param_t * param) -+static int count_init = 0; -+ -+static aiori_fd_t *DUMMY_Create(char *testFileName, int iorflags, aiori_mod_opt_t * options) - { -+ if(count_init <= 0){ -+ ERR("DUMMY missing initialization in create\n"); -+ } - if(verbose > 4){ - fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, current); - } -- dummy_options_t * o = (dummy_options_t*) param->backend_options; -+ dummy_options_t * o = (dummy_options_t*) options; - if (o->delay_creates){ - if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ - struct timespec wait = { o->delay_creates / 1000 / 1000, 1000l * (o->delay_creates % 1000000)}; - nanosleep( & wait, NULL); - } - } -- return current++; -+ return (aiori_fd_t*) current++; - } - --static void *DUMMY_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *DUMMY_Open(char *testFileName, int flags, aiori_mod_opt_t * options) - { -+ if(count_init <= 0){ -+ ERR("DUMMY missing initialization in open\n"); -+ } - if(verbose > 4){ - fprintf(out_logfile, "DUMMY open: %s = %p\n", testFileName, current); - } -- return current++; -+ return (aiori_fd_t*) current++; - } - --static void DUMMY_Fsync(void *fd, IOR_param_t * param) -+static void DUMMY_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options) - { - if(verbose > 4){ - fprintf(out_logfile, "DUMMY fsync %p\n", fd); -@@ -77,18 +89,33 @@ static void DUMMY_Fsync(void *fd, IOR_param_t * param) - } - - --static void DUMMY_Sync(IOR_param_t * param) -+static void DUMMY_Sync(aiori_mod_opt_t * options) - { -+ dummy_options_t * o = (dummy_options_t*) options; -+ if (o->delay_sync){ -+ if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ -+ struct timespec wait = { o->delay_sync / 1000 / 1000, 1000l * (o->delay_sync % 1000000)}; -+ nanosleep( & wait, NULL); -+ } -+ } - } - --static void DUMMY_Close(void *fd, IOR_param_t * param) -+static void DUMMY_Close(aiori_fd_t *fd, aiori_mod_opt_t * options) - { - if(verbose > 4){ - fprintf(out_logfile, "DUMMY close %p\n", fd); - } -+ -+ dummy_options_t * o = (dummy_options_t*) options; -+ if (o->delay_close){ -+ if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ -+ struct timespec wait = { o->delay_close / 1000 / 1000, 1000l * (o->delay_close % 1000000)}; -+ nanosleep( & wait, NULL); -+ } -+ } - } - --static void DUMMY_Delete(char *testFileName, IOR_param_t * param) -+static void DUMMY_Delete(char *testFileName, aiori_mod_opt_t * options) - { - if(verbose > 4){ - fprintf(out_logfile, "DUMMY delete: %s\n", testFileName); -@@ -100,7 +127,7 @@ static char * DUMMY_getVersion() - return "0.5"; - } - --static IOR_offset_t DUMMY_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) -+static IOR_offset_t DUMMY_GetFileSize(aiori_mod_opt_t * options, char *testFileName) - { - if(verbose > 4){ - fprintf(out_logfile, "DUMMY getFileSize: %s\n", testFileName); -@@ -108,11 +135,11 @@ static IOR_offset_t DUMMY_GetFileSize(IOR_param_t * test, MPI_Comm testComm, cha - return 0; - } - --static IOR_offset_t DUMMY_Xfer(int access, void *file, IOR_size_t * buffer, IOR_offset_t length, IOR_param_t * param){ -+static IOR_offset_t DUMMY_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ - if(verbose > 4){ - fprintf(out_logfile, "DUMMY xfer: %p\n", file); - } -- dummy_options_t * o = (dummy_options_t*) param->backend_options; -+ dummy_options_t * o = (dummy_options_t*) options; - if (o->delay_xfer){ - if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ - struct timespec wait = {o->delay_xfer / 1000 / 1000, 1000l * (o->delay_xfer % 1000000)}; -@@ -122,7 +149,7 @@ static IOR_offset_t DUMMY_Xfer(int access, void *file, IOR_size_t * buffer, IOR_ - return length; - } - --static int DUMMY_statfs (const char * path, ior_aiori_statfs_t * stat, IOR_param_t * param){ -+static int DUMMY_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ - stat->f_bsize = 1; - stat->f_blocks = 1; - stat->f_bfree = 1; -@@ -132,26 +159,45 @@ static int DUMMY_statfs (const char * path, ior_aiori_statfs_t * stat, IOR_param - return 0; - } - --static int DUMMY_mkdir (const char *path, mode_t mode, IOR_param_t * param){ -+static int DUMMY_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ - return 0; - } - --static int DUMMY_rmdir (const char *path, IOR_param_t * param){ -+static int DUMMY_rmdir (const char *path, aiori_mod_opt_t * options){ - return 0; - } - --static int DUMMY_access (const char *path, int mode, IOR_param_t * param){ -+static int DUMMY_access (const char *path, int mode, aiori_mod_opt_t * options){ - return 0; - } - --static int DUMMY_stat (const char *path, struct stat *buf, IOR_param_t * param){ -+static int DUMMY_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ - return 0; - } - --static int DUMMY_check_params(IOR_param_t * test){ -- return 1; -+static int DUMMY_rename (const char *path, const char *path2, aiori_mod_opt_t * options){ -+ return 0; -+} -+ -+ -+static int DUMMY_check_params(aiori_mod_opt_t * options){ -+ return 0; - } - -+static void DUMMY_init(aiori_mod_opt_t * options){ -+ WARN("DUMMY initialized"); -+ count_init++; -+} -+ -+static void DUMMY_final(aiori_mod_opt_t * options){ -+ WARN("DUMMY finalized"); -+ if(count_init <= 0){ -+ ERR("DUMMY invalid finalization\n"); -+ } -+ count_init--; -+} -+ -+ - ior_aiori_t dummy_aiori = { - .name = "DUMMY", - .name_legacy = NULL, -@@ -166,13 +212,13 @@ ior_aiori_t dummy_aiori = { - .statfs = DUMMY_statfs, - .mkdir = DUMMY_mkdir, - .rmdir = DUMMY_rmdir, -+ .rename = DUMMY_rename, - .access = DUMMY_access, - .stat = DUMMY_stat, -- .initialize = NULL, -- .finalize = NULL, -+ .initialize = DUMMY_init, -+ .finalize = DUMMY_final, - .get_options = DUMMY_options, -- .enable_mdtest = true, - .check_params = DUMMY_check_params, - .sync = DUMMY_Sync, -- .enable_mdtest = true -+ .enable_mdtest = true - }; -diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c -index a7af0ea..e94022f 100644 ---- a/src/aiori-Gfarm.c -+++ b/src/aiori-Gfarm.c -@@ -14,6 +14,14 @@ struct gfarm_file { - GFS_File gf; - }; - -+static aiori_xfer_hint_t *hints = NULL; -+ -+void -+Gfarm_xfer_hints(aiori_xfer_hint_t *params) -+{ -+ hints = params; -+} -+ - void - Gfarm_initialize() - { -@@ -26,14 +34,14 @@ Gfarm_finalize() - gfarm_terminate(); - } - --void * --Gfarm_create(char *fn, IOR_param_t *param) -+aiori_fd_t * -+Gfarm_create(char *fn, int flag, aiori_mod_opt_t *param) - { - GFS_File gf; - struct gfarm_file *fp; - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (NULL); - - e = gfs_pio_create(fn, GFARM_FILE_RDWR, 0664, &gf); -@@ -43,17 +51,17 @@ Gfarm_create(char *fn, IOR_param_t *param) - if (fp == NULL) - ERR("no memory"); - fp->gf = gf; -- return (fp); -+ return ((aiori_fd_t *)fp); - } - --void * --Gfarm_open(char *fn, IOR_param_t *param) -+aiori_fd_t * -+Gfarm_open(char *fn, int flag, aiori_mod_opt_t *param) - { - GFS_File gf; - struct gfarm_file *fp; - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (NULL); - - e = gfs_pio_open(fn, GFARM_FILE_RDWR, &gf); -@@ -63,14 +71,14 @@ Gfarm_open(char *fn, IOR_param_t *param) - if (fp == NULL) - ERR("no memory"); - fp->gf = gf; -- return (fp); -+ return ((aiori_fd_t *)fp); - } - - IOR_offset_t --Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, -- IOR_param_t *param) -+Gfarm_xfer(int access, aiori_fd_t *fd, IOR_size_t *buffer, -+ IOR_offset_t len, IOR_offset_t offset, aiori_mod_opt_t *param) - { -- struct gfarm_file *fp = fd; -+ struct gfarm_file *fp = (struct gfarm_file *)fd; - IOR_offset_t rem = len; - gfarm_off_t off; - gfarm_error_t e; -@@ -78,7 +86,7 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, - int sz, n; - char *buf = (char *)buffer; - -- if (param->dryRun) -+ if (hints->dryRun) - return (len); - - if (len > MAX_SZ) -@@ -86,7 +94,7 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, - else - sz = len; - -- e = gfs_pio_seek(fp->gf, param->offset, GFARM_SEEK_SET, &off); -+ e = gfs_pio_seek(fp->gf, offset, GFARM_SEEK_SET, &off); - if (e != GFARM_ERR_NO_ERROR) - ERR("gfs_pio_seek failed"); - while (rem > 0) { -@@ -105,11 +113,11 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, - } - - void --Gfarm_close(void *fd, IOR_param_t *param) -+Gfarm_close(aiori_fd_t *fd, aiori_mod_opt_t *param) - { -- struct gfarm_file *fp = fd; -+ struct gfarm_file *fp = (struct gfarm_file *)fd; - -- if (param->dryRun) -+ if (hints->dryRun) - return; - - if (gfs_pio_close(fp->gf) != GFARM_ERR_NO_ERROR) -@@ -118,11 +126,11 @@ Gfarm_close(void *fd, IOR_param_t *param) - } - - void --Gfarm_delete(char *fn, IOR_param_t *param) -+Gfarm_delete(char *fn, aiori_mod_opt_t *param) - { - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return; - - e = gfs_unlink(fn); -@@ -137,11 +145,11 @@ Gfarm_version() - } - - void --Gfarm_fsync(void *fd, IOR_param_t *param) -+Gfarm_fsync(aiori_fd_t *fd, aiori_mod_opt_t *param) - { -- struct gfarm_file *fp = fd; -+ struct gfarm_file *fp = (struct gfarm_file *)fd; - -- if (param->dryRun) -+ if (hints->dryRun) - return; - - if (gfs_pio_sync(fp->gf) != GFARM_ERR_NO_ERROR) -@@ -149,12 +157,12 @@ Gfarm_fsync(void *fd, IOR_param_t *param) - } - - IOR_offset_t --Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) -+Gfarm_get_file_size(aiori_mod_opt_t *param, char *fn) - { - struct gfs_stat st; - IOR_offset_t size, sum, min, max; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - if (gfs_stat(fn, &st) != GFARM_ERR_NO_ERROR) -@@ -162,34 +170,17 @@ Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) - size = st.st_size; - gfs_stat_free(&st); - -- if (param->filePerProc == TRUE) { -- MPI_CHECK(MPI_Allreduce(&size, &sum, 1, MPI_LONG_LONG_INT, -- MPI_SUM, comm), "cannot total data moved"); -- size = sum; -- } else { -- MPI_CHECK(MPI_Allreduce(&size, &min, 1, MPI_LONG_LONG_INT, -- MPI_MIN, comm), "cannot total data moved"); -- MPI_CHECK(MPI_Allreduce(&size, &max, 1, MPI_LONG_LONG_INT, -- MPI_MAX, comm), "cannot total data moved"); -- if (min != max) { -- if (rank == 0) -- WARN("inconsistent file size by different " -- "tasks"); -- /* incorrect, but now consistent across tasks */ -- size = min; -- } -- } - return (size); - } - - int --Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) -+Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, aiori_mod_opt_t *param) - { - gfarm_off_t used, avail, files; - gfarm_error_t e; - int bsize = 4096; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - e = gfs_statfs_by_path(fn, &used, &avail, &files); -@@ -206,11 +197,11 @@ Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) - } - - int --Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) -+Gfarm_mkdir(const char *fn, mode_t mode, aiori_mod_opt_t *param) - { - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - e = gfs_mkdir(fn, mode); -@@ -221,11 +212,11 @@ Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) - } - - int --Gfarm_rmdir(const char *fn, IOR_param_t *param) -+Gfarm_rmdir(const char *fn, aiori_mod_opt_t *param) - { - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - e = gfs_rmdir(fn); -@@ -236,12 +227,12 @@ Gfarm_rmdir(const char *fn, IOR_param_t *param) - } - - int --Gfarm_access(const char *fn, int mode, IOR_param_t *param) -+Gfarm_access(const char *fn, int mode, aiori_mod_opt_t *param) - { - struct gfs_stat st; - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - e = gfs_stat(fn, &st); -@@ -259,12 +250,12 @@ Gfarm_access(const char *fn, int mode, IOR_param_t *param) - #define STAT_BLKSIZ 512 /* for st_blocks */ - - int --Gfarm_stat(const char *fn, struct stat *buf, IOR_param_t *param) -+Gfarm_stat(const char *fn, struct stat *buf, aiori_mod_opt_t *param) - { - struct gfs_stat st; - gfarm_error_t e; - -- if (param->dryRun) -+ if (hints->dryRun) - return (0); - - e = gfs_stat(fn, &st); -@@ -293,11 +284,22 @@ Gfarm_stat(const char *fn, struct stat *buf, IOR_param_t *param) - return (0); - } - -+void -+Gfarm_sync(aiori_mod_opt_t *param) -+{ -+ if (hints->dryRun) -+ return; -+ -+ /* no cache in libgfarm */ -+ return; -+} -+ - ior_aiori_t gfarm_aiori = { - .name = "Gfarm", - .name_legacy = NULL, - .create = Gfarm_create, - .open = Gfarm_open, -+ .xfer_hints = Gfarm_xfer_hints, - .xfer = Gfarm_xfer, - .close = Gfarm_close, - .delete = Gfarm_delete, -@@ -312,5 +314,6 @@ ior_aiori_t gfarm_aiori = { - .initialize = Gfarm_initialize, - .finalize = Gfarm_finalize, - .get_options = NULL, -+ .sync = Gfarm_sync, - .enable_mdtest = true, - }; -diff --git a/src/aiori-HDF5.c b/src/aiori-HDF5.c -index ab329db..aee5a4b 100755 ---- a/src/aiori-HDF5.c -+++ b/src/aiori-HDF5.c -@@ -48,19 +48,20 @@ - #if H5_VERS_MAJOR > 1 && H5_VERS_MINOR > 6 - #define HDF5_CHECK(HDF5_RETURN, MSG) do { \ - char resultString[1024]; \ -+ herr_t _HDF5_RETURN = (HDF5_RETURN); \ - \ -- if (HDF5_RETURN < 0) { \ -+ if (_HDF5_RETURN < 0) { \ - fprintf(stdout, "** error **\n"); \ - fprintf(stdout, "ERROR in %s (line %d): %s.\n", \ - __FILE__, __LINE__, MSG); \ -- strcpy(resultString, H5Eget_major((H5E_major_t)HDF5_RETURN)); \ -+ strcpy(resultString, H5Eget_major((H5E_major_t)_HDF5_RETURN)); \ - if (strcmp(resultString, "Invalid major error number") != 0) \ - fprintf(stdout, "HDF5 %s\n", resultString); \ -- strcpy(resultString, H5Eget_minor((H5E_minor_t)HDF5_RETURN)); \ -+ strcpy(resultString, H5Eget_minor((H5E_minor_t)_HDF5_RETURN)); \ - if (strcmp(resultString, "Invalid minor error number") != 0) \ - fprintf(stdout, "%s\n", resultString); \ - fprintf(stdout, "** exiting **\n"); \ -- exit(-1); \ -+ exit(EXIT_FAILURE); \ - } \ - } while(0) - #else /* ! (H5_VERS_MAJOR > 1 && H5_VERS_MINOR > 6) */ -@@ -75,45 +76,61 @@ - * char* mesg, size_t size) \ - */ \ - fprintf(stdout, "** exiting **\n"); \ -- exit(-1); \ -+ exit(EXIT_FAILURE); \ - } \ - } while(0) - #endif /* H5_VERS_MAJOR > 1 && H5_VERS_MINOR > 6 */ - /**************************** P R O T O T Y P E S *****************************/ - --static IOR_offset_t SeekOffset(void *, IOR_offset_t, IOR_param_t *); --static void SetupDataSet(void *, IOR_param_t *); --static void *HDF5_Create(char *, IOR_param_t *); --static void *HDF5_Open(char *, IOR_param_t *); --static IOR_offset_t HDF5_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void HDF5_Close(void *, IOR_param_t *); --static void HDF5_Delete(char *, IOR_param_t *); -+static IOR_offset_t SeekOffset(void *, IOR_offset_t, aiori_mod_opt_t *); -+static void SetupDataSet(void *, int flags, aiori_mod_opt_t *); -+static aiori_fd_t *HDF5_Create(char *, int flags, aiori_mod_opt_t *); -+static aiori_fd_t *HDF5_Open(char *, int flags, aiori_mod_opt_t *); -+static IOR_offset_t HDF5_Xfer(int, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -+static void HDF5_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void HDF5_Delete(char *, aiori_mod_opt_t *); - static char* HDF5_GetVersion(); --static void HDF5_Fsync(void *, IOR_param_t *); --static IOR_offset_t HDF5_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static int HDF5_Access(const char *, int, IOR_param_t *); -+static void HDF5_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static IOR_offset_t HDF5_GetFileSize(aiori_mod_opt_t *, char *); -+static int HDF5_Access(const char *, int, aiori_mod_opt_t *); -+static void HDF5_init_xfer_options(aiori_xfer_hint_t * params); -+static int HDF5_check_params(aiori_mod_opt_t * options); - - /************************** O P T I O N S *****************************/ - typedef struct{ -+ mpiio_options_t mpio; -+ - int collective_md; -+ int individualDataSets; /* datasets not shared by all procs */ -+ int noFill; /* no fill in file creation */ -+ IOR_offset_t setAlignment; /* alignment in bytes */ - } HDF5_options_t; - /***************************** F U N C T I O N S ******************************/ - --static option_help * HDF5_options(void ** init_backend_options, void * init_values){ -+static option_help * HDF5_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ - HDF5_options_t * o = malloc(sizeof(HDF5_options_t)); - - if (init_values != NULL){ - memcpy(o, init_values, sizeof(HDF5_options_t)); - }else{ -+ memset(o, 0, sizeof(HDF5_options_t)); - /* initialize the options properly */ - o->collective_md = 0; -+ o->setAlignment = 1; - } - -- *init_backend_options = o; -+ *init_backend_options = (aiori_mod_opt_t*) o; - - option_help h [] = { -+ /* options imported from MPIIO */ -+ {0, "hdf5.hintsFileName","Full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & o->mpio.hintsFileName}, -+ {0, "hdf5.showHints", "Show MPI hints", OPTION_FLAG, 'd', & o->mpio.showHints}, -+ /* generic options */ - {0, "hdf5.collectiveMetadata", "Use collectiveMetadata (available since HDF5-1.10.0)", OPTION_FLAG, 'd', & o->collective_md}, -+ {0, "hdf5.individualDataSets", "Datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & o->individualDataSets}, -+ {0, "hdf5.setAlignment", "HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & o->setAlignment}, -+ {0, "hdf5.noFill", "No fill in HDF5 file creation", OPTION_FLAG, 'd', & o->noFill}, - LAST_OPTION - }; - option_help * help = malloc(sizeof(h)); -@@ -133,6 +150,7 @@ ior_aiori_t hdf5_aiori = { - .close = HDF5_Close, - .delete = HDF5_Delete, - .get_version = HDF5_GetVersion, -+ .xfer_hints = HDF5_init_xfer_options, - .fsync = HDF5_Fsync, - .get_file_size = HDF5_GetFileSize, - .statfs = aiori_posix_statfs, -@@ -140,7 +158,8 @@ ior_aiori_t hdf5_aiori = { - .rmdir = aiori_posix_rmdir, - .access = HDF5_Access, - .stat = aiori_posix_stat, -- .get_options = HDF5_options -+ .get_options = HDF5_options, -+ .check_params = HDF5_check_params - }; - - static hid_t xferPropList; /* xfer property list */ -@@ -151,20 +170,37 @@ hid_t memDataSpace; /* memory data space id */ - int newlyOpenedFile; /* newly opened file */ - - /***************************** F U N C T I O N S ******************************/ -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void HDF5_init_xfer_options(aiori_xfer_hint_t * params){ -+ hints = params; -+ /** HDF5 utilizes the MPIIO backend too, so init hints there */ -+ MPIIO_xfer_hints(params); -+} -+ -+static int HDF5_check_params(aiori_mod_opt_t * options){ -+ HDF5_options_t *o = (HDF5_options_t*) options; -+ if (o->setAlignment < 0) -+ ERR("alignment must be non-negative integer"); -+ if (o->individualDataSets) -+ ERR("individual data sets not implemented"); -+ return 0; -+} - - /* - * Create and open a file through the HDF5 interface. - */ --static void *HDF5_Create(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *HDF5_Create(char *testFileName, int flags, aiori_mod_opt_t * param) - { -- return HDF5_Open(testFileName, param); -+ return HDF5_Open(testFileName, flags, param); - } - - /* - * Open a file through the HDF5 interface. - */ --static void *HDF5_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *HDF5_Open(char *testFileName, int flags, aiori_mod_opt_t * param) - { -+ HDF5_options_t *o = (HDF5_options_t*) param; - hid_t accessPropList, createPropList; - hsize_t memStart[NUM_DIMS], - dataSetDims[NUM_DIMS], -@@ -182,36 +218,27 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - /* - * HDF5 uses different flags than those for POSIX/MPIIO - */ -- if (param->open == WRITE) { /* WRITE flags */ -- param->openFlags = IOR_TRUNC; -- } else { /* READ or check WRITE/READ flags */ -- param->openFlags = IOR_RDONLY; -- } -- - /* set IOR file flags to HDF5 flags */ - /* -- file open flags -- */ -- if (param->openFlags & IOR_RDONLY) { -+ if (flags & IOR_RDONLY) { - fd_mode |= H5F_ACC_RDONLY; - } -- if (param->openFlags & IOR_WRONLY) { -- fprintf(stdout, "File write only not implemented in HDF5\n"); -- } -- if (param->openFlags & IOR_RDWR) { -+ if (flags & IOR_WRONLY || flags & IOR_RDWR) { - fd_mode |= H5F_ACC_RDWR; - } -- if (param->openFlags & IOR_APPEND) { -+ if (flags & IOR_APPEND) { - fprintf(stdout, "File append not implemented in HDF5\n"); - } -- if (param->openFlags & IOR_CREAT) { -+ if (flags & IOR_CREAT) { - fd_mode |= H5F_ACC_CREAT; - } -- if (param->openFlags & IOR_EXCL) { -+ if (flags & IOR_EXCL) { - fd_mode |= H5F_ACC_EXCL; - } -- if (param->openFlags & IOR_TRUNC) { -+ if (flags & IOR_TRUNC) { - fd_mode |= H5F_ACC_TRUNC; - } -- if (param->openFlags & IOR_DIRECT) { -+ if (flags & IOR_DIRECT) { - fprintf(stdout, "O_DIRECT not implemented in HDF5\n"); - } - -@@ -231,13 +258,12 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - * someday HDF5 implementation will allow subsets of MPI_COMM_WORLD - */ - /* store MPI communicator info for the file access property list */ -- if (param->filePerProc) { -+ if (hints->filePerProc) { - comm = MPI_COMM_SELF; - } else { - comm = testComm; - } - -- SetHints(&mpiHints, param->hintsFileName); - /* - * note that with MP_HINTS_FILTERED=no, all key/value pairs will - * be in the info object. The info object that is attached to -@@ -245,7 +271,8 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - * deemed valid by the implementation. - */ - /* show hints passed to file */ -- if (rank == 0 && param->showHints) { -+ SetHints(&mpiHints, o->mpio.hintsFileName); -+ if (rank == 0 && o->mpio.showHints) { - fprintf(stdout, "\nhints passed to access property list {\n"); - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); -@@ -254,12 +281,10 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - "cannot set file access property list"); - - /* set alignment */ -- HDF5_CHECK(H5Pset_alignment(accessPropList, param->setAlignment, -- param->setAlignment), -+ HDF5_CHECK(H5Pset_alignment(accessPropList, o->setAlignment, o->setAlignment), - "cannot set alignment"); - - #ifdef HAVE_H5PSET_ALL_COLL_METADATA_OPS -- HDF5_options_t *o = (HDF5_options_t*) param->backend_options; - if (o->collective_md) { - /* more scalable metadata */ - -@@ -271,10 +296,9 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - #endif - - /* open file */ -- if(! param->dryRun){ -- if (param->open == WRITE) { /* WRITE */ -- *fd = H5Fcreate(testFileName, fd_mode, -- createPropList, accessPropList); -+ if(! hints->dryRun){ -+ if (flags & IOR_CREAT) { /* WRITE */ -+ *fd = H5Fcreate(testFileName, H5F_ACC_TRUNC, createPropList, accessPropList); - HDF5_CHECK(*fd, "cannot create file"); - } else { /* READ or CHECK */ - *fd = H5Fopen(testFileName, fd_mode, accessPropList); -@@ -283,41 +307,18 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - } - - /* show hints actually attached to file handle */ -- if (param->showHints || (1) /* WEL - this needs fixing */ ) { -- if (rank == 0 -- && (param->showHints) /* WEL - this needs fixing */ ) { -- WARN("showHints not working for HDF5"); -- } -- } else { -- MPI_Info mpiHintsCheck = MPI_INFO_NULL; -- hid_t apl; -- apl = H5Fget_access_plist(*fd); -- HDF5_CHECK(H5Pget_fapl_mpio(apl, &comm, &mpiHintsCheck), -- "cannot get info object through HDF5"); -+ if (o->mpio.showHints) { -+ MPI_File *fd_mpiio; -+ HDF5_CHECK(H5Fget_vfd_handle(*fd, accessPropList, (void **) &fd_mpiio), "cannot get file handle"); -+ MPI_Info info_used; -+ MPI_CHECK(MPI_File_get_info(*fd_mpiio, &info_used), "cannot get file info"); - if (rank == 0) { -- fprintf(stdout, -- "\nhints returned from opened file (HDF5) {\n"); -- ShowHints(&mpiHintsCheck); -- fprintf(stdout, "}\n"); -- if (1 == 1) { /* request the MPIIO file handle and its hints */ -- MPI_File *fd_mpiio; -- HDF5_CHECK(H5Fget_vfd_handle -- (*fd, apl, (void **)&fd_mpiio), -- "cannot get MPIIO file handle"); -- if (mpiHintsCheck != MPI_INFO_NULL) -- MPI_Info_free(&mpiHintsCheck); -- MPI_CHECK(MPI_File_get_info -- (*fd_mpiio, &mpiHintsCheck), -- "cannot get info object through MPIIO"); -- fprintf(stdout, -- "\nhints returned from opened file (MPIIO) {\n"); -- ShowHints(&mpiHintsCheck); -+ /* print the MPI file hints currently used */ -+ fprintf(stdout, "\nhints returned from opened file {\n"); -+ ShowHints(&info_used); - fprintf(stdout, "}\n"); -- if (mpiHintsCheck != MPI_INFO_NULL) -- MPI_Info_free(&mpiHintsCheck); - } -- } -- MPI_CHECK(MPI_Barrier(testComm), "barrier error"); -+ MPI_CHECK(MPI_Info_free(&info_used), "cannot free file info"); - } - - /* this is necessary for resetting various parameters -@@ -334,7 +335,7 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - HDF5_CHECK(xferPropList, "cannot create transfer property list"); - - /* set data transfer mode */ -- if (param->collective) { -+ if (hints->collective) { - HDF5_CHECK(H5Pset_dxpl_mpio(xferPropList, H5FD_MPIO_COLLECTIVE), - "cannot set collective data transfer mode"); - } else { -@@ -346,9 +347,9 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - /* set up memory data space for transfer */ - memStart[0] = (hsize_t) 0; - memCount[0] = (hsize_t) 1; -- memStride[0] = (hsize_t) (param->transferSize / sizeof(IOR_size_t)); -- memBlock[0] = (hsize_t) (param->transferSize / sizeof(IOR_size_t)); -- memDataSpaceDims[0] = (hsize_t) param->transferSize; -+ memStride[0] = (hsize_t) (hints->transferSize / sizeof(IOR_size_t)); -+ memBlock[0] = (hsize_t) (hints->transferSize / sizeof(IOR_size_t)); -+ memDataSpaceDims[0] = (hsize_t) hints->transferSize; - memDataSpace = H5Screate_simple(NUM_DIMS, memDataSpaceDims, NULL); - HDF5_CHECK(memDataSpace, "cannot create simple memory data space"); - -@@ -358,18 +359,18 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - memBlock), "cannot create hyperslab"); - - /* set up parameters for fpp or different dataset count */ -- if (param->filePerProc) { -+ if (hints->filePerProc) { - tasksPerDataSet = 1; - } else { -- if (param->individualDataSets) { -+ if (o->individualDataSets) { - /* each task in segment has single data set */ - tasksPerDataSet = 1; - } else { - /* share single data set across all tasks in segment */ -- tasksPerDataSet = param->numTasks; -+ tasksPerDataSet = hints->numTasks; - } - } -- dataSetDims[0] = (hsize_t) ((param->blockSize / sizeof(IOR_size_t)) -+ dataSetDims[0] = (hsize_t) ((hints->blockSize / sizeof(IOR_size_t)) - * tasksPerDataSet); - - /* create a simple data space containing information on size -@@ -379,14 +380,14 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) - if (mpiHints != MPI_INFO_NULL) - MPI_Info_free(&mpiHints); - -- return (fd); -+ return (aiori_fd_t*)(fd); - } - - /* - * Write or read access to file using the HDF5 interface. - */ --static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t HDF5_Xfer(int access, aiori_fd_t *fd, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param) - { - static int firstReadCheck = FALSE, startNewDataSet; - IOR_offset_t segmentPosition, segmentSize; -@@ -405,17 +406,16 @@ static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, - } - - /* determine by offset if need to start new data set */ -- if (param->filePerProc == TRUE) { -+ if (hints->filePerProc == TRUE) { - segmentPosition = (IOR_offset_t) 0; -- segmentSize = param->blockSize; -+ segmentSize = hints->blockSize; - } else { - segmentPosition = -- (IOR_offset_t) ((rank + rankOffset) % param->numTasks) -- * param->blockSize; -- segmentSize = -- (IOR_offset_t) (param->numTasks) * param->blockSize; -+ (IOR_offset_t) ((rank + rankOffset) % hints->numTasks) -+ * hints->blockSize; -+ segmentSize = (IOR_offset_t) (hints->numTasks) * hints->blockSize; - } -- if ((IOR_offset_t) ((param->offset - segmentPosition) % segmentSize) == -+ if ((IOR_offset_t) ((offset - segmentPosition) % segmentSize) == - 0) { - /* - * ordinarily start a new data set, unless this is the -@@ -427,7 +427,7 @@ static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, - } - } - -- if(param->dryRun) -+ if(hints->dryRun) - return length; - - /* create new data set */ -@@ -438,10 +438,10 @@ static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, - HDF5_CHECK(H5Sclose(fileDataSpace), - "cannot close file data space"); - } -- SetupDataSet(fd, param); -+ SetupDataSet(fd, access == WRITE ? IOR_CREAT : IOR_RDWR, param); - } - -- SeekOffset(fd, param->offset, param); -+ SeekOffset(fd, offset, param); - - /* this is necessary to reset variables for reaccessing file */ - startNewDataSet = FALSE; -@@ -465,19 +465,19 @@ static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, - /* - * Perform fsync(). - */ --static void HDF5_Fsync(void *fd, IOR_param_t * param) -+static void HDF5_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- ; -+ HDF5_CHECK(H5Fflush(*(hid_t *) fd, H5F_SCOPE_LOCAL), "cannot flush file to disk"); - } - - /* - * Close a file through the HDF5 interface. - */ --static void HDF5_Close(void *fd, IOR_param_t * param) -+static void HDF5_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- if(param->dryRun) -+ if(hints->dryRun) - return; -- if (param->fd_fppReadCheck == NULL) { -+ //if (hints->fd_fppReadCheck == NULL) { - HDF5_CHECK(H5Dclose(dataSet), "cannot close data set"); - HDF5_CHECK(H5Sclose(dataSpace), "cannot close data space"); - HDF5_CHECK(H5Sclose(fileDataSpace), -@@ -486,7 +486,7 @@ static void HDF5_Close(void *fd, IOR_param_t * param) - "cannot close memory data space"); - HDF5_CHECK(H5Pclose(xferPropList), - " cannot close transfer property list"); -- } -+ //} - HDF5_CHECK(H5Fclose(*(hid_t *) fd), "cannot close file"); - free(fd); - } -@@ -494,9 +494,9 @@ static void HDF5_Close(void *fd, IOR_param_t * param) - /* - * Delete a file through the HDF5 interface. - */ --static void HDF5_Delete(char *testFileName, IOR_param_t * param) -+static void HDF5_Delete(char *testFileName, aiori_mod_opt_t * param) - { -- if(param->dryRun) -+ if(hints->dryRun) - return - MPIIO_Delete(testFileName, param); - return; -@@ -528,23 +528,24 @@ static char * HDF5_GetVersion() - * Seek to offset in file using the HDF5 interface and set up hyperslab. - */ - static IOR_offset_t SeekOffset(void *fd, IOR_offset_t offset, -- IOR_param_t * param) -+ aiori_mod_opt_t * param) - { -+ HDF5_options_t *o = (HDF5_options_t*) param; - IOR_offset_t segmentSize; - hsize_t hsStride[NUM_DIMS], hsCount[NUM_DIMS], hsBlock[NUM_DIMS]; - hsize_t hsStart[NUM_DIMS]; - -- if (param->filePerProc == TRUE) { -- segmentSize = (IOR_offset_t) param->blockSize; -+ if (hints->filePerProc == TRUE) { -+ segmentSize = (IOR_offset_t) hints->blockSize; - } else { - segmentSize = -- (IOR_offset_t) (param->numTasks) * param->blockSize; -+ (IOR_offset_t) (hints->numTasks) * hints->blockSize; - } - - /* create a hyperslab representing the file data space */ -- if (param->individualDataSets) { -+ if (o->individualDataSets) { - /* start at zero offset if not */ -- hsStart[0] = (hsize_t) ((offset % param->blockSize) -+ hsStart[0] = (hsize_t) ((offset % hints->blockSize) - / sizeof(IOR_size_t)); - } else { - /* start at a unique offset if shared */ -@@ -552,8 +553,8 @@ static IOR_offset_t SeekOffset(void *fd, IOR_offset_t offset, - (hsize_t) ((offset % segmentSize) / sizeof(IOR_size_t)); - } - hsCount[0] = (hsize_t) 1; -- hsStride[0] = (hsize_t) (param->transferSize / sizeof(IOR_size_t)); -- hsBlock[0] = (hsize_t) (param->transferSize / sizeof(IOR_size_t)); -+ hsStride[0] = (hsize_t) (hints->transferSize / sizeof(IOR_size_t)); -+ hsBlock[0] = (hsize_t) (hints->transferSize / sizeof(IOR_size_t)); - - /* retrieve data space from data set for hyperslab */ - fileDataSpace = H5Dget_space(dataSet); -@@ -567,8 +568,9 @@ static IOR_offset_t SeekOffset(void *fd, IOR_offset_t offset, - /* - * Create HDF5 data set. - */ --static void SetupDataSet(void *fd, IOR_param_t * param) -+static void SetupDataSet(void *fd, int flags, aiori_mod_opt_t * param) - { -+ HDF5_options_t *o = (HDF5_options_t*) param; - char dataSetName[MAX_STR]; - hid_t dataSetPropList; - int dataSetID; -@@ -582,8 +584,8 @@ static void SetupDataSet(void *fd, IOR_param_t * param) - dataSetSuffix = 0; - - /* may want to use individual access to each data set someday */ -- if (param->individualDataSets) { -- dataSetID = (rank + rankOffset) % param->numTasks; -+ if (o->individualDataSets) { -+ dataSetID = (rank + rankOffset) % hints->numTasks; - } else { - dataSetID = 0; - } -@@ -591,14 +593,10 @@ static void SetupDataSet(void *fd, IOR_param_t * param) - sprintf(dataSetName, "%s-%04d.%04d", "Dataset", dataSetID, - dataSetSuffix++); - -- if (param->open == WRITE) { /* WRITE */ -+ if (flags & IOR_CREAT) { /* WRITE */ - /* create data set */ - dataSetPropList = H5Pcreate(H5P_DATASET_CREATE); -- /* check if hdf5 available */ --#if defined (H5_VERS_MAJOR) && defined (H5_VERS_MINOR) -- /* no-fill option not available until hdf5-1.6.x */ --#if (H5_VERS_MAJOR > 0 && H5_VERS_MINOR > 5) -- if (param->noFill == TRUE) { -+ if (o->noFill == TRUE) { - if (rank == 0 && verbose >= VERBOSE_1) { - fprintf(stdout, "\nusing 'no fill' option\n"); - } -@@ -606,15 +604,6 @@ static void SetupDataSet(void *fd, IOR_param_t * param) - H5D_FILL_TIME_NEVER), - "cannot set fill time for property list"); - } --#else -- char errorString[MAX_STR]; -- sprintf(errorString, "'no fill' option not available in %s", -- test->apiVersion); -- ERR(errorString); --#endif --#else -- WARN("unable to determine HDF5 version for 'no fill' usage"); --#endif - dataSet = - H5Dcreate(*(hid_t *) fd, dataSetName, H5T_NATIVE_LLONG, - dataSpace, dataSetPropList); -@@ -629,19 +618,19 @@ static void SetupDataSet(void *fd, IOR_param_t * param) - * Use MPIIO call to get file size. - */ - static IOR_offset_t --HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) -+HDF5_GetFileSize(aiori_mod_opt_t * test, char *testFileName) - { -- if(test->dryRun) -+ if(hints->dryRun) - return 0; -- return(MPIIO_GetFileSize(test, testComm, testFileName)); -+ return(MPIIO_GetFileSize(test, testFileName)); - } - - /* - * Use MPIIO call to check for access. - */ --static int HDF5_Access(const char *path, int mode, IOR_param_t *param) -+static int HDF5_Access(const char *path, int mode, aiori_mod_opt_t *param) - { -- if(param->dryRun) -+ if(hints->dryRun) - return 0; - return(MPIIO_Access(path, mode, param)); - } -diff --git a/src/aiori-HDFS.c b/src/aiori-HDFS.c -index 2d4dcb1..7503425 100755 ---- a/src/aiori-HDFS.c -+++ b/src/aiori-HDFS.c -@@ -77,14 +77,17 @@ - #include - #include - /* --#ifdef HAVE_LUSTRE_LUSTRE_USER_H --#include -+#ifdef HAVE_LUSTRE_USER -+# ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H -+# include -+# elif defined(HAVE_LUSTRE_LUSTRE_USER_H) -+# include -+# endif - #endif - */ -- - #include "ior.h" - #include "aiori.h" --#include "iordef.h" -+#include "utilities.h" - - #ifndef open64 /* necessary for TRU64 -- */ - # define open64 open /* unlikely, but may pose */ -@@ -101,15 +104,23 @@ - #include "hdfs.h" - - /**************************** P R O T O T Y P E S *****************************/ --static void *HDFS_Create(char *, IOR_param_t *); --static void *HDFS_Open(char *, IOR_param_t *); --static IOR_offset_t HDFS_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void HDFS_Close(void *, IOR_param_t *); --static void HDFS_Delete(char *, IOR_param_t *); --static void HDFS_SetVersion(IOR_param_t *); --static void HDFS_Fsync(void *, IOR_param_t *); --static IOR_offset_t HDFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); -+static aiori_fd_t *HDFS_Create(char *testFileName, int flags, aiori_mod_opt_t * param); -+static aiori_fd_t *HDFS_Open(char *testFileName, int flags, aiori_mod_opt_t * param); -+static IOR_offset_t HDFS_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param); -+static void HDFS_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void HDFS_Delete(char *testFileName, aiori_mod_opt_t * param); -+static void HDFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static IOR_offset_t HDFS_GetFileSize(aiori_mod_opt_t *,char *); -+static void hdfs_xfer_hints(aiori_xfer_hint_t * params); -+static option_help * HDFS_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); -+static int HDFS_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options); -+static int HDFS_rmdir (const char *path, aiori_mod_opt_t * options); -+static int HDFS_access (const char *path, int mode, aiori_mod_opt_t * options); -+static int HDFS_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options); -+static int HDFS_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options); -+ -+static aiori_xfer_hint_t * hints = NULL; - - /************************** D E C L A R A T I O N S ***************************/ - -@@ -121,13 +132,120 @@ ior_aiori_t hdfs_aiori = { - .xfer = HDFS_Xfer, - .close = HDFS_Close, - .delete = HDFS_Delete, -- .set_version = HDFS_SetVersion, -+ .get_options = HDFS_options, -+ .get_version = aiori_get_version, -+ .xfer_hints = hdfs_xfer_hints, - .fsync = HDFS_Fsync, - .get_file_size = HDFS_GetFileSize, -+ .statfs = HDFS_statfs, -+ .mkdir = HDFS_mkdir, -+ .rmdir = HDFS_rmdir, -+ .access = HDFS_access, -+ .stat = HDFS_stat, -+ .enable_mdtest = true - }; - - /***************************** F U N C T I O N S ******************************/ - -+void hdfs_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} -+ -+/************************** O P T I O N S *****************************/ -+typedef struct { -+ char * user; -+ char * name_node; -+ int replicas; /* n block replicas. (0 gets default) */ -+ int direct_io; -+ IOR_offset_t block_size; /* internal blk-size. (0 gets default) */ -+ // runtime options -+ hdfsFS fs; /* file-system handle */ -+ tPort name_node_port; /* (uint16_t) */ -+} hdfs_options_t; -+ -+static void hdfs_connect( hdfs_options_t* o ); -+ -+option_help * HDFS_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ hdfs_options_t * o = malloc(sizeof(hdfs_options_t)); -+ -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(hdfs_options_t)); -+ }else{ -+ memset(o, 0, sizeof(hdfs_options_t)); -+ char *hdfs_user; -+ hdfs_user = getenv("USER"); -+ if (!hdfs_user){ -+ hdfs_user = ""; -+ } -+ o->user = strdup(hdfs_user); -+ o->name_node = "default"; -+ } -+ -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ -+ option_help h [] = { -+ {0, "hdfs.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, -+ {0, "hdfs.user", "Username", OPTION_OPTIONAL_ARGUMENT, 's', & o->user}, -+ {0, "hdfs.name_node", "Namenode", OPTION_OPTIONAL_ARGUMENT, 's', & o->name_node}, -+ {0, "hdfs.replicas", "Number of replicas", OPTION_OPTIONAL_ARGUMENT, 'd', & o->replicas}, -+ {0, "hdfs.block_size", "Blocksize", OPTION_OPTIONAL_ARGUMENT, 'l', & o->block_size}, -+ LAST_OPTION -+ }; -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} -+ -+ -+int HDFS_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ -+ hdfs_options_t * o = (hdfs_options_t*) options; -+ hdfs_connect(o); -+ return hdfsCreateDirectory(o->fs, path); -+} -+ -+int HDFS_rmdir (const char *path, aiori_mod_opt_t * options){ -+ hdfs_options_t * o = (hdfs_options_t*) options; -+ hdfs_connect(o); -+ return hdfsDelete(o->fs, path, 1); -+} -+ -+int HDFS_access (const char *path, int mode, aiori_mod_opt_t * options){ -+ hdfs_options_t * o = (hdfs_options_t*) options; -+ hdfs_connect(o); -+ return hdfsExists(o->fs, path); -+} -+ -+int HDFS_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ -+ hdfsFileInfo * stat; -+ hdfs_options_t * o = (hdfs_options_t*) options; -+ hdfs_connect(o); -+ stat = hdfsGetPathInfo(o->fs, path); -+ if(stat == NULL){ -+ return 1; -+ } -+ memset(buf, 0, sizeof(struct stat)); -+ buf->st_atime = stat->mLastAccess; -+ buf->st_size = stat->mSize; -+ buf->st_mtime = stat->mLastMod; -+ buf->st_mode = stat->mPermissions; -+ -+ hdfsFreeFileInfo(stat, 1); -+ return 0; -+} -+ -+int HDFS_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ -+ hdfs_options_t * o = (hdfs_options_t*) options; -+ hdfs_connect(o); -+ -+ stat->f_bsize = hdfsGetDefaultBlockSize(o->fs); -+ stat->f_blocks = hdfsGetCapacity(o->fs) / hdfsGetDefaultBlockSize(o->fs); -+ stat->f_bfree = stat->f_blocks - hdfsGetUsed(o->fs) / hdfsGetDefaultBlockSize(o->fs); -+ stat->f_bavail = 1; -+ stat->f_files = 1; -+ stat->f_ffree = 1; -+ return 0; -+} -+ - /* This is identical to the one in aiori-POSIX.c Doesn't seem like - * it would be appropriate in utilities.c. - */ -@@ -159,16 +277,16 @@ void hdfs_set_o_direct_flag(int *fd) - * NOTE: It's okay to call this thing whenever you need to be sure the HDFS - * filesystem is connected. - */ --static void hdfs_connect( IOR_param_t* param ) { -- if (param->verbose >= VERBOSE_4) { -+void hdfs_connect( hdfs_options_t* o ) { -+ if (verbose >= VERBOSE_4) { - printf("-> hdfs_connect [nn:\"%s\", port:%d, user:%s]\n", -- param->hdfs_name_node, -- param->hdfs_name_node_port, -- param->hdfs_user ); -+ o->name_node, -+ o->name_node_port, -+ o->user ); - } - -- if ( param->hdfs_fs ) { -- if (param->verbose >= VERBOSE_4) { -+ if ( o->fs ) { -+ if (verbose >= VERBOSE_4) { - printf("<- hdfs_connect [nothing to do]\n"); /* DEBUGGING */ - } - return; -@@ -176,34 +294,35 @@ static void hdfs_connect( IOR_param_t* param ) { - - /* initialize a builder, holding parameters for hdfsBuilderConnect() */ - struct hdfsBuilder* builder = hdfsNewBuilder(); -- if ( ! builder ) -- ERR_SIMPLE("couldn't create an hdfsBuilder"); -+ if ( ! builder ){ -+ ERR("couldn't create an hdfsBuilder"); -+ } - - hdfsBuilderSetForceNewInstance ( builder ); /* don't use cached instance */ - -- hdfsBuilderSetNameNode ( builder, param->hdfs_name_node ); -- hdfsBuilderSetNameNodePort( builder, param->hdfs_name_node_port ); -- hdfsBuilderSetUserName ( builder, param->hdfs_user ); -+ hdfsBuilderSetNameNode ( builder, o->name_node ); -+ hdfsBuilderSetNameNodePort( builder, o->name_node_port ); -+ hdfsBuilderSetUserName ( builder, o->user ); - - /* NOTE: hdfsBuilderConnect() frees the builder */ -- param->hdfs_fs = hdfsBuilderConnect( builder ); -- if ( ! param->hdfs_fs ) -- ERR_SIMPLE("hdsfsBuilderConnect failed"); -+ o->fs = hdfsBuilderConnect( builder ); -+ if ( ! o->fs ) -+ ERR("hdsfsBuilderConnect failed"); - -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- hdfs_connect [success]\n"); - } - } - --static void hdfs_disconnect( IOR_param_t* param ) { -- if (param->verbose >= VERBOSE_4) { -+static void hdfs_disconnect( hdfs_options_t* o ) { -+ if (verbose >= VERBOSE_4) { - printf("-> hdfs_disconnect\n"); - } -- if ( param->hdfs_fs ) { -- hdfsDisconnect( param->hdfs_fs ); -- param->hdfs_fs = NULL; -+ if ( o->fs ) { -+ hdfsDisconnect( o->fs ); -+ o->fs = NULL; - } -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- hdfs_disconnect\n"); - } - } -@@ -214,16 +333,17 @@ static void hdfs_disconnect( IOR_param_t* param ) { - * Return an hdfsFile. - */ - --static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsigned char createFile ) { -- if (param->verbose >= VERBOSE_4) { -+static void *HDFS_Create_Or_Open( char *testFileName, int flags, aiori_mod_opt_t *param, unsigned char createFile ) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_Create_Or_Open\n"); - } -+ hdfs_options_t * o = (hdfs_options_t*) param; - - hdfsFile hdfs_file = NULL; - int fd_oflags = 0, hdfs_return; - - /* initialize file-system handle, if needed */ -- hdfs_connect( param ); -+ hdfs_connect( o ); - - /* - * Check for unsupported flags. -@@ -234,15 +354,15 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * The other two, we just note that they are not supported and don't do them. - */ - -- if ( param->openFlags & IOR_RDWR ) { -+ if ( flags & IOR_RDWR ) { - ERR( "Opening or creating a file in RDWR is not implemented in HDFS" ); - } - -- if ( param->openFlags & IOR_EXCL ) { -+ if ( flags & IOR_EXCL ) { - fprintf( stdout, "Opening or creating a file in Exclusive mode is not implemented in HDFS\n" ); - } - -- if ( param->openFlags & IOR_APPEND ) { -+ if ( flags & IOR_APPEND ) { - fprintf( stdout, "Opening or creating a file for appending is not implemented in HDFS\n" ); - } - -@@ -254,8 +374,8 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - fd_oflags = O_CREAT; - } - -- if ( param->openFlags & IOR_WRONLY ) { -- if ( !param->filePerProc ) { -+ if ( flags & IOR_WRONLY ) { -+ if ( ! hints->filePerProc ) { - - // in N-1 mode, only rank 0 truncates the file - if ( rank != 0 ) { -@@ -279,7 +399,7 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * Now see if O_DIRECT is needed. - */ - -- if ( param->useO_DIRECT == TRUE ) { -+ if ( o->direct_io == TRUE ) { - hdfs_set_o_direct_flag( &fd_oflags ); - } - -@@ -290,10 +410,7 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * truncate each other's writes - */ - -- if (( param->openFlags & IOR_WRONLY ) && -- ( !param->filePerProc ) && -- ( rank != 0 )) { -- -+ if (( flags & IOR_WRONLY ) && ( ! hints->filePerProc ) && ( rank != 0 )) { - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - } - -@@ -301,21 +418,16 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * Now rank zero can open and truncate, if necessary. - */ - -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsOpenFile(0x%llx, %s, 0%o, %d, %d, %d)\n", -- param->hdfs_fs, -+ if (verbose >= VERBOSE_4) { -+ printf("\thdfsOpenFile(%p, %s, 0%o, %lld, %d, %lld)\n", -+ o->fs, - testFileName, - fd_oflags, /* shown in octal to compare w/ */ -- param->transferSize, -- param->hdfs_replicas, -- param->hdfs_block_size); -- } -- hdfs_file = hdfsOpenFile( param->hdfs_fs, -- testFileName, -- fd_oflags, -- param->transferSize, -- param->hdfs_replicas, -- param->hdfs_block_size); -+ hints->transferSize, -+ o->replicas, -+ o->block_size); -+ } -+ hdfs_file = hdfsOpenFile( o->fs, testFileName, fd_oflags, hints->transferSize, o->replicas, o->block_size); - if ( ! hdfs_file ) { - ERR( "Failed to open the file" ); - } -@@ -324,14 +436,14 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * For N-1 write, Rank 0 waits for the other ranks to open the file after it has. - */ - -- if (( param->openFlags & IOR_WRONLY ) && -- ( !param->filePerProc ) && -+ if (( flags & IOR_WRONLY ) && -+ ( !hints->filePerProc ) && - ( rank == 0 )) { - - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - } - -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Create_Or_Open\n"); - } - return ((void *) hdfs_file ); -@@ -341,36 +453,36 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign - * Create and open a file through the HDFS interface. - */ - --static void *HDFS_Create( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -+static aiori_fd_t *HDFS_Create(char *testFileName, int flags, aiori_mod_opt_t * param) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_Create\n"); - } - -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Create\n"); - } -- return HDFS_Create_Or_Open( testFileName, param, TRUE ); -+ return HDFS_Create_Or_Open( testFileName, flags, param, TRUE ); - } - - /* - * Open a file through the HDFS interface. - */ --static void *HDFS_Open( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -+static aiori_fd_t *HDFS_Open(char *testFileName, int flags, aiori_mod_opt_t * param) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_Open\n"); - } - -- if ( param->openFlags & IOR_CREAT ) { -- if (param->verbose >= VERBOSE_4) { -+ if ( flags & IOR_CREAT ) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Open( ... TRUE)\n"); - } -- return HDFS_Create_Or_Open( testFileName, param, TRUE ); -+ return HDFS_Create_Or_Open( testFileName, flags, param, TRUE ); - } - else { -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Open( ... FALSE)\n"); - } -- return HDFS_Create_Or_Open( testFileName, param, FALSE ); -+ return HDFS_Create_Or_Open( testFileName, flags, param, FALSE ); - } - } - -@@ -378,19 +490,18 @@ static void *HDFS_Open( char *testFileName, IOR_param_t * param ) { - * Write or read to file using the HDFS interface. - */ - --static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) { -- if (param->verbose >= VERBOSE_4) { -- printf("-> HDFS_Xfer(acc:%d, file:0x%llx, buf:0x%llx, len:%llu, 0x%llx)\n", -+static IOR_offset_t HDFS_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param) { -+ if (verbose >= VERBOSE_4) { -+ printf("-> HDFS_Xfer(acc:%d, file:%p, buf:%p, len:%llu, %p)\n", - access, file, buffer, length, param); - } -- -+ hdfs_options_t * o = (hdfs_options_t*) param; - int xferRetries = 0; - long long remaining = (long long)length; - char* ptr = (char *)buffer; - long long rc; -- off_t offset = param->offset; -- hdfsFS hdfs_fs = param->hdfs_fs; /* (void*) */ -+ hdfsFS hdfs_fs = o->fs; /* (void*) */ - hdfsFile hdfs_file = (hdfsFile)file; /* (void*) */ - - -@@ -401,37 +512,34 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, - if (verbose >= VERBOSE_4) { - fprintf( stdout, "task %d writing to offset %lld\n", - rank, -- param->offset + length - remaining); -+ offset + length - remaining); - } - -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsWrite( 0x%llx, 0x%llx, 0x%llx, %lld)\n", -+ if (verbose >= VERBOSE_4) { -+ printf("\thdfsWrite( %p, %p, %p, %lld)\n", - hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ - } - rc = hdfsWrite( hdfs_fs, hdfs_file, ptr, remaining ); - if ( rc < 0 ) { - ERR( "hdfsWrite() failed" ); - } -- - offset += rc; - -- if ( param->fsyncPerWrite == TRUE ) { -- HDFS_Fsync( hdfs_file, param ); -+ if ( hints->fsyncPerWrite == TRUE ) { -+ HDFS_Fsync( file, param ); - } - } - else { /* READ or CHECK */ - if (verbose >= VERBOSE_4) { - fprintf( stdout, "task %d reading from offset %lld\n", -- rank, -- param->offset + length - remaining ); -+ rank, offset + length - remaining ); - } - -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsRead( 0x%llx, 0x%llx, 0x%llx, %lld)\n", -+ if (verbose >= VERBOSE_4) { -+ printf("\thdfsRead( %p, %p, %p, %lld)\n", - hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ - } -- rc = hdfsRead( hdfs_fs, hdfs_file, ptr, remaining ); -- -+ rc = hdfsPread(hdfs_fs, hdfs_file, offset, ptr, remaining); - if ( rc == 0 ) { - ERR( "hdfs_read() returned EOF prematurely" ); - } -@@ -449,9 +557,9 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, - rank, - access == WRITE ? "hdfsWrite()" : "hdfs_read()", - rc, remaining, -- param->offset + length - remaining ); -+ offset + length - remaining ); - -- if ( param->singleXferAttempt == TRUE ) { -+ if ( hints->singleXferAttempt == TRUE ) { - MPI_CHECK( MPI_Abort( MPI_COMM_WORLD, -1 ), "barrier error" ); - } - -@@ -467,7 +575,16 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, - xferRetries++; - } - -- if (param->verbose >= VERBOSE_4) { -+ if(access == WRITE){ -+ // flush user buffer, this makes the write visible to readers -+ // it is the expected semantics of read/writes -+ rc = hdfsHFlush(hdfs_fs, hdfs_file); -+ if(rc != 0){ -+ WARN("Error during flush"); -+ } -+ } -+ -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Xfer\n"); - } - return ( length ); -@@ -476,39 +593,17 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, - /* - * Perform hdfs_sync(). - */ -- --static void HDFS_Fsync( void *fd, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -- printf("-> HDFS_Fsync\n"); -- } -- hdfsFS hdfs_fs = param->hdfs_fs; /* (void *) */ -+static void HDFS_Fsync(aiori_fd_t * fd, aiori_mod_opt_t * param) { -+ hdfs_options_t * o = (hdfs_options_t*) param; -+ hdfsFS hdfs_fs = o->fs; /* (void *) */ - hdfsFile hdfs_file = (hdfsFile)fd; /* (void *) */ - --#if 0 -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsHSync(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); -+ if (verbose >= VERBOSE_4) { -+ printf("\thdfsFlush(%p, %p)\n", hdfs_fs, hdfs_file); - } - if ( hdfsHSync( hdfs_fs, hdfs_file ) != 0 ) { -- EWARN( "hdfsHSync() failed" ); -- } --#elif 0 -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsHFlush(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); -- } -- if ( hdfsHFlush( hdfs_fs, hdfs_file ) != 0 ) { -- EWARN( "hdfsHFlush() failed" ); -- } --#else -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsFlush(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); -- } -- if ( hdfsFlush( hdfs_fs, hdfs_file ) != 0 ) { -- EWARN( "hdfsFlush() failed" ); -- } --#endif -- -- if (param->verbose >= VERBOSE_4) { -- printf("<- HDFS_Fsync\n"); -+ // Hsync is implemented to flush out data with newer Hadoop versions -+ WARN( "hdfsFlush() failed" ); - } - } - -@@ -516,27 +611,20 @@ static void HDFS_Fsync( void *fd, IOR_param_t * param ) { - * Close a file through the HDFS interface. - */ - --static void HDFS_Close( void *fd, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -+static void HDFS_Close(aiori_fd_t * fd, aiori_mod_opt_t * param) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_Close\n"); - } -+ hdfs_options_t * o = (hdfs_options_t*) param; - -- hdfsFS hdfs_fs = param->hdfs_fs; /* (void *) */ -+ hdfsFS hdfs_fs = o->fs; /* (void *) */ - hdfsFile hdfs_file = (hdfsFile)fd; /* (void *) */ - -- int open_flags; -- -- if ( param->openFlags & IOR_WRONLY ) { -- open_flags = O_CREAT | O_WRONLY; -- } else { -- open_flags = O_RDONLY; -- } -- - if ( hdfsCloseFile( hdfs_fs, hdfs_file ) != 0 ) { - ERR( "hdfsCloseFile() failed" ); - } - -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Close\n"); - } - } -@@ -547,119 +635,66 @@ static void HDFS_Close( void *fd, IOR_param_t * param ) { - * NOTE: The signature for ior_aiori.delete doesn't include a parameter to - * select recursive deletes. We'll assume that that is never needed. - */ --static void HDFS_Delete( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -+static void HDFS_Delete( char *testFileName, aiori_mod_opt_t * param ) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_Delete\n"); - } - -+ hdfs_options_t * o = (hdfs_options_t*) param; - char errmsg[256]; - - /* initialize file-system handle, if needed */ -- hdfs_connect( param ); -+ hdfs_connect(o); - -- if ( ! param->hdfs_fs ) -- ERR_SIMPLE( "Can't delete a file without an HDFS connection" ); -+ if ( ! o->fs ) -+ ERR( "Can't delete a file without an HDFS connection" ); - -- if ( hdfsDelete( param->hdfs_fs, testFileName, 0 ) != 0 ) { -- sprintf(errmsg, -- "[RANK %03d]: hdfsDelete() of file \"%s\" failed\n", -+ if ( hdfsDelete( o->fs, testFileName, 0 ) != 0 ) { -+ sprintf(errmsg, "[RANK %03d]: hdfsDelete() of file \"%s\" failed\n", - rank, testFileName); - -- EWARN( errmsg ); -+ WARN( errmsg ); - } -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_Delete\n"); - } - } - --/* -- * Determine api version. -- */ -- --static void HDFS_SetVersion( IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_4) { -- printf("-> HDFS_SetVersion\n"); -- } -- -- strcpy( param->apiVersion, param->api ); -- if (param->verbose >= VERBOSE_4) { -- printf("<- HDFS_SetVersion\n"); -- } --} -- - /* - * Use hdfsGetPathInfo() to get info about file? - * Is there an fstat we can use on hdfs? - * Should we just use POSIX fstat? - */ - --static IOR_offset_t --HDFS_GetFileSize(IOR_param_t * param, -- MPI_Comm testComm, -+static IOR_offset_t HDFS_GetFileSize(aiori_mod_opt_t * param, - char * testFileName) { -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("-> HDFS_GetFileSize(%s)\n", testFileName); - } -+ hdfs_options_t * o = (hdfs_options_t*) param; - - IOR_offset_t aggFileSizeFromStat; - IOR_offset_t tmpMin, tmpMax, tmpSum; - - /* make sure file-system is connected */ -- hdfs_connect( param ); -+ hdfs_connect( o ); - - /* file-info struct includes size in bytes */ -- if (param->verbose >= VERBOSE_4) { -- printf("\thdfsGetPathInfo(%s) ...", testFileName);fflush(stdout); -+ if (verbose >= VERBOSE_4) { -+ printf("\thdfsGetPathInfo(%s) ...", testFileName); -+ fflush(stdout); - } - -- hdfsFileInfo* info = hdfsGetPathInfo( param->hdfs_fs, testFileName ); -+ hdfsFileInfo* info = hdfsGetPathInfo( o->fs, testFileName ); - if ( ! info ) -- ERR_SIMPLE( "hdfsGetPathInfo() failed" ); -- if (param->verbose >= VERBOSE_4) { -+ ERR( "hdfsGetPathInfo() failed" ); -+ if (verbose >= VERBOSE_4) { - printf("done.\n");fflush(stdout); - } - - aggFileSizeFromStat = info->mSize; - -- if ( param->filePerProc == TRUE ) { -- if (param->verbose >= VERBOSE_4) { -- printf("\tall-reduce (1)\n"); -- } -- MPI_CHECK( -- MPI_Allreduce( -- &aggFileSizeFromStat, &tmpSum, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm ), -- "cannot total data moved" ); -- -- aggFileSizeFromStat = tmpSum; -- } -- else { -- if (param->verbose >= VERBOSE_4) { -- printf("\tall-reduce (2a)\n"); -- } -- MPI_CHECK( -- MPI_Allreduce( -- &aggFileSizeFromStat, &tmpMin, 1, MPI_LONG_LONG_INT, MPI_MIN, testComm ), -- "cannot total data moved" ); -- -- if (param->verbose >= VERBOSE_4) { -- printf("\tall-reduce (2b)\n"); -- } -- MPI_CHECK( -- MPI_Allreduce( -- &aggFileSizeFromStat, &tmpMax, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm ), -- "cannot total data moved" ); -- -- if ( tmpMin != tmpMax ) { -- if ( rank == 0 ) { -- WARN( "inconsistent file size by different tasks" ); -- } -- -- /* incorrect, but now consistent across tasks */ -- aggFileSizeFromStat = tmpMin; -- } -- } -- -- if (param->verbose >= VERBOSE_4) { -+ if (verbose >= VERBOSE_4) { - printf("<- HDFS_GetFileSize [%llu]\n", aggFileSizeFromStat); - } - return ( aggFileSizeFromStat ); -diff --git a/src/aiori-IME.c b/src/aiori-IME.c -index 500f380..896dd67 100755 ---- a/src/aiori-IME.c -+++ b/src/aiori-IME.c -@@ -21,8 +21,8 @@ - #include - #include - #include --#include /* sys_errlist */ --#include /* IO operations */ -+#include /* sys_errlist */ -+#include /* IO operations */ - - #include "ior.h" - #include "iordef.h" -@@ -30,63 +30,68 @@ - #include "utilities.h" - #include "ime_native.h" - --#ifndef O_BINARY /* Required on Windows */ -+#define IME_UNUSED(x) (void)(x) /* Silence compiler warnings */ -+ -+#ifndef O_BINARY /* Required on Windows */ - # define O_BINARY 0 - #endif - - /**************************** P R O T O T Y P E S *****************************/ - --static void *IME_Create(char *, IOR_param_t *); --static void *IME_Open(char *, IOR_param_t *); --static void IME_Close(void *, IOR_param_t *); --static void IME_Delete(char *, IOR_param_t *); --static char *IME_GetVersion(); --static void IME_Fsync(void *, IOR_param_t *); --static int IME_Access(const char *, int, IOR_param_t *); --static IOR_offset_t IME_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static IOR_offset_t IME_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static int IME_StatFS(const char *, ior_aiori_statfs_t *, -- IOR_param_t *); --static int IME_RmDir(const char *, IOR_param_t *); --static int IME_MkDir(const char *, mode_t, IOR_param_t *); --static int IME_Stat(const char *, struct stat *, IOR_param_t *); -+aiori_fd_t *IME_Create(char *, int, aiori_mod_opt_t *); -+aiori_fd_t *IME_Open(char *, int, aiori_mod_opt_t *); -+void IME_Close(aiori_fd_t *, aiori_mod_opt_t *); -+void IME_Delete(char *, aiori_mod_opt_t *); -+char *IME_GetVersion(); -+void IME_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+int IME_Access(const char *, int, aiori_mod_opt_t *); -+IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *, char *); -+IOR_offset_t IME_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, -+ IOR_offset_t, aiori_mod_opt_t *); -+int IME_Statfs(const char *, ior_aiori_statfs_t *, -+ aiori_mod_opt_t *); -+int IME_Rmdir(const char *, aiori_mod_opt_t *); -+int IME_Mkdir(const char *, mode_t, aiori_mod_opt_t *); -+int IME_Stat(const char *, struct stat *, aiori_mod_opt_t *); -+void IME_Xferhints(aiori_xfer_hint_t *params); - - #if (IME_NATIVE_API_VERSION >= 132) --static int IME_Mknod(char *); --static void IME_Sync(IOR_param_t *); -+int IME_Mknod(char *); -+void IME_Sync(aiori_mod_opt_t *param); - #endif - --static void IME_Initialize(); --static void IME_Finalize(); -+void IME_Initialize(); -+void IME_Finalize(); - -+/****************************** O P T I O N S *********************************/ - --/************************** O P T I O N S *****************************/ - typedef struct{ -- int direct_io; -+ int direct_io; - } ime_options_t; - -+option_help *IME_Options(aiori_mod_opt_t **init_backend_options, -+ aiori_mod_opt_t *init_values) -+{ -+ ime_options_t *o = malloc(sizeof(ime_options_t)); - --option_help * IME_options(void ** init_backend_options, void * init_values){ -- ime_options_t * o = malloc(sizeof(ime_options_t)); -+ if (init_values != NULL) -+ memcpy(o, init_values, sizeof(ime_options_t)); -+ else -+ o->direct_io = 0; - -- if (init_values != NULL){ -- memcpy(o, init_values, sizeof(ime_options_t)); -- }else{ -- o->direct_io = 0; -- } -+ *init_backend_options = (aiori_mod_opt_t*)o; - -- *init_backend_options = o; -+ option_help h[] = { -+ {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, -+ LAST_OPTION -+ }; -+ option_help *help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); - -- option_help h [] = { -- {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, -- LAST_OPTION -- }; -- option_help * help = malloc(sizeof(h)); -- memcpy(help, h, sizeof(h)); -- return help; -+ return help; - } - -+ - /************************** D E C L A R A T I O N S ***************************/ - - extern int rank; -@@ -100,19 +105,20 @@ ior_aiori_t ime_aiori = { - .create = IME_Create, - .open = IME_Open, - .xfer = IME_Xfer, -+ .xfer_hints = IME_Xferhints, - .close = IME_Close, - .delete = IME_Delete, - .get_version = IME_GetVersion, - .fsync = IME_Fsync, - .get_file_size = IME_GetFileSize, - .access = IME_Access, -- .statfs = IME_StatFS, -- .rmdir = IME_RmDir, -- .mkdir = IME_MkDir, -+ .statfs = IME_Statfs, -+ .rmdir = IME_Rmdir, -+ .mkdir = IME_Mkdir, - .stat = IME_Stat, - .initialize = IME_Initialize, - .finalize = IME_Finalize, -- .get_options = IME_options, -+ .get_options = IME_Options, - #if (IME_NATIVE_API_VERSION >= 132) - .sync = IME_Sync, - .mknod = IME_Mknod, -@@ -120,72 +126,92 @@ ior_aiori_t ime_aiori = { - .enable_mdtest = true, - }; - -+static aiori_xfer_hint_t *hints = NULL; -+static bool ime_initialized = false; -+ -+ - /***************************** F U N C T I O N S ******************************/ - -+void IME_Xferhints(aiori_xfer_hint_t *params) -+{ -+ hints = params; -+} -+ - /* - * Initialize IME (before MPI is started). - */ --static void IME_Initialize() -+void IME_Initialize() - { -+ if (ime_initialized) -+ return; -+ - ime_native_init(); -+ ime_initialized = true; - } - - /* - * Finlize IME (after MPI is shutdown). - */ --static void IME_Finalize() -+void IME_Finalize() - { -+ if (!ime_initialized) -+ return; -+ - (void)ime_native_finalize(); -+ ime_initialized = false; - } - - /* - * Try to access a file through the IME interface. - */ --static int IME_Access(const char *path, int mode, IOR_param_t *param) -+ -+int IME_Access(const char *path, int mode, aiori_mod_opt_t *module_options) - { -- (void)param; -+ IME_UNUSED(module_options); - - return ime_native_access(path, mode); - } - - /* -- * Creat and open a file through the IME interface. -+ * Create and open a file through the IME interface. - */ --static void *IME_Create(char *testFileName, IOR_param_t *param) -+aiori_fd_t *IME_Create(char *testFileName, int flags, aiori_mod_opt_t *param) - { -- return IME_Open(testFileName, param); -+ return IME_Open(testFileName, flags, param); - } - - /* - * Open a file through the IME interface. - */ --static void *IME_Open(char *testFileName, IOR_param_t *param) -+aiori_fd_t *IME_Open(char *testFileName, int flags, aiori_mod_opt_t *param) - { - int fd_oflag = O_BINARY; - int *fd; - -+ if (hints->dryRun) -+ return NULL; -+ - fd = (int *)malloc(sizeof(int)); - if (fd == NULL) - ERR("Unable to malloc file descriptor"); - -- ime_options_t * o = (ime_options_t*) param->backend_options; -- if (o->direct_io == TRUE){ -- set_o_direct_flag(&fd_oflag); -- } -+ ime_options_t *o = (ime_options_t*) param; -+ if (o->direct_io == TRUE) -+ set_o_direct_flag(&fd_oflag); - -- if (param->openFlags & IOR_RDONLY) -+ if (flags & IOR_RDONLY) - fd_oflag |= O_RDONLY; -- if (param->openFlags & IOR_WRONLY) -+ if (flags & IOR_WRONLY) - fd_oflag |= O_WRONLY; -- if (param->openFlags & IOR_RDWR) -+ if (flags & IOR_RDWR) - fd_oflag |= O_RDWR; -- if (param->openFlags & IOR_APPEND) -+ if (flags & IOR_APPEND) - fd_oflag |= O_APPEND; -- if (param->openFlags & IOR_CREAT) -+ if (flags & IOR_CREAT) - fd_oflag |= O_CREAT; -- if (param->openFlags & IOR_EXCL) -+ if (flags & IOR_EXCL) - fd_oflag |= O_EXCL; -- if (param->openFlags & IOR_TRUNC) -+ if (flags & IOR_TRUNC) - fd_oflag |= O_TRUNC; - - *fd = ime_native_open(testFileName, fd_oflag, 0664); -@@ -194,14 +220,14 @@ static void *IME_Open(char *testFileName, IOR_param_t *param) - ERR("cannot open file"); - } - -- return((void *)fd); -+ return (aiori_fd_t*) fd; - } - - /* - * Write or read access to file using the IM interface. - */ --static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, -- IOR_offset_t length, IOR_param_t *param) -+IOR_offset_t IME_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *param) - { - int xferRetries = 0; - long long remaining = (long long)length; -@@ -209,25 +235,28 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, - int fd = *(int *)file; - long long rc; - -+ if (hints->dryRun) -+ return length; -+ - while (remaining > 0) { - /* write/read file */ - if (access == WRITE) { /* WRITE */ - if (verbose >= VERBOSE_4) { - fprintf(stdout, "task %d writing to offset %lld\n", -- rank, param->offset + length - remaining); -+ rank, offset + length - remaining); - } - -- rc = ime_native_pwrite(fd, ptr, remaining, param->offset); -+ rc = ime_native_pwrite(fd, ptr, remaining, offset); - -- if (param->fsyncPerWrite) -- IME_Fsync(&fd, param); -+ if (hints->fsyncPerWrite) -+ IME_Fsync(file, param); - } else { /* READ or CHECK */ - if (verbose >= VERBOSE_4) { - fprintf(stdout, "task %d reading from offset %lld\n", -- rank, param->offset + length - remaining); -+ rank, offset + length - remaining); - } - -- rc = ime_native_pread(fd, ptr, remaining, param->offset); -+ rc = ime_native_pread(fd, ptr, remaining, offset); - if (rc == 0) - ERR("hit EOF prematurely"); - else if (rc < 0) -@@ -238,9 +267,9 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, - fprintf(stdout, "WARNING: Task %d, partial %s, %lld of " - "%lld bytes at offset %lld\n", - rank, access == WRITE ? "write" : "read", rc, -- remaining, param->offset + length - remaining ); -+ remaining, offset + length - remaining ); - -- if (param->singleXferAttempt) { -+ if (hints->singleXferAttempt) { - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), - "barrier error"); - } -@@ -264,7 +293,7 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, - /* - * Perform fsync(). - */ --static void IME_Fsync(void *fd, IOR_param_t *param) -+void IME_Fsync(aiori_fd_t *fd, aiori_mod_opt_t *param) - { - if (ime_native_fsync(*(int *)fd) != 0) - WARN("cannot perform fsync on file"); -@@ -273,33 +302,34 @@ static void IME_Fsync(void *fd, IOR_param_t *param) - /* - * Close a file through the IME interface. - */ --static void IME_Close(void *fd, IOR_param_t *param) -+void IME_Close(aiori_fd_t *file, aiori_mod_opt_t *param) - { -- if (ime_native_close(*(int *)fd) != 0) -- { -- free(fd); -- ERR("cannot close file"); -- } -- else -- free(fd); -+ if (hints->dryRun) -+ return; -+ -+ if (ime_native_close(*(int*)file) != 0) -+ ERRF("Cannot close file descriptor: %d", *(int*)file); -+ -+ free(file); - } - - /* - * Delete a file through the IME interface. - */ --static void IME_Delete(char *testFileName, IOR_param_t *param) -+void IME_Delete(char *testFileName, aiori_mod_opt_t *param) - { -- char errmsg[256]; -- sprintf(errmsg, "[RANK %03d]:cannot delete file %s\n", -- rank, testFileName); -+ if (hints->dryRun) -+ return; -+ - if (ime_native_unlink(testFileName) != 0) -- WARN(errmsg); -+ WARNF("[RANK %03d]: cannot delete file \"%s\"\n", -+ rank, testFileName); - } - - /* - * Determine API version. - */ --static char *IME_GetVersion() -+char *IME_GetVersion() - { - static char ver[1024] = {}; - #if (IME_NATIVE_API_VERSION >= 120) -@@ -310,18 +340,17 @@ static char *IME_GetVersion() - return ver; - } - --static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, -- IOR_param_t *param) -+int IME_Statfs(const char *path, ior_aiori_statfs_t *stat_buf, -+ aiori_mod_opt_t *module_options) - { -- (void)param; -+ IME_UNUSED(module_options); - - #if (IME_NATIVE_API_VERSION >= 130) - struct statvfs statfs_buf; - - int ret = ime_native_statvfs(path, &statfs_buf); - if (ret) -- return ret; -- -+ return ret; - stat_buf->f_bsize = statfs_buf.f_bsize; - stat_buf->f_blocks = statfs_buf.f_blocks; - stat_buf->f_bfree = statfs_buf.f_bfree; -@@ -330,38 +359,37 @@ static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, - - return 0; - #else -- (void)path; -- (void)stat_buf; -+ IME_UNUSED(path); -+ IME_UNUSED(stat_buf); - - WARN("statfs is currently not supported in IME backend!"); - return -1; - #endif - } - -- --static int IME_MkDir(const char *path, mode_t mode, IOR_param_t *param) -+int IME_Mkdir(const char *path, mode_t mode, aiori_mod_opt_t * module_options) - { -- (void)param; -+ IME_UNUSED(module_options); - - #if (IME_NATIVE_API_VERSION >= 130) - return ime_native_mkdir(path, mode); - #else -- (void)path; -- (void)mode; -+ IME_UNUSED(path); -+ IME_UNUSED(mode); - - WARN("mkdir not supported in IME backend!"); - return -1; - #endif - } - --static int IME_RmDir(const char *path, IOR_param_t *param) -+int IME_Rmdir(const char *path, aiori_mod_opt_t *module_options) - { -- (void)param; -+ IME_UNUSED(module_options); - - #if (IME_NATIVE_API_VERSION >= 130) - return ime_native_rmdir(path); - #else -- (void)path; -+ IME_UNUSED(path); - - WARN("rmdir not supported in IME backend!"); - return -1; -@@ -371,9 +399,10 @@ static int IME_RmDir(const char *path, IOR_param_t *param) - /* - * Perform stat() through the IME interface. - */ --static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) -+int IME_Stat(const char *path, struct stat *buf, -+ aiori_mod_opt_t *module_options) - { -- (void)param; -+ IME_UNUSED(module_options); - - return ime_native_stat(path, buf); - } -@@ -381,62 +410,39 @@ static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) - /* - * Use IME stat() to return aggregate file size. - */ --static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm, -- char *testFileName) -+IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *test, char *testFileName) - { - struct stat stat_buf; -- IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - -- if (ime_native_stat(testFileName, &stat_buf) != 0) { -- ERR("cannot get status of written file"); -- } -- aggFileSizeFromStat = stat_buf.st_size; -- -- if (test->filePerProc) { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, -- MPI_LONG_LONG_INT, MPI_SUM, testComm), -- "cannot total data moved"); -- aggFileSizeFromStat = tmpSum; -- } else { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, -- MPI_LONG_LONG_INT, MPI_MIN, testComm), -- "cannot total data moved"); -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, -- MPI_LONG_LONG_INT, MPI_MAX, testComm), -- "cannot total data moved"); -- -- if (tmpMin != tmpMax) { -- if (rank == 0) { -- WARN("inconsistent file size by different tasks"); -- } -- /* incorrect, but now consistent across tasks */ -- aggFileSizeFromStat = tmpMin; -- } -- } -+ if (hints->dryRun) -+ return 0; - -- return(aggFileSizeFromStat); -+ if (ime_native_stat(testFileName, &stat_buf) != 0) -+ ERRF("cannot get status of written file %s", -+ testFileName); -+ return stat_buf.st_size; - } - - #if (IME_NATIVE_API_VERSION >= 132) - /* - * Create a file through mknod interface. - */ --static int IME_Mknod(char *testFileName) -+int IME_Mknod(char *testFileName) - { -- int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); -- if (ret < 0) -- ERR("mknod failed"); -+ int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); -+ if (ret < 0) -+ ERR("mknod failed"); - -- return ret; -+ return ret; - } - - /* - * Use IME sync to flush page cache of all opened files. - */ --static void IME_Sync(IOR_param_t * param) -+void IME_Sync(aiori_mod_opt_t *param) - { -- int ret = ime_native_sync(0); -- if (ret != 0) -- FAIL("Error executing the sync command."); -+ int ret = ime_native_sync(0); -+ if (ret != 0) -+ FAIL("Error executing the sync command."); - } - #endif -diff --git a/src/aiori-MMAP.c b/src/aiori-MMAP.c -index 7be860a..3777be5 100644 ---- a/src/aiori-MMAP.c -+++ b/src/aiori-MMAP.c -@@ -22,18 +22,20 @@ - - #include "ior.h" - #include "aiori.h" -+#include "aiori-POSIX.h" - #include "iordef.h" - #include "utilities.h" - - /**************************** P R O T O T Y P E S *****************************/ --static void *MMAP_Create(char *, IOR_param_t *); --static void *MMAP_Open(char *, IOR_param_t *); --static IOR_offset_t MMAP_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void MMAP_Close(void *, IOR_param_t *); --static void MMAP_Fsync(void *, IOR_param_t *); --static option_help * MMAP_options(void ** init_backend_options, void * init_values); -- -+static aiori_fd_t *MMAP_Create(char *, int flags, aiori_mod_opt_t *); -+static aiori_fd_t *MMAP_Open(char *, int flags, aiori_mod_opt_t *); -+static IOR_offset_t MMAP_Xfer(int, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -+static void MMAP_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void MMAP_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static option_help * MMAP_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); -+static void MMAP_xfer_hints(aiori_xfer_hint_t * params); -+static int MMAP_check_params(aiori_mod_opt_t * options); - /************************** D E C L A R A T I O N S ***************************/ - - ior_aiori_t mmap_aiori = { -@@ -43,10 +45,12 @@ ior_aiori_t mmap_aiori = { - .xfer = MMAP_Xfer, - .close = MMAP_Close, - .delete = POSIX_Delete, -+ .xfer_hints = MMAP_xfer_hints, - .get_version = aiori_get_version, - .fsync = MMAP_Fsync, - .get_file_size = POSIX_GetFileSize, - .get_options = MMAP_options, -+ .check_params = MMAP_check_params - }; - - /***************************** F U N C T I O N S ******************************/ -@@ -58,7 +62,7 @@ typedef struct{ - int madv_pattern; - } mmap_options_t; - --static option_help * MMAP_options(void ** init_backend_options, void * init_values){ -+static option_help * MMAP_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ - mmap_options_t * o = malloc(sizeof(mmap_options_t)); - - if (init_values != NULL){ -@@ -67,7 +71,7 @@ static option_help * MMAP_options(void ** init_backend_options, void * init_valu - memset(o, 0, sizeof(mmap_options_t)); - } - -- *init_backend_options = o; -+ *init_backend_options = (aiori_mod_opt_t*) o; - - option_help h [] = { - {0, "mmap.madv_dont_need", "Use advise don't need", OPTION_FLAG, 'd', & o->madv_dont_need}, -@@ -79,25 +83,38 @@ static option_help * MMAP_options(void ** init_backend_options, void * init_valu - return help; - } - --static void ior_mmap_file(int *file, IOR_param_t *param) -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void MMAP_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+ POSIX_xfer_hints(params); -+} -+ -+static int MMAP_check_params(aiori_mod_opt_t * options){ -+ if (hints->fsyncPerWrite && (hints->transferSize & (sysconf(_SC_PAGESIZE) - 1))) -+ ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite"); -+ return 0; -+} -+ -+static void ior_mmap_file(int *file, int mflags, void *param) - { - int flags = PROT_READ; -- IOR_offset_t size = param->expectedAggFileSize; -+ IOR_offset_t size = hints->expectedAggFileSize; - -- if (param->open == WRITE) -+ if (mflags & IOR_WRONLY || mflags & IOR_RDWR) - flags |= PROT_WRITE; -- mmap_options_t *o = (mmap_options_t*) param->backend_options; -+ mmap_options_t *o = (mmap_options_t*) param; - - o->mmap_ptr = mmap(NULL, size, flags, MAP_SHARED, - *file, 0); - if (o->mmap_ptr == MAP_FAILED) - ERR("mmap() failed"); - -- if (param->randomOffset) -+ if (hints->randomOffset) - flags = POSIX_MADV_RANDOM; - else - flags = POSIX_MADV_SEQUENTIAL; -- -+ - if(o->madv_pattern){ - if (posix_madvise(o->mmap_ptr, size, flags) != 0) - ERR("madvise() failed"); -@@ -112,48 +129,47 @@ static void ior_mmap_file(int *file, IOR_param_t *param) - } - - /* -- * Creat and open a file through the POSIX interface, then setup mmap. -+ * Create and open a file through the POSIX interface, then setup mmap. - */ --static void *MMAP_Create(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *MMAP_Create(char *testFileName, int flags, aiori_mod_opt_t * param) - { - int *fd; - -- fd = POSIX_Create(testFileName, param); -- if (ftruncate(*fd, param->expectedAggFileSize) != 0) -+ fd = (int*) POSIX_Create(testFileName, flags, param); -+ if (ftruncate(*fd, hints->expectedAggFileSize) != 0) - ERR("ftruncate() failed"); -- ior_mmap_file(fd, param); -- return ((void *)fd); -+ ior_mmap_file(fd, flags, param); -+ return ((aiori_fd_t *)fd); - } - - /* - * Open a file through the POSIX interface and setup mmap. - */ --static void *MMAP_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *MMAP_Open(char *testFileName, int flags, aiori_mod_opt_t * param) - { - int *fd; -- -- fd = POSIX_Open(testFileName, param); -- ior_mmap_file(fd, param); -- return ((void *)fd); -+ fd = (int*) POSIX_Open(testFileName, flags, param); -+ ior_mmap_file(fd, flags, param); -+ return ((aiori_fd_t *)fd); - } - - /* - * Write or read access to file using mmap - */ --static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t MMAP_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param) - { -- mmap_options_t *o = (mmap_options_t*) param->backend_options; -+ mmap_options_t *o = (mmap_options_t*) param; - if (access == WRITE) { -- memcpy(o->mmap_ptr + param->offset, buffer, length); -+ memcpy(o->mmap_ptr + offset, buffer, length); - } else { -- memcpy(buffer, o->mmap_ptr + param->offset, length); -+ memcpy(buffer, o->mmap_ptr + offset, length); - } - -- if (param->fsyncPerWrite == TRUE) { -- if (msync(o->mmap_ptr + param->offset, length, MS_SYNC) != 0) -+ if (hints->fsyncPerWrite == TRUE) { -+ if (msync(o->mmap_ptr + offset, length, MS_SYNC) != 0) - ERR("msync() failed"); -- if (posix_madvise(o->mmap_ptr + param->offset, length, -+ if (posix_madvise(o->mmap_ptr + offset, length, - POSIX_MADV_DONTNEED) != 0) - ERR("madvise() failed"); - } -@@ -163,20 +179,20 @@ static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer, - /* - * Perform msync(). - */ --static void MMAP_Fsync(void *fd, IOR_param_t * param) -+static void MMAP_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- mmap_options_t *o = (mmap_options_t*) param->backend_options; -- if (msync(o->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0) -- EWARN("msync() failed"); -+ mmap_options_t *o = (mmap_options_t*) param; -+ if (msync(o->mmap_ptr, hints->expectedAggFileSize, MS_SYNC) != 0) -+ WARN("msync() failed"); - } - - /* - * Close a file through the POSIX interface, after tear down the mmap. - */ --static void MMAP_Close(void *fd, IOR_param_t * param) -+static void MMAP_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- mmap_options_t *o = (mmap_options_t*) param->backend_options; -- if (munmap(o->mmap_ptr, param->expectedAggFileSize) != 0) -+ mmap_options_t *o = (mmap_options_t*) param; -+ if (munmap(o->mmap_ptr, hints->expectedAggFileSize) != 0) - ERR("munmap failed"); - o->mmap_ptr = NULL; - POSIX_Close(fd, param); -diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c -index 04c10be..059755e 100755 ---- a/src/aiori-MPIIO.c -+++ b/src/aiori-MPIIO.c -@@ -31,23 +31,56 @@ - - /**************************** P R O T O T Y P E S *****************************/ - --static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, IOR_param_t *); -+static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, aiori_mod_opt_t *); - --static void *MPIIO_Create(char *, IOR_param_t *); --static void *MPIIO_Open(char *, IOR_param_t *); --static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void MPIIO_Close(void *, IOR_param_t *); -+static aiori_fd_t *MPIIO_Create(char *, int iorflags, aiori_mod_opt_t *); -+static aiori_fd_t *MPIIO_Open(char *, int flags, aiori_mod_opt_t *); -+static IOR_offset_t MPIIO_Xfer(int, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -+static void MPIIO_Close(aiori_fd_t *, aiori_mod_opt_t *); - static char* MPIIO_GetVersion(); --static void MPIIO_Fsync(void *, IOR_param_t *); -- -+static void MPIIO_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static int MPIIO_check_params(aiori_mod_opt_t * options); - - /************************** D E C L A R A T I O N S ***************************/ - -+typedef struct{ -+ MPI_File fd; -+ MPI_Datatype transferType; /* datatype for transfer */ -+ MPI_Datatype contigType; /* elem datatype */ -+ MPI_Datatype fileType; /* filetype for file view */ -+} mpiio_fd_t; -+ -+static option_help * MPIIO_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ mpiio_options_t * o = malloc(sizeof(mpiio_options_t)); -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(mpiio_options_t)); -+ }else{ -+ memset(o, 0, sizeof(mpiio_options_t)); -+ } -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ -+ option_help h [] = { -+ {0, "mpiio.hintsFileName","Full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & o->hintsFileName}, -+ {0, "mpiio.showHints", "Show MPI hints", OPTION_FLAG, 'd', & o->showHints}, -+ {0, "mpiio.preallocate", "Preallocate file size", OPTION_FLAG, 'd', & o->preallocate}, -+ {0, "mpiio.useStridedDatatype", "put strided access into datatype", OPTION_FLAG, 'd', & o->useStridedDatatype}, -+ //{'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & params->useSharedFilePointer}, -+ {0, "mpiio.useFileView", "Use MPI_File_set_view", OPTION_FLAG, 'd', & o->useFileView}, -+ LAST_OPTION -+ }; -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} -+ -+ - ior_aiori_t mpiio_aiori = { - .name = "MPIIO", - .name_legacy = NULL, - .create = MPIIO_Create, -+ .get_options = MPIIO_options, -+ .xfer_hints = MPIIO_xfer_hints, - .open = MPIIO_Open, - .xfer = MPIIO_Xfer, - .close = MPIIO_Close, -@@ -60,18 +93,46 @@ ior_aiori_t mpiio_aiori = { - .rmdir = aiori_posix_rmdir, - .access = MPIIO_Access, - .stat = aiori_posix_stat, -+ .check_params = MPIIO_check_params - }; - - /***************************** F U N C T I O N S ******************************/ -+static aiori_xfer_hint_t * hints = NULL; -+ -+void MPIIO_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} -+ -+static int MPIIO_check_params(aiori_mod_opt_t * module_options){ -+ mpiio_options_t * param = (mpiio_options_t*) module_options; -+ if ((param->useFileView == TRUE) -+ && (sizeof(MPI_Aint) < 8) /* used for 64-bit datatypes */ -+ &&((hints->numTasks * hints->blockSize) > -+ (2 * (IOR_offset_t) GIBIBYTE))) -+ ERR("segment size must be < 2GiB"); -+ if (param->useSharedFilePointer) -+ ERR("shared file pointer not implemented"); -+ if (param->useStridedDatatype && (hints->blockSize < sizeof(IOR_size_t) -+ || hints->transferSize < -+ sizeof(IOR_size_t))) -+ ERR("need larger file size for strided datatype in MPIIO"); -+ if (hints->randomOffset && hints->collective) -+ ERR("random offset not available with collective MPIIO"); -+ if (hints->randomOffset && param->useFileView) -+ ERR("random offset not available with MPIIO fileviews"); -+ -+ return 0; -+} - - /* - * Try to access a file through the MPIIO interface. - */ --int MPIIO_Access(const char *path, int mode, IOR_param_t *param) -+int MPIIO_Access(const char *path, int mode, aiori_mod_opt_t *module_options) - { -- if(param->dryRun){ -+ if(hints->dryRun){ - return MPI_SUCCESS; - } -+ mpiio_options_t * param = (mpiio_options_t*) module_options; - MPI_File fd; - int mpi_mode = MPI_MODE_UNIQUE_OPEN; - MPI_Info mpiHints = MPI_INFO_NULL; -@@ -98,60 +159,49 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) - /* - * Create and open a file through the MPIIO interface. - */ --static void *MPIIO_Create(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *MPIIO_Create(char *testFileName, int iorflags, aiori_mod_opt_t * module_options) - { -- if(param->dryRun){ -- return 0; -- } -- return MPIIO_Open(testFileName, param); -+ return MPIIO_Open(testFileName, iorflags, module_options); - } - - /* - * Open a file through the MPIIO interface. Setup file view. - */ --static void *MPIIO_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *MPIIO_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options) - { -+ mpiio_options_t * param = (mpiio_options_t*) module_options; - int fd_mode = (int)0, - offsetFactor, - tasksPerFile, -- transfersPerBlock = param->blockSize / param->transferSize; -- struct fileTypeStruct { -- int globalSizes[2], localSizes[2], startIndices[2]; -- } fileTypeStruct; -- MPI_File *fd; -- MPI_Comm comm; -- MPI_Info mpiHints = MPI_INFO_NULL; -+ transfersPerBlock = hints->blockSize / hints->transferSize; - -- fd = (MPI_File *) malloc(sizeof(MPI_File)); -- if (fd == NULL) -- ERR("malloc failed()"); - -- *fd = 0; -+ mpiio_fd_t * mfd = malloc(sizeof(mpiio_fd_t)); -+ memset(mfd, 0, sizeof(mpiio_fd_t)); -+ MPI_Comm comm; -+ MPI_Info mpiHints = MPI_INFO_NULL; - - /* set IOR file flags to MPIIO flags */ - /* -- file open flags -- */ -- if (param->openFlags & IOR_RDONLY) { -+ if (flags & IOR_RDONLY) { - fd_mode |= MPI_MODE_RDONLY; - } -- if (param->openFlags & IOR_WRONLY) { -+ if (flags & IOR_WRONLY) { - fd_mode |= MPI_MODE_WRONLY; - } -- if (param->openFlags & IOR_RDWR) { -+ if (flags & IOR_RDWR) { - fd_mode |= MPI_MODE_RDWR; - } -- if (param->openFlags & IOR_APPEND) { -+ if (flags & IOR_APPEND) { - fd_mode |= MPI_MODE_APPEND; - } -- if (param->openFlags & IOR_CREAT) { -+ if (flags & IOR_CREAT) { - fd_mode |= MPI_MODE_CREATE; - } -- if (param->openFlags & IOR_EXCL) { -+ if (flags & IOR_EXCL) { - fd_mode |= MPI_MODE_EXCL; - } -- if (param->openFlags & IOR_TRUNC) { -- fprintf(stdout, "File truncation not implemented in MPIIO\n"); -- } -- if (param->openFlags & IOR_DIRECT) { -+ if (flags & IOR_DIRECT) { - fprintf(stdout, "O_DIRECT not implemented in MPIIO\n"); - } - -@@ -162,7 +212,7 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) - */ - fd_mode |= MPI_MODE_UNIQUE_OPEN; - -- if (param->filePerProc) { -+ if (hints->filePerProc) { - comm = MPI_COMM_SELF; - } else { - comm = testComm; -@@ -181,16 +231,19 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); - } -- if(! param->dryRun){ -- MPI_CHECKF(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd), -+ if(! hints->dryRun){ -+ MPI_CHECKF(MPI_File_open(comm, testFileName, fd_mode, mpiHints, & mfd->fd), - "cannot open file: %s", testFileName); -+ if (flags & IOR_TRUNC) { -+ MPI_CHECKF(MPI_File_set_size(mfd->fd, 0), "cannot truncate file: %s", testFileName); -+ } - } - - /* show hints actually attached to file handle */ -- if (rank == 0 && param->showHints && ! param->dryRun) { -+ if (rank == 0 && param->showHints && ! hints->dryRun) { - if (mpiHints != MPI_INFO_NULL) - MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); -- MPI_CHECK(MPI_File_get_info(*fd, &mpiHints), -+ MPI_CHECK(MPI_File_get_info(mfd->fd, &mpiHints), - "cannot get file info"); - fprintf(stdout, "\nhints returned from opened file {\n"); - ShowHints(&mpiHints); -@@ -198,78 +251,88 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) - } - - /* preallocate space for file */ -- if (param->preallocate && param->open == WRITE && ! param->dryRun) { -- MPI_CHECK(MPI_File_preallocate(*fd, -- (MPI_Offset) (param->segmentCount -+ if (param->preallocate && flags & IOR_CREAT && ! hints->dryRun) { -+ MPI_CHECK(MPI_File_preallocate(mfd->fd, -+ (MPI_Offset) (hints->segmentCount - * -- param->blockSize * -- param->numTasks)), -+ hints->blockSize * -+ hints->numTasks)), - "cannot preallocate file"); - } -+ -+ - /* create file view */ - if (param->useFileView) { -+ /* Create in-memory datatype */ -+ MPI_CHECK(MPI_Type_contiguous (hints->transferSize / sizeof(IOR_size_t), MPI_LONG_LONG_INT, & mfd->contigType), "cannot create contiguous datatype"); -+ MPI_CHECK(MPI_Type_create_resized( mfd->contigType, 0, 0, & mfd->transferType), "cannot create resized type"); -+ MPI_CHECK(MPI_Type_commit(& mfd->contigType), "cannot commit datatype"); -+ MPI_CHECK(MPI_Type_commit(& mfd->transferType), "cannot commit datatype"); -+ - /* create contiguous transfer datatype */ -- MPI_CHECK(MPI_Type_contiguous -- (param->transferSize / sizeof(IOR_size_t), -- MPI_LONG_LONG_INT, ¶m->transferType), -- "cannot create contiguous datatype"); -- MPI_CHECK(MPI_Type_commit(¶m->transferType), -- "cannot commit datatype"); -- if (param->filePerProc) { -+ -+ if (hints->filePerProc) { - offsetFactor = 0; - tasksPerFile = 1; - } else { -- offsetFactor = (rank + rankOffset) % param->numTasks; -- tasksPerFile = param->numTasks; -+ offsetFactor = (rank + rankOffset) % hints->numTasks; -+ tasksPerFile = hints->numTasks; - } - -- /* -- * create file type using subarray -- */ -- fileTypeStruct.globalSizes[0] = 1; -- fileTypeStruct.globalSizes[1] = -- transfersPerBlock * tasksPerFile; -- fileTypeStruct.localSizes[0] = 1; -- fileTypeStruct.localSizes[1] = transfersPerBlock; -- fileTypeStruct.startIndices[0] = 0; -- fileTypeStruct.startIndices[1] = -- transfersPerBlock * offsetFactor; -- -- MPI_CHECK(MPI_Type_create_subarray -- (2, fileTypeStruct.globalSizes, -- fileTypeStruct.localSizes, -- fileTypeStruct.startIndices, MPI_ORDER_C, -- param->transferType, ¶m->fileType), -- "cannot create subarray"); -- MPI_CHECK(MPI_Type_commit(¶m->fileType), -- "cannot commit datatype"); -- -- if(! param->dryRun){ -- MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0, -- param->transferType, -- param->fileType, "native", -+ if(! hints->dryRun) { -+ if(! param->useStridedDatatype){ -+ struct fileTypeStruct { -+ int globalSizes[2], localSizes[2], startIndices[2]; -+ } fileTypeStruct; -+ -+ /* -+ * create file type using subarray -+ */ -+ fileTypeStruct.globalSizes[0] = 1; -+ fileTypeStruct.globalSizes[1] = transfersPerBlock * tasksPerFile; -+ fileTypeStruct.localSizes[0] = 1; -+ fileTypeStruct.localSizes[1] = transfersPerBlock; -+ fileTypeStruct.startIndices[0] = 0; -+ fileTypeStruct.startIndices[1] = transfersPerBlock * offsetFactor; -+ -+ MPI_CHECK(MPI_Type_create_subarray -+ (2, fileTypeStruct.globalSizes, -+ fileTypeStruct.localSizes, -+ fileTypeStruct.startIndices, MPI_ORDER_C, -+ mfd->contigType, & mfd->fileType), -+ "cannot create subarray"); -+ MPI_CHECK(MPI_Type_commit(& mfd->fileType), "cannot commit datatype"); -+ MPI_CHECK(MPI_File_set_view(mfd->fd, 0, -+ mfd->contigType, -+ mfd->fileType, -+ "native", - (MPI_Info) MPI_INFO_NULL), - "cannot set file view"); -+ }else{ -+ MPI_CHECK(MPI_Type_create_resized(mfd->contigType, 0, tasksPerFile * hints->blockSize, & mfd->fileType), "cannot create MPI_Type_create_hvector"); -+ MPI_CHECK(MPI_Type_commit(& mfd->fileType), "cannot commit datatype"); -+ } - } - } - if (mpiHints != MPI_INFO_NULL) - MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); -- return ((void *)fd); -+ return ((void *) mfd); - } - - /* - * Write or read access to file using the MPIIO interface. - */ --static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t MPIIO_Xfer(int access, aiori_fd_t * fdp, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * module_options) - { - /* NOTE: The second arg is (void *) for reads, and (const void *) - for writes. Therefore, one of the two sets of assignments below - will get "assignment from incompatible pointer-type" warnings, - if we only use this one set of signatures. */ -- -- if(param->dryRun) -+ mpiio_options_t * param = (mpiio_options_t*) module_options; -+ if(hints->dryRun) - return length; -+ mpiio_fd_t * mfd = (mpiio_fd_t*) fdp; - - int (MPIAPI * Access) (MPI_File, void *, int, - MPI_Datatype, MPI_Status *); -@@ -313,42 +376,59 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, - * Access_ordered = MPI_File_read_ordered; - */ - } -- -+ - /* - * 'useFileView' uses derived datatypes and individual file pointers - */ - if (param->useFileView) { - /* find offset in file */ -- if (SeekOffset(*(MPI_File *) fd, param->offset, param) < -+ if (SeekOffset(mfd->fd, offset, module_options) < - 0) { - /* if unsuccessful */ - length = -1; - } else { -+ - /* -- * 'useStridedDatatype' fits multi-strided pattern into a datatype; -- * must use 'length' to determine repetitions (fix this for -- * multi-segments someday, WEL): -- * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO -S' -- */ -+ * 'useStridedDatatype' fits multi-strided pattern into a datatype; -+ * must use 'length' to determine repetitions (fix this for -+ * multi-segments someday, WEL): -+ * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO --mpiio.useStridedDatatype --mpiio.useFileView' -+ */ - if (param->useStridedDatatype) { -- length = param->segmentCount; -- } else { -- length = 1; -+ if(offset >= (rank+1) * hints->blockSize){ -+ /* we shall write only once per transferSize */ -+ /* printf("FAKE access %d %lld\n", rank, offset); */ -+ return hints->transferSize; -+ } -+ length = hints->segmentCount; -+ MPI_CHECK(MPI_File_set_view(mfd->fd, offset, -+ mfd->contigType, -+ mfd->fileType, -+ "native", -+ (MPI_Info) MPI_INFO_NULL), "cannot set file view"); -+ /* printf("ACCESS %d %lld -> %lld\n", rank, offset, length); */ -+ }else{ -+ length = 1; - } -- if (param->collective) { -+ if (hints->collective) { - /* individual, collective call */ - MPI_CHECK(Access_all -- (*(MPI_File *) fd, buffer, length, -- param->transferType, &status), -+ (mfd->fd, buffer, length, -+ mfd->transferType, &status), - "cannot access collective"); - } else { - /* individual, noncollective call */ - MPI_CHECK(Access -- (*(MPI_File *) fd, buffer, length, -- param->transferType, &status), -+ (mfd->fd, buffer, length, -+ mfd->transferType, &status), - "cannot access noncollective"); - } -- length *= param->transferSize; /* for return value in bytes */ -+ /* MPI-IO driver does "nontcontiguous" by transfering -+ * 'segment' regions of 'transfersize' bytes, but -+ * our caller WriteOrReadSingle does not know how to -+ * deal with us reporting that we wrote N times more -+ * data than requested. */ -+ length = hints->transferSize; - } - } else { - /* -@@ -358,7 +438,7 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, - if (param->useSharedFilePointer) { - /* find offset in file */ - if (SeekOffset -- (*(MPI_File *) fd, param->offset, param) < 0) { -+ (mfd->fd, offset, module_options) < 0) { - /* if unsuccessful */ - length = -1; - } else { -@@ -374,63 +454,65 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, - "useSharedFilePointer not implemented\n"); - } - } else { -- if (param->collective) { -+ if (hints->collective) { - /* explicit, collective call */ - MPI_CHECK(Access_at_all -- (*(MPI_File *) fd, param->offset, -+ (mfd->fd, offset, - buffer, length, MPI_BYTE, &status), - "cannot access explicit, collective"); - } else { - /* explicit, noncollective call */ - MPI_CHECK(Access_at -- (*(MPI_File *) fd, param->offset, -+ (mfd->fd, offset, - buffer, length, MPI_BYTE, &status), - "cannot access explicit, noncollective"); - } - } - } -- if((access == WRITE) && (param->fsyncPerWrite == TRUE)) -- MPIIO_Fsync(fd, param); -- return (length); -+ return hints->transferSize; - } - - /* - * Perform fsync(). - */ --static void MPIIO_Fsync(void *fdp, IOR_param_t * param) -+static void MPIIO_Fsync(aiori_fd_t *fdp, aiori_mod_opt_t * module_options) - { -- if(param->dryRun) -+ mpiio_options_t * param = (mpiio_options_t*) module_options; -+ if(hints->dryRun) - return; -- if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS) -- EWARN("fsync() failed"); -+ mpiio_fd_t * mfd = (mpiio_fd_t*) fdp; -+ if (MPI_File_sync(mfd->fd) != MPI_SUCCESS) -+ WARN("fsync() failed"); - } - - /* - * Close a file through the MPIIO interface. - */ --static void MPIIO_Close(void *fd, IOR_param_t * param) -+static void MPIIO_Close(aiori_fd_t *fdp, aiori_mod_opt_t * module_options) - { -- if(! param->dryRun){ -- MPI_CHECK(MPI_File_close((MPI_File *) fd), "cannot close file"); -+ mpiio_options_t * param = (mpiio_options_t*) module_options; -+ mpiio_fd_t * mfd = (mpiio_fd_t*) fdp; -+ if(! hints->dryRun){ -+ MPI_CHECK(MPI_File_close(& mfd->fd), "cannot close file"); - } -- if ((param->useFileView == TRUE) && (param->fd_fppReadCheck == NULL)) { -- /* -- * need to free the datatype, so done in the close process -- */ -- MPI_CHECK(MPI_Type_free(¶m->fileType), -- "cannot free MPI file datatype"); -- MPI_CHECK(MPI_Type_free(¶m->transferType), -- "cannot free MPI transfer datatype"); -+ if (param->useFileView == TRUE) { -+ /* -+ * need to free the datatype, so done in the close process -+ */ -+ MPI_CHECK(MPI_Type_free(& mfd->fileType), "cannot free MPI file datatype"); -+ MPI_CHECK(MPI_Type_free(& mfd->transferType), "cannot free MPI transfer datatype"); -+ MPI_CHECK(MPI_Type_free(& mfd->contigType), "cannot free type"); - } -- free(fd); -+ free(fdp); - } - - /* - * Delete a file through the MPIIO interface. - */ --void MPIIO_Delete(char *testFileName, IOR_param_t * param) -+void MPIIO_Delete(char *testFileName, aiori_mod_opt_t * module_options) - { -- if(param->dryRun) -+ mpiio_options_t * param = (mpiio_options_t*) module_options; -+ if(hints->dryRun) - return; - MPI_CHECKF(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), - "cannot delete file: %s", testFileName); -@@ -452,36 +534,37 @@ static char* MPIIO_GetVersion() - * Seek to offset in file using the MPIIO interface. - */ - static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset, -- IOR_param_t * param) -+ aiori_mod_opt_t * module_options) - { -+ mpiio_options_t * param = (mpiio_options_t*) module_options; - int offsetFactor, tasksPerFile; - IOR_offset_t tempOffset; - - tempOffset = offset; - -- if (param->filePerProc) { -+ if (hints->filePerProc) { - offsetFactor = 0; - tasksPerFile = 1; - } else { -- offsetFactor = (rank + rankOffset) % param->numTasks; -- tasksPerFile = param->numTasks; -+ offsetFactor = (rank + rankOffset) % hints->numTasks; -+ tasksPerFile = hints->numTasks; - } - if (param->useFileView) { - /* recall that offsets in a file view are - counted in units of transfer size */ -- if (param->filePerProc) { -- tempOffset = tempOffset / param->transferSize; -+ if (hints->filePerProc) { -+ tempOffset = tempOffset / hints->transferSize; - } else { - /* - * this formula finds a file view offset for a task - * from an absolute offset - */ -- tempOffset = ((param->blockSize / param->transferSize) -+ tempOffset = ((hints->blockSize / hints->transferSize) - * (tempOffset / -- (param->blockSize * tasksPerFile))) -- + (((tempOffset % (param->blockSize * tasksPerFile)) -- - (offsetFactor * param->blockSize)) -- / param->transferSize); -+ (hints->blockSize * tasksPerFile))) -+ + (((tempOffset % (hints->blockSize * tasksPerFile)) -+ - (offsetFactor * hints->blockSize)) -+ / hints->transferSize); - } - } - MPI_CHECK(MPI_File_seek(fd, tempOffset, MPI_SEEK_SET), -@@ -493,17 +576,17 @@ static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset, - * Use MPI_File_get_size() to return aggregate file size. - * NOTE: This function is used by the HDF5 and NCMPI backends. - */ --IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, -- char *testFileName) -+IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * module_options, char *testFileName) - { -- if(test->dryRun) -+ mpiio_options_t * test = (mpiio_options_t*) module_options; -+ if(hints->dryRun) - return 0; - IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - MPI_File fd; - MPI_Comm comm; - MPI_Info mpiHints = MPI_INFO_NULL; - -- if (test->filePerProc == TRUE) { -+ if (hints->filePerProc == TRUE) { - comm = MPI_COMM_SELF; - } else { - comm = testComm; -@@ -519,26 +602,5 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, - if (mpiHints != MPI_INFO_NULL) - MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); - -- if (test->filePerProc == TRUE) { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, -- MPI_LONG_LONG_INT, MPI_SUM, testComm), -- "cannot total data moved"); -- aggFileSizeFromStat = tmpSum; -- } else { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, -- MPI_LONG_LONG_INT, MPI_MIN, testComm), -- "cannot total data moved"); -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, -- MPI_LONG_LONG_INT, MPI_MAX, testComm), -- "cannot total data moved"); -- if (tmpMin != tmpMax) { -- if (rank == 0) { -- WARN("inconsistent file size by different tasks"); -- } -- /* incorrect, but now consistent across tasks */ -- aggFileSizeFromStat = tmpMin; -- } -- } -- - return (aggFileSizeFromStat); - } -diff --git a/src/aiori-NCMPI.c b/src/aiori-NCMPI.c -index 5fc1375..b098470 100755 ---- a/src/aiori-NCMPI.c -+++ b/src/aiori-NCMPI.c -@@ -32,33 +32,73 @@ - * NCMPI_CHECK will display a custom error message and then exit the program - */ - #define NCMPI_CHECK(NCMPI_RETURN, MSG) do { \ -+ int _NCMPI_RETURN = (NCMPI_RETURN); \ - \ -- if (NCMPI_RETURN < 0) { \ -+ if (_NCMPI_RETURN != NC_NOERR) { \ - fprintf(stdout, "** error **\n"); \ - fprintf(stdout, "ERROR in %s (line %d): %s.\n", \ - __FILE__, __LINE__, MSG); \ -- fprintf(stdout, "ERROR: %s.\n", ncmpi_strerror(NCMPI_RETURN)); \ -+ fprintf(stdout, "ERROR: %s.\n", ncmpi_strerror(_NCMPI_RETURN)); \ - fprintf(stdout, "** exiting **\n"); \ -- exit(-1); \ -+ exit(EXIT_FAILURE); \ - } \ - } while(0) - - /**************************** P R O T O T Y P E S *****************************/ - --static int GetFileMode(IOR_param_t *); -+static int GetFileMode(int flags); - --static void *NCMPI_Create(char *, IOR_param_t *); --static void *NCMPI_Open(char *, IOR_param_t *); --static IOR_offset_t NCMPI_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void NCMPI_Close(void *, IOR_param_t *); --static void NCMPI_Delete(char *, IOR_param_t *); -+static aiori_fd_t *NCMPI_Create(char *, int iorflags, aiori_mod_opt_t *); -+static aiori_fd_t *NCMPI_Open(char *, int iorflags, aiori_mod_opt_t *); -+static IOR_offset_t NCMPI_Xfer(int, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -+static void NCMPI_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void NCMPI_Delete(char *, aiori_mod_opt_t *); - static char *NCMPI_GetVersion(); --static void NCMPI_Fsync(void *, IOR_param_t *); --static IOR_offset_t NCMPI_GetFileSize(IOR_param_t *, MPI_Comm, char *); --static int NCMPI_Access(const char *, int, IOR_param_t *); -+static void NCMPI_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static IOR_offset_t NCMPI_GetFileSize(aiori_mod_opt_t *, char *); -+static int NCMPI_Access(const char *, int, aiori_mod_opt_t *); - - /************************** D E C L A R A T I O N S ***************************/ -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void NCMPI_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+ -+ MPIIO_xfer_hints(params); -+} -+ -+typedef struct { -+ mpiio_options_t mpio; -+ -+ /* runtime variables */ -+ int var_id; /* variable id handle for data set */ -+ int firstReadCheck; -+ int startDataSet; -+} ncmpi_options_t; -+ -+ -+static option_help * NCMPI_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ ncmpi_options_t * o = malloc(sizeof(ncmpi_options_t)); -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(ncmpi_options_t)); -+ }else{ -+ memset(o, 0, sizeof(ncmpi_options_t)); -+ } -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ -+ option_help h [] = { -+ {0, "ncmpi.hintsFileName","Full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & o->mpio.hintsFileName}, -+ {0, "ncmpi.showHints", "Show MPI hints", OPTION_FLAG, 'd', & o->mpio.showHints}, -+ {0, "ncmpi.preallocate", "Preallocate file size", OPTION_FLAG, 'd', & o->mpio.preallocate}, -+ {0, "ncmpi.useStridedDatatype", "put strided access into datatype", OPTION_FLAG, 'd', & o->mpio.useStridedDatatype}, -+ {0, "ncmpi.useFileView", "Use MPI_File_set_view", OPTION_FLAG, 'd', & o->mpio.useFileView}, -+ LAST_OPTION -+ }; -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} - - ior_aiori_t ncmpi_aiori = { - .name = "NCMPI", -@@ -76,6 +116,8 @@ ior_aiori_t ncmpi_aiori = { - .rmdir = aiori_posix_rmdir, - .access = NCMPI_Access, - .stat = aiori_posix_stat, -+ .get_options = NCMPI_options, -+ .xfer_hints = NCMPI_xfer_hints, - }; - - /***************************** F U N C T I O N S ******************************/ -@@ -83,15 +125,16 @@ ior_aiori_t ncmpi_aiori = { - /* - * Create and open a file through the NCMPI interface. - */ --static void *NCMPI_Create(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *NCMPI_Create(char *testFileName, int iorflags, aiori_mod_opt_t * param) - { - int *fd; - int fd_mode; - MPI_Info mpiHints = MPI_INFO_NULL; -+ ncmpi_options_t * o = (ncmpi_options_t*) param; - - /* read and set MPI file hints from hintsFile */ -- SetHints(&mpiHints, param->hintsFileName); -- if (rank == 0 && param->showHints) { -+ SetHints(&mpiHints, o->mpio.hintsFileName); -+ if (rank == 0 && o->mpio.showHints) { - fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); -@@ -101,7 +144,7 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) - if (fd == NULL) - ERR("malloc() failed"); - -- fd_mode = GetFileMode(param); -+ fd_mode = GetFileMode(iorflags); - NCMPI_CHECK(ncmpi_create(testComm, testFileName, fd_mode, - mpiHints, fd), "cannot create file"); - -@@ -111,10 +154,9 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) - - #if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) - /* ncmpi_get_file_info is first available in 1.2.0 */ -- if (rank == 0 && param->showHints) { -+ if (rank == 0 && o->mpio.showHints) { - MPI_Info info_used; -- MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), -- "cannot inquire file info"); -+ NCMPI_CHECK(ncmpi_get_file_info(*fd, &info_used), "cannot inquire file info"); - /* print the MPI file hints currently used */ - fprintf(stdout, "\nhints returned from opened file {\n"); - ShowHints(&info_used); -@@ -123,21 +165,22 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) - } - #endif - -- return (fd); -+ return (aiori_fd_t*)(fd); - } - - /* - * Open a file through the NCMPI interface. - */ --static void *NCMPI_Open(char *testFileName, IOR_param_t * param) -+static aiori_fd_t *NCMPI_Open(char *testFileName, int iorflags, aiori_mod_opt_t * param) - { - int *fd; - int fd_mode; - MPI_Info mpiHints = MPI_INFO_NULL; -+ ncmpi_options_t * o = (ncmpi_options_t*) param; - - /* read and set MPI file hints from hintsFile */ -- SetHints(&mpiHints, param->hintsFileName); -- if (rank == 0 && param->showHints) { -+ SetHints(&mpiHints, o->mpio.hintsFileName); -+ if (rank == 0 && o->mpio.showHints) { - fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); -@@ -147,7 +190,7 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) - if (fd == NULL) - ERR("malloc() failed"); - -- fd_mode = GetFileMode(param); -+ fd_mode = GetFileMode(iorflags); - NCMPI_CHECK(ncmpi_open(testComm, testFileName, fd_mode, - mpiHints, fd), "cannot open file"); - -@@ -157,7 +200,7 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) - - #if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) - /* ncmpi_get_file_info is first available in 1.2.0 */ -- if (rank == 0 && param->showHints) { -+ if (rank == 0 && o->mpio.showHints) { - MPI_Info info_used; - MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), - "cannot inquire file info"); -@@ -169,51 +212,43 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) - } - #endif - -- return (fd); -+ return (aiori_fd_t*)(fd); - } - - /* - * Write or read access to file using the NCMPI interface. - */ --static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t NCMPI_Xfer(int access, aiori_fd_t *fd, IOR_size_t * buffer, IOR_offset_t transferSize, IOR_offset_t offset, aiori_mod_opt_t * param) - { - signed char *bufferPtr = (signed char *)buffer; -- static int firstReadCheck = FALSE, startDataSet; -+ ncmpi_options_t * o = (ncmpi_options_t*) param; - int var_id, dim_id[NUM_DIMS]; -- MPI_Offset bufSize[NUM_DIMS], offset[NUM_DIMS]; -+ MPI_Offset bufSize[NUM_DIMS], offsets[NUM_DIMS]; - IOR_offset_t segmentPosition; - int segmentNum, transferNum; - - /* determine by offset if need to start data set */ -- if (param->filePerProc == TRUE) { -+ if (hints->filePerProc == TRUE) { - segmentPosition = (IOR_offset_t) 0; - } else { -- segmentPosition = -- (IOR_offset_t) ((rank + rankOffset) % param->numTasks) -- * param->blockSize; -+ segmentPosition = (IOR_offset_t) ((rank + rankOffset) % hints->numTasks) * hints->blockSize; - } -- if ((int)(param->offset - segmentPosition) == 0) { -- startDataSet = TRUE; -+ if ((int)(offset - segmentPosition) == 0) { -+ o->startDataSet = TRUE; - /* - * this toggle is for the read check operation, which passes through - * this function twice; note that this function will open a data set - * only on the first read check and close only on the second - */ - if (access == READCHECK) { -- if (firstReadCheck == TRUE) { -- firstReadCheck = FALSE; -- } else { -- firstReadCheck = TRUE; -- } -+ o->firstReadCheck = ! o->firstReadCheck; - } - } - -- if (startDataSet == TRUE && -- (access != READCHECK || firstReadCheck == TRUE)) { -+ if (o->startDataSet == TRUE && -+ (access != READCHECK || o->firstReadCheck == TRUE)) { - if (access == WRITE) { -- int numTransfers = -- param->blockSize / param->transferSize; -+ int numTransfers = hints->blockSize / hints->transferSize; - - /* reshape 1D array to 3D array: - [segmentCount*numTasks][numTransfers][transferSize] -@@ -229,7 +264,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, - "cannot define data set dimensions"); - NCMPI_CHECK(ncmpi_def_dim - (*(int *)fd, "transfer_size", -- param->transferSize, &dim_id[2]), -+ hints->transferSize, &dim_id[2]), - "cannot define data set dimensions"); - NCMPI_CHECK(ncmpi_def_var - (*(int *)fd, "data_var", NC_BYTE, NUM_DIMS, -@@ -244,80 +279,72 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, - "cannot retrieve data set variable"); - } - -- if (param->collective == FALSE) { -+ if (hints->collective == FALSE) { - NCMPI_CHECK(ncmpi_begin_indep_data(*(int *)fd), - "cannot enable independent data mode"); - } - -- param->var_id = var_id; -- startDataSet = FALSE; -+ o->var_id = var_id; -+ o->startDataSet = FALSE; - } - -- var_id = param->var_id; -+ var_id = o->var_id; - - /* calculate the segment number */ -- segmentNum = param->offset / (param->numTasks * param->blockSize); -+ segmentNum = offset / (hints->numTasks * hints->blockSize); - - /* calculate the transfer number in each block */ -- transferNum = param->offset % param->blockSize / param->transferSize; -+ transferNum = offset % hints->blockSize / hints->transferSize; - - /* read/write the 3rd dim of the dataset, each is of - amount param->transferSize */ - bufSize[0] = 1; - bufSize[1] = 1; -- bufSize[2] = param->transferSize; -+ bufSize[2] = transferSize; - -- offset[0] = segmentNum * param->numTasks + rank; -- offset[1] = transferNum; -- offset[2] = 0; -+ offsets[0] = segmentNum * hints->numTasks + rank; -+ offsets[1] = transferNum; -+ offsets[2] = 0; - - /* access the file */ - if (access == WRITE) { /* WRITE */ -- if (param->collective) { -+ if (hints->collective) { - NCMPI_CHECK(ncmpi_put_vara_schar_all -- (*(int *)fd, var_id, offset, bufSize, -- bufferPtr), -+ (*(int *)fd, var_id, offsets, bufSize, bufferPtr), - "cannot write to data set"); - } else { - NCMPI_CHECK(ncmpi_put_vara_schar -- (*(int *)fd, var_id, offset, bufSize, -- bufferPtr), -+ (*(int *)fd, var_id, offsets, bufSize, bufferPtr), - "cannot write to data set"); - } - } else { /* READ or CHECK */ -- if (param->collective == TRUE) { -+ if (hints->collective == TRUE) { - NCMPI_CHECK(ncmpi_get_vara_schar_all -- (*(int *)fd, var_id, offset, bufSize, -- bufferPtr), -+ (*(int *)fd, var_id, offsets, bufSize, bufferPtr), - "cannot read from data set"); - } else { - NCMPI_CHECK(ncmpi_get_vara_schar -- (*(int *)fd, var_id, offset, bufSize, -- bufferPtr), -+ (*(int *)fd, var_id, offsets, bufSize, bufferPtr), - "cannot read from data set"); - } - } - -- return (length); -+ return (transferSize); - } - - /* - * Perform fsync(). - */ --static void NCMPI_Fsync(void *fd, IOR_param_t * param) -+static void NCMPI_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- ; -+ NCMPI_CHECK(ncmpi_sync(*(int *)fd), "cannot sync file"); - } - - /* - * Close a file through the NCMPI interface. - */ --static void NCMPI_Close(void *fd, IOR_param_t * param) -+static void NCMPI_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) - { -- if (param->collective == FALSE) { -- NCMPI_CHECK(ncmpi_end_indep_data(*(int *)fd), -- "cannot disable independent data mode"); -- } - NCMPI_CHECK(ncmpi_close(*(int *)fd), "cannot close file"); - free(fd); - } -@@ -325,9 +352,9 @@ static void NCMPI_Close(void *fd, IOR_param_t * param) - /* - * Delete a file through the NCMPI interface. - */ --static void NCMPI_Delete(char *testFileName, IOR_param_t * param) -+static void NCMPI_Delete(char *testFileName, aiori_mod_opt_t * param) - { -- return(MPIIO_Delete(testFileName, param)); -+ NCMPI_CHECK(ncmpi_delete(testFileName, MPI_INFO_NULL), "cannot delete file"); - } - - /* -@@ -341,39 +368,39 @@ static char* NCMPI_GetVersion() - /* - * Return the correct file mode for NCMPI. - */ --static int GetFileMode(IOR_param_t * param) -+static int GetFileMode(int flags) - { - int fd_mode = 0; - - /* set IOR file flags to NCMPI flags */ - /* -- file open flags -- */ -- if (param->openFlags & IOR_RDONLY) { -+ if (flags & IOR_RDONLY) { - fd_mode |= NC_NOWRITE; - } -- if (param->openFlags & IOR_WRONLY) { -- fprintf(stdout, "File write only not implemented in NCMPI\n"); -+ if (flags & IOR_WRONLY) { -+ WARN("File write only not implemented in NCMPI"); - } -- if (param->openFlags & IOR_RDWR) { -+ if (flags & IOR_RDWR) { - fd_mode |= NC_WRITE; - } -- if (param->openFlags & IOR_APPEND) { -- fprintf(stdout, "File append not implemented in NCMPI\n"); -+ if (flags & IOR_APPEND) { -+ WARN("File append not implemented in NCMPI"); - } -- if (param->openFlags & IOR_CREAT) { -+ if (flags & IOR_CREAT) { - fd_mode |= NC_CLOBBER; - } -- if (param->openFlags & IOR_EXCL) { -- fprintf(stdout, "Exclusive access not implemented in NCMPI\n"); -+ if (flags & IOR_EXCL) { -+ WARN("Exclusive access not implemented in NCMPI"); - } -- if (param->openFlags & IOR_TRUNC) { -- fprintf(stdout, "File truncation not implemented in NCMPI\n"); -+ if (flags & IOR_TRUNC) { -+ fd_mode |= NC_CLOBBER; - } -- if (param->openFlags & IOR_DIRECT) { -- fprintf(stdout, "O_DIRECT not implemented in NCMPI\n"); -+ if (flags & IOR_DIRECT) { -+ WARN("O_DIRECT not implemented in NCMPI"); - } - -- /* to enable > 4GB file size */ -- fd_mode |= NC_64BIT_OFFSET; -+ /* to enable > 4GB variable size */ -+ fd_mode |= NC_64BIT_DATA; - - return (fd_mode); - } -@@ -381,16 +408,16 @@ static int GetFileMode(IOR_param_t * param) - /* - * Use MPIIO call to get file size. - */ --static IOR_offset_t NCMPI_GetFileSize(IOR_param_t * test, MPI_Comm testComm, -+static IOR_offset_t NCMPI_GetFileSize(aiori_mod_opt_t * opt, - char *testFileName) - { -- return(MPIIO_GetFileSize(test, testComm, testFileName)); -+ return(MPIIO_GetFileSize(opt, testFileName)); - } - - /* - * Use MPIIO call to check for access. - */ --static int NCMPI_Access(const char *path, int mode, IOR_param_t *param) -+static int NCMPI_Access(const char *path, int mode, aiori_mod_opt_t *param) - { - return(MPIIO_Access(path, mode, param)); - } -diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c -new file mode 100644 -index 0000000..79b41b4 ---- /dev/null -+++ b/src/aiori-PMDK.c -@@ -0,0 +1,243 @@ -+/******************************************************************************\ -+ * * -+ * Copyright (c) 2019 EPCC, The University of Edinburgh * -+ * Written by Adrian Jackson a.jackson@epcc.ed.ac.uk * -+ * * -+ ******************************************************************************* -+ * * -+ * * -+ * This file implements the abstract I/O interface for the low-level PMDK API * -+ * * -+\******************************************************************************/ -+ -+ -+#include "aiori.h" /* abstract IOR interface */ -+#include /* sys_errlist */ -+#include /* only for fprintf() */ -+#include -+#include -+#include -+ -+ -+ -+static option_help options [] = { -+ LAST_OPTION -+}; -+ -+ -+/**************************** P R O T O T Y P E S *****************************/ -+ -+static option_help * PMDK_options(); -+static aiori_fd_t *PMDK_Create(char *,int iorflags, aiori_mod_opt_t *); -+static aiori_fd_t *PMDK_Open(char *, int iorflags, aiori_mod_opt_t *); -+static IOR_offset_t PMDK_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -+static void PMDK_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+static void PMDK_Close(aiori_fd_t *, aiori_mod_opt_t *); -+static void PMDK_Delete(char *, aiori_mod_opt_t *); -+static IOR_offset_t PMDK_GetFileSize(aiori_mod_opt_t *, char *); -+ -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void PMDK_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} -+ -+/************************** D E C L A R A T I O N S ***************************/ -+ -+extern int errno; -+extern int rank; -+extern int rankOffset; -+extern int verbose; -+extern MPI_Comm testComm; -+ -+ior_aiori_t pmdk_aiori = { -+ .name = "PMDK", -+ .name_legacy = NULL, -+ .create = PMDK_Create, -+ .open = PMDK_Open, -+ .xfer = PMDK_Xfer, -+ .close = PMDK_Close, -+ .delete = PMDK_Delete, -+ .get_version = aiori_get_version, -+ .fsync = PMDK_Fsync, -+ .xfer_hints = PMDK_xfer_hints, -+ .get_file_size = PMDK_GetFileSize, -+ .statfs = aiori_posix_statfs, -+ .mkdir = aiori_posix_mkdir, -+ .rmdir = aiori_posix_rmdir, -+ .access = aiori_posix_access, -+ .stat = aiori_posix_stat, -+ .get_options = PMDK_options, -+ .enable_mdtest = false, -+}; -+ -+ -+/***************************** F U N C T I O N S ******************************/ -+ -+/******************************************************************************/ -+ -+static option_help * PMDK_options(){ -+ return options; -+} -+ -+ -+/* -+ * Create and open a memory space through the PMDK interface. -+ */ -+static aiori_fd_t *PMDK_Create(char * testFileName, int iorflags, aiori_mod_opt_t * param){ -+ char *pmemaddr = NULL; -+ int is_pmem; -+ size_t mapped_len; -+ size_t open_length; -+ -+ if(! hints->filePerProc){ -+ fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ open_length = hints->blockSize * hints->segmentCount; -+ -+ if((pmemaddr = pmem_map_file(testFileName, open_length, -+ PMEM_FILE_CREATE|PMEM_FILE_EXCL, -+ 0666, &mapped_len, &is_pmem)) == NULL) { -+ fprintf(stdout, "\nFailed to pmem_map_file for filename: %s in IOR_Create_PMDK\n", testFileName); -+ perror("pmem_map_file"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ if(!is_pmem){ -+ fprintf(stdout, "\n is_pmem is %d\n",is_pmem); -+ fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ -+ -+ return((void *)pmemaddr); -+} /* PMDK_Create() */ -+ -+ -+/******************************************************************************/ -+/* -+ * Open a memory space through the PMDK interface. -+ */ -+static aiori_fd_t *PMDK_Open(char * testFileName,int iorflags, aiori_mod_opt_t * param){ -+ -+ char *pmemaddr = NULL; -+ int is_pmem; -+ size_t mapped_len; -+ size_t open_length; -+ -+ if(!hints->filePerProc){ -+ fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ open_length = hints->blockSize * hints->segmentCount; -+ -+ if((pmemaddr = pmem_map_file(testFileName, 0, -+ PMEM_FILE_EXCL, -+ 0666, &mapped_len, &is_pmem)) == NULL) { -+ fprintf(stdout, "\nFailed to pmem_map_file for filename: %s\n in IOR_Open_PMDK", testFileName); -+ perror("pmem_map_file"); -+ fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ if(!is_pmem){ -+ fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ return((void *)pmemaddr); -+} /* PMDK_Open() */ -+ -+ -+/******************************************************************************/ -+/* -+ * Write or read access to a memory space created with PMDK. Include drain/flush functionality. -+ */ -+ -+static IOR_offset_t PMDK_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param){ -+ int xferRetries = 0; -+ long long remaining = (long long)length; -+ char * ptr = (char *)buffer; -+ long long rc; -+ long long i; -+ long long offset_size; -+ -+ offset_size = offset; -+ -+ if(access == WRITE){ -+ if(hints->fsyncPerWrite){ -+ pmem_memcpy_nodrain(&file[offset_size], ptr, length); -+ }else{ -+ pmem_memcpy_persist(&file[offset_size], ptr, length); -+ } -+ }else{ -+ memcpy(ptr, &file[offset_size], length); -+ } -+ -+ return(length); -+} /* PMDK_Xfer() */ -+ -+ -+/******************************************************************************/ -+/* -+ * Perform fsync(). -+ */ -+ -+static void PMDK_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) -+{ -+ pmem_drain(); -+} /* PMDK_Fsync() */ -+ -+ -+/******************************************************************************/ -+/* -+ * Stub for close functionality that is not required for PMDK -+ */ -+ -+static void PMDK_Close(aiori_fd_t *fd, aiori_mod_opt_t * param){ -+ size_t open_length; -+ open_length = hints->transferSize; -+ pmem_unmap(fd, open_length); -+} /* PMDK_Close() */ -+ -+ -+/******************************************************************************/ -+/* -+ * Delete the file backing a memory space through PMDK -+ */ -+ -+static void PMDK_Delete(char *testFileName, aiori_mod_opt_t * param) -+{ -+ char errmsg[256]; -+ sprintf(errmsg,"[RANK %03d]:cannot delete file %s\n",rank,testFileName); -+ if (unlink(testFileName) != 0) WARN(errmsg); -+} /* PMDK_Delete() */ -+ -+/******************************************************************************/ -+/* -+ * Use POSIX stat() to return aggregate file size. -+ */ -+ -+static IOR_offset_t PMDK_GetFileSize(aiori_mod_opt_t * test, -+ char * testFileName) -+{ -+ struct stat stat_buf; -+ IOR_offset_t aggFileSizeFromStat, -+ tmpMin, tmpMax, tmpSum; -+ if (hints->filePerProc == FALSE) { -+ fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); -+ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); -+ } -+ -+ if (stat(testFileName, &stat_buf) != 0) { -+ ERR("cannot get status of written file"); -+ } -+ aggFileSizeFromStat = stat_buf.st_size; -+ -+ return(aggFileSizeFromStat); -+} /* PMDK_GetFileSize() */ -diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c -index 463a9c8..67ea4a8 100755 ---- a/src/aiori-POSIX.c -+++ b/src/aiori-POSIX.c -@@ -32,11 +32,14 @@ - #include - - --#ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H --# include --#elif defined(HAVE_LUSTRE_LUSTRE_USER_H) --# include --#endif -+#ifdef HAVE_LUSTRE_USER -+# ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H -+# include -+# elif defined(HAVE_LUSTRE_LUSTRE_USER_H) -+# include -+# endif -+#endif /* HAVE_LUSTRE_USER */ -+ - #ifdef HAVE_GPFS_H - # include - #endif -@@ -45,9 +48,9 @@ - #endif - - #ifdef HAVE_BEEGFS_BEEGFS_H --#include --#include --#include -+# include -+# include -+# include - #endif - - #include "ior.h" -@@ -55,6 +58,22 @@ - #include "iordef.h" - #include "utilities.h" - -+#include "aiori-POSIX.h" -+ -+#ifdef HAVE_GPU_DIRECT -+typedef long long loff_t; -+# include -+# include -+#endif -+ -+typedef struct { -+ int fd; -+#ifdef HAVE_GPU_DIRECT -+ CUfileHandle_t cf_handle; -+#endif -+} posix_fd; -+ -+ - #ifndef open64 /* necessary for TRU64 -- */ - # define open64 open /* unlikely, but may pose */ - #endif /* not open64 */ /* conflicting prototypes */ -@@ -67,32 +86,73 @@ - # define O_BINARY 0 - #endif - --/**************************** P R O T O T Y P E S *****************************/ --static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); --static void POSIX_Fsync(void *, IOR_param_t *); --static void POSIX_Sync(IOR_param_t * ); -+#ifdef HAVE_GPU_DIRECT -+static const char* cuFileGetErrorString(CUfileError_t status){ -+ if(IS_CUDA_ERR(status)){ -+ return cudaGetErrorString(status.err); -+ } -+ return strerror(status.err); -+} - --/************************** O P T I O N S *****************************/ --typedef struct{ -- /* in case of a change, please update depending MMAP module too */ -- int direct_io; --} posix_options_t; -+static void init_cufile(posix_fd * pfd){ -+ CUfileDescr_t cf_descr = (CUfileDescr_t){ -+ .handle.fd = pfd->fd, -+ .type = CU_FILE_HANDLE_TYPE_OPAQUE_FD -+ }; -+ CUfileError_t status = cuFileHandleRegister(& pfd->cf_handle, & cf_descr); -+ if(status.err != CU_FILE_SUCCESS){ -+ WARNF("Could not register handle %s", cuFileGetErrorString(status)); -+ } -+} -+#endif -+ -+/**************************** P R O T O T Y P E S *****************************/ -+static void POSIX_Initialize(aiori_mod_opt_t * options); -+static void POSIX_Finalize(aiori_mod_opt_t * options); - -+static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); - --option_help * POSIX_options(void ** init_backend_options, void * init_values){ -+option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ - posix_options_t * o = malloc(sizeof(posix_options_t)); - - if (init_values != NULL){ - memcpy(o, init_values, sizeof(posix_options_t)); - }else{ -+ memset(o, 0, sizeof(posix_options_t)); - o->direct_io = 0; -+ o->lustre_stripe_count = -1; -+ o->lustre_start_ost = -1; -+ o->beegfs_numTargets = -1; -+ o->beegfs_chunkSize = -1; - } - -- *init_backend_options = o; -+ *init_backend_options = (aiori_mod_opt_t*) o; - - option_help h [] = { - {0, "posix.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, -+#ifdef HAVE_BEEGFS_BEEGFS_H -+ {0, "posix.beegfs.NumTargets", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->beegfs_numTargets}, -+ {0, "posix.beegfs.ChunkSize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->beegfs_chunkSize}, -+#endif -+#ifdef HAVE_GPFS_FCNTL_H -+ {0, "posix.gpfs.hintaccess", "", OPTION_FLAG, 'd', & o->gpfs_hint_access}, -+ {0, "posix.gpfs.releasetoken", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->gpfs_release_token}, -+#ifdef HAVE_GPFSFINEGRAINWRITESHARING_T -+ {0, "posix.gpfs.finegrainwritesharing", " Enable fine grain write sharing", OPTION_FLAG, 'd', & o->gpfs_finegrain_writesharing}, -+ {0, "posix.gpfs.finegrainreadsharing", " Enable fine grain read sharing", OPTION_FLAG, 'd', & o->gpfs_finegrain_readsharing}, -+#endif -+ -+#endif -+#ifdef HAVE_LUSTRE_USER -+ {0, "posix.lustre.stripecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, -+ {0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size}, -+ {0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost}, -+ {0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks}, -+#endif /* HAVE_LUSTRE_USER */ -+#ifdef HAVE_GPU_DIRECT -+ {0, "gpuDirect", "allocate I/O buffers on the GPU", OPTION_FLAG, 'd', & o->gpuDirect}, -+#endif - LAST_OPTION - }; - option_help * help = malloc(sizeof(h)); -@@ -107,27 +167,54 @@ option_help * POSIX_options(void ** init_backend_options, void * init_values){ - ior_aiori_t posix_aiori = { - .name = "POSIX", - .name_legacy = NULL, -+ .initialize = POSIX_Initialize, -+ .finalize = POSIX_Finalize, - .create = POSIX_Create, - .mknod = POSIX_Mknod, - .open = POSIX_Open, - .xfer = POSIX_Xfer, - .close = POSIX_Close, - .delete = POSIX_Delete, -+ .xfer_hints = POSIX_xfer_hints, - .get_version = aiori_get_version, - .fsync = POSIX_Fsync, - .get_file_size = POSIX_GetFileSize, - .statfs = aiori_posix_statfs, - .mkdir = aiori_posix_mkdir, - .rmdir = aiori_posix_rmdir, -+ .rename = POSIX_Rename, - .access = aiori_posix_access, - .stat = aiori_posix_stat, - .get_options = POSIX_options, - .enable_mdtest = true, -- .sync = POSIX_Sync -+ .sync = POSIX_Sync, -+ .check_params = POSIX_check_params - }; - - /***************************** F U N C T I O N S ******************************/ - -+static aiori_xfer_hint_t * hints = NULL; -+ -+void POSIX_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} -+ -+int POSIX_check_params(aiori_mod_opt_t * param){ -+ posix_options_t * o = (posix_options_t*) param; -+ if (o->beegfs_chunkSize != -1 && (!ISPOWEROFTWO(o->beegfs_chunkSize) || o->beegfs_chunkSize < (1<<16))) -+ ERR("beegfsChunkSize must be a power of two and >64k"); -+ if(o->lustre_stripe_count != -1 || o->lustre_stripe_size != 0) -+ o->lustre_set_striping = 1; -+ if(o->gpuDirect && ! o->direct_io){ -+ ERR("GPUDirect required direct I/O to be used!"); -+ } -+#ifndef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ ERR("GPUDirect support is not compiled"); -+ } -+#endif -+ return 0; -+} - - #ifdef HAVE_GPFS_FCNTL_H - void gpfs_free_all_locks(int fd) -@@ -148,10 +235,10 @@ void gpfs_free_all_locks(int fd) - - rc = gpfs_fcntl(fd, &release_all); - if (verbose >= VERBOSE_0 && rc != 0) { -- EWARNF("gpfs_fcntl(%d, ...) release all locks hint failed.", fd); -+ WARNF("gpfs_fcntl(%d, ...) release all locks hint failed.", fd); - } - } --void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int access) -+void gpfs_access_start(int fd, IOR_offset_t length, IOR_offset_t offset, int access) - { - int rc; - struct { -@@ -165,17 +252,17 @@ void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int acce - - take_locks.access.structLen = sizeof(take_locks.access); - take_locks.access.structType = GPFS_ACCESS_RANGE; -- take_locks.access.start = param->offset; -+ take_locks.access.start = offset; - take_locks.access.length = length; - take_locks.access.isWrite = (access == WRITE); - - rc = gpfs_fcntl(fd, &take_locks); - if (verbose >= VERBOSE_2 && rc != 0) { -- EWARNF("gpfs_fcntl(%d, ...) access range hint failed.", fd); -+ WARNF("gpfs_fcntl(%d, ...) access range hint failed.", fd); - } - } - --void gpfs_access_end(int fd, IOR_offset_t length, IOR_param_t *param, int access) -+void gpfs_access_end(int fd, IOR_offset_t length, IOR_offset_t offset, int access) - { - int rc; - struct { -@@ -190,15 +277,70 @@ void gpfs_access_end(int fd, IOR_offset_t length, IOR_param_t *param, int access - - free_locks.free.structLen = sizeof(free_locks.free); - free_locks.free.structType = GPFS_FREE_RANGE; -- free_locks.free.start = param->offset; -+ free_locks.free.start = offset; - free_locks.free.length = length; - - rc = gpfs_fcntl(fd, &free_locks); - if (verbose >= VERBOSE_2 && rc != 0) { -- EWARNF("gpfs_fcntl(%d, ...) free range hint failed.", fd); -+ WARNF("gpfs_fcntl(%d, ...) free range hint failed.", fd); - } - } - -+#ifdef HAVE_GPFSFINEGRAINWRITESHARING_T -+/* This hint optimizes the performance of small strided -+ writes to a shared file from a parallel application */ -+void gpfs_fineGrainWriteSharing(int fd) -+{ -+ struct -+ { -+ gpfsFcntlHeader_t header; -+ gpfsFineGrainWriteSharing_t write; -+ } sharingHint; -+ int rc; -+ -+ sharingHint.header.totalLength = sizeof(sharingHint); -+ sharingHint.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; -+ sharingHint.header.fcntlReserved = 0; -+ -+ sharingHint.write.structLen = sizeof(sharingHint.write); -+ sharingHint.write.structType = GPFS_FINE_GRAIN_WRITE_SHARING; -+ sharingHint.write.fineGrainWriteSharing = 1; -+ sharingHint.write.taskId = -1; -+ sharingHint.write.totalTasks = -1; -+ sharingHint.write.recordSize = -1; -+ -+ rc = gpfs_fcntl(fd, &sharingHint); -+ if (verbose >= VERBOSE_2 && rc != 0) { -+ WARNF("gpfs_fcntl(%d, ...) fine grain write sharing hint failed.", fd); -+ } -+} -+ -+/* This hint optimizes the performance of small strided -+ reads from a shared file from a parallel application */ -+void gpfs_fineGrainReadSharing(int fd) -+{ -+ struct -+ { -+ gpfsFcntlHeader_t header; -+ gpfsPrefetch_t read; -+ } sharingHint; -+ int rc; -+ -+ sharingHint.header.totalLength = sizeof(sharingHint); -+ sharingHint.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; -+ sharingHint.header.fcntlReserved = 0; -+ -+ sharingHint.read.structLen = sizeof(sharingHint.read); -+ sharingHint.read.structType = GPFS_PREFETCH; -+ sharingHint.read.prefetchEnableRead = 0; -+ sharingHint.read.prefetchEnableWrite = 1; -+ -+ rc = gpfs_fcntl(fd, &sharingHint); -+ if (verbose >= VERBOSE_2 && rc != 0) { -+ WARNF("gpfs_fcntl(%d, ...) fine grain read sharing hint failed.", fd); -+ } -+} -+#endif - #endif - - #ifdef HAVE_BEEGFS_BEEGFS_H -@@ -315,126 +457,138 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun - #endif /* HAVE_BEEGFS_BEEGFS_H */ - - -+#ifdef HAVE_LUSTRE_USER -+void lustre_disable_file_locks(const int fd) { -+ int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; -+ if (verbose >= VERBOSE_1) { -+ INFO("** Disabling lustre range locking **\n"); -+ } -+ if (ioctl(fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) { -+ ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", fd); -+ } -+} -+#endif /* HAVE_LUSTRE_USER */ -+ - /* -- * Creat and open a file through the POSIX interface. -+ * Create and open a file through the POSIX interface. - */ --void *POSIX_Create(char *testFileName, IOR_param_t * param) -+aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) - { - int fd_oflag = O_BINARY; - int mode = 0664; -- int *fd; -- -- fd = (int *)malloc(sizeof(int)); -- if (fd == NULL) -- ERR("Unable to malloc file descriptor"); -- posix_options_t * o = (posix_options_t*) param->backend_options; -+ posix_fd * pfd = safeMalloc(sizeof(posix_fd)); -+ posix_options_t * o = (posix_options_t*) param; - if (o->direct_io == TRUE){ -- set_o_direct_flag(&fd_oflag); -+ set_o_direct_flag(& fd_oflag); - } - -- if(param->dryRun) -- return 0; -+ if(hints->dryRun) -+ return (aiori_fd_t*) 0; - --#ifdef HAVE_LUSTRE_LUSTRE_USER_H -+#ifdef HAVE_LUSTRE_USER - /* Add a #define for FASYNC if not available, as it forms part of - * the Lustre O_LOV_DELAY_CREATE definition. */ - #ifndef FASYNC - #define FASYNC 00020000 /* fcntl, for BSD compatibility */ - #endif -- -- if (param->lustre_set_striping) { -- /* In the single-shared-file case, task 0 has to creat the -- file with the Lustre striping options before any other processes -- open the file */ -- if (!param->filePerProc && rank != 0) { -+ if (o->lustre_set_striping) { -+ /* In the single-shared-file case, task 0 has to create the -+ file with the Lustre striping options before any other -+ processes open the file */ -+ if (!hints->filePerProc && rank != 0) { - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - fd_oflag |= O_RDWR; -- *fd = open64(testFileName, fd_oflag, mode); -- if (*fd < 0) -- ERRF("open64(\"%s\", %d, %#o) failed", -- testFileName, fd_oflag, mode); -+ pfd->fd = open64(testFileName, fd_oflag, mode); -+ if (pfd->fd < 0){ -+ ERRF("open64(\"%s\", %d, %#o) failed. Error: %s", -+ testFileName, fd_oflag, mode, strerror(errno)); -+ } - } else { - struct lov_user_md opts = { 0 }; - - /* Setup Lustre IOCTL striping pattern structure */ - opts.lmm_magic = LOV_USER_MAGIC; -- opts.lmm_stripe_size = param->lustre_stripe_size; -- opts.lmm_stripe_offset = param->lustre_start_ost; -- opts.lmm_stripe_count = param->lustre_stripe_count; -+ opts.lmm_stripe_size = o->lustre_stripe_size; -+ opts.lmm_stripe_offset = o->lustre_start_ost; -+ opts.lmm_stripe_count = o->lustre_stripe_count; - - /* File needs to be opened O_EXCL because we cannot set - * Lustre striping information on a pre-existing file.*/ - -- fd_oflag |= -- O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; -- *fd = open64(testFileName, fd_oflag, mode); -- if (*fd < 0) { -- fprintf(stdout, "\nUnable to open '%s': %s\n", -+ fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; -+ pfd->fd = open64(testFileName, fd_oflag, mode); -+ if (pfd->fd < 0) { -+ ERRF("Unable to open '%s': %s\n", - testFileName, strerror(errno)); -- MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), -- "MPI_Abort() error"); -- } else if (ioctl(*fd, LL_IOC_LOV_SETSTRIPE, &opts)) { -+ } else if (ioctl(pfd->fd, LL_IOC_LOV_SETSTRIPE, &opts)) { - char *errmsg = "stripe already set"; - if (errno != EEXIST && errno != EALREADY) - errmsg = strerror(errno); -- fprintf(stdout, -- "\nError on ioctl for '%s' (%d): %s\n", -- testFileName, *fd, errmsg); -- MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), -- "MPI_Abort() error"); -+ ERRF("Error on ioctl for '%s' (%d): %s\n", -+ testFileName, pfd->fd, errmsg); - } -- if (!param->filePerProc) -+ if (!hints->filePerProc) - MPI_CHECK(MPI_Barrier(testComm), - "barrier error"); - } - } else { --#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ -+#endif /* HAVE_LUSTRE_USER */ - - fd_oflag |= O_CREAT | O_RDWR; - - #ifdef HAVE_BEEGFS_BEEGFS_H -- if (beegfs_isOptionSet(param->beegfs_chunkSize) -- || beegfs_isOptionSet(param->beegfs_numTargets)) { -+ if (beegfs_isOptionSet(o->beegfs_chunkSize) -+ || beegfs_isOptionSet(o->beegfs_numTargets)) { - bool result = beegfs_createFilePath(testFileName, - mode, -- param->beegfs_numTargets, -- param->beegfs_chunkSize); -+ o->beegfs_numTargets, -+ o->beegfs_chunkSize); - if (result) { - fd_oflag &= ~O_CREAT; - } else { -- EWARN("BeeGFS tuning failed"); -+ WARN("BeeGFS tuning failed"); - } - } - #endif /* HAVE_BEEGFS_BEEGFS_H */ - -- *fd = open64(testFileName, fd_oflag, mode); -- if (*fd < 0) -- ERRF("open64(\"%s\", %d, %#o) failed", -- testFileName, fd_oflag, mode); -+ pfd->fd = open64(testFileName, fd_oflag, mode); -+ if (pfd->fd < 0){ -+ ERRF("open64(\"%s\", %d, %#o) failed. Error: %s", -+ testFileName, fd_oflag, mode, strerror(errno)); -+ } - --#ifdef HAVE_LUSTRE_LUSTRE_USER_H -+#ifdef HAVE_LUSTRE_USER - } - -- if (param->lustre_ignore_locks) { -- int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; -- if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) -- ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); -+ if (o->lustre_ignore_locks) { -+ lustre_disable_file_locks(pfd->fd); - } --#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ -+#endif /* HAVE_LUSTRE_USER */ - - #ifdef HAVE_GPFS_FCNTL_H - /* in the single shared file case, immediately release all locks, with - * the intent that we can avoid some byte range lock revocation: - * everyone will be writing/reading from individual regions */ -- if (param->gpfs_release_token ) { -- gpfs_free_all_locks(*fd); -+ if (o->gpfs_release_token ) { -+ gpfs_free_all_locks(pfd->fd); -+ } -+#ifdef HAVE_GPFSFINEGRAINWRITESHARING_T -+ /* Enable fine grain write sharing */ -+ if (o->gpfs_finegrain_writesharing) { -+ gpfs_fineGrainWriteSharing(pfd->fd); - } - #endif -- return ((void *)fd); -+#endif -+#ifdef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ init_cufile(pfd); -+ } -+#endif -+ return (aiori_fd_t*) pfd; - } - - /* -- * Creat a file through mknod interface. -+ * Create a file through mknod interface. - */ - int POSIX_Mknod(char *testFileName) - { -@@ -450,99 +604,122 @@ int POSIX_Mknod(char *testFileName) - /* - * Open a file through the POSIX interface. - */ --void *POSIX_Open(char *testFileName, IOR_param_t * param) -+aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) - { - int fd_oflag = O_BINARY; -- int *fd; -- -- fd = (int *)malloc(sizeof(int)); -- if (fd == NULL) -- ERR("Unable to malloc file descriptor"); -- -- posix_options_t * o = (posix_options_t*) param->backend_options; -- if (o->direct_io == TRUE) -+ if(flags & IOR_RDONLY){ -+ fd_oflag |= O_RDONLY; -+ }else if(flags & IOR_WRONLY){ -+ fd_oflag |= O_WRONLY; -+ }else{ -+ fd_oflag |= O_RDWR; -+ } -+ posix_fd * pfd = safeMalloc(sizeof(posix_fd)); -+ posix_options_t * o = (posix_options_t*) param; -+ if (o->direct_io == TRUE){ - set_o_direct_flag(&fd_oflag); -+ } - -- fd_oflag |= O_RDWR; -+ if(hints->dryRun) -+ return (aiori_fd_t*) 0; - -- if(param->dryRun) -- return 0; -+ pfd->fd = open64(testFileName, fd_oflag); -+ if (pfd->fd < 0) -+ ERRF("open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(errno)); - -- *fd = open64(testFileName, fd_oflag); -- if (*fd < 0) -- ERRF("open64(\"%s\", %d) failed", testFileName, fd_oflag); -- --#ifdef HAVE_LUSTRE_LUSTRE_USER_H -- if (param->lustre_ignore_locks) { -- int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; -- if (verbose >= VERBOSE_1) { -- fprintf(stdout, -- "** Disabling lustre range locking **\n"); -- } -- if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) -- ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); -+#ifdef HAVE_LUSTRE_USER -+ if (o->lustre_ignore_locks) { -+ lustre_disable_file_locks(pfd->fd); - } --#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ -+#endif /* HAVE_LUSTRE_USER */ - - #ifdef HAVE_GPFS_FCNTL_H -- if(param->gpfs_release_token) { -- gpfs_free_all_locks(*fd); -+ if(o->gpfs_release_token) { -+ gpfs_free_all_locks(pfd->fd); -+ } -+#ifdef HAVE_GPFSFINEGRAINWRITESHARING_T -+ /* Enable fine grain read sharing */ -+ if (o->gpfs_finegrain_readsharing) { -+ gpfs_fineGrainReadSharing(pfd->fd); - } - #endif -- return ((void *)fd); -+#endif -+#ifdef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ init_cufile(pfd); -+ } -+#endif -+ return (aiori_fd_t*) pfd; - } - - /* - * Write or read access to file using the POSIX interface. - */ --static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, -- IOR_offset_t length, IOR_param_t * param) -+static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param) - { - int xferRetries = 0; - long long remaining = (long long)length; - char *ptr = (char *)buffer; - long long rc; - int fd; -+ posix_options_t * o = (posix_options_t*) param; - -- if(param->dryRun) -+ if(hints->dryRun) - return length; - -- fd = *(int *)file; -+ posix_fd * pfd = (posix_fd *) file; -+ fd = pfd->fd; - - #ifdef HAVE_GPFS_FCNTL_H -- if (param->gpfs_hint_access) { -- gpfs_access_start(fd, length, param, access); -+ if (o->gpfs_hint_access) { -+ gpfs_access_start(fd, length, offset, access); - } - #endif - - - /* seek to offset */ -- if (lseek64(fd, param->offset, SEEK_SET) == -1) -- ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, param->offset); -- -+ if (lseek64(fd, offset, SEEK_SET) == -1) -+ ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, offset); -+ off_t mem_offset = 0; - while (remaining > 0) { - /* write/read file */ - if (access == WRITE) { /* WRITE */ - if (verbose >= VERBOSE_4) { -- fprintf(stdout, -- "task %d writing to offset %lld\n", -+ INFOF("task %d writing to offset %lld\n", - rank, -- param->offset + length - remaining); -+ offset + length - remaining); -+ } -+#ifdef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ rc = cuFileWrite(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset); -+ }else{ -+#endif -+ rc = write(fd, ptr, remaining); -+#ifdef HAVE_GPU_DIRECT - } -- rc = write(fd, ptr, remaining); -+#endif - if (rc == -1) - ERRF("write(%d, %p, %lld) failed", - fd, (void*)ptr, remaining); -- if (param->fsyncPerWrite == TRUE) -- POSIX_Fsync(&fd, param); -+ if (hints->fsyncPerWrite == TRUE){ -+ POSIX_Fsync((aiori_fd_t*) &fd, param); -+ } - } else { /* READ or CHECK */ - if (verbose >= VERBOSE_4) { -- fprintf(stdout, -- "task %d reading from offset %lld\n", -+ INFOF("task %d reading from offset %lld\n", - rank, -- param->offset + length - remaining); -+ offset + length - remaining); -+ } -+#ifdef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ rc = cuFileRead(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset); -+ }else{ -+#endif -+ rc = read(fd, ptr, remaining); -+#ifdef HAVE_GPU_DIRECT - } -- rc = read(fd, ptr, remaining); -+#endif - if (rc == 0) - ERRF("read(%d, %p, %lld) returned EOF prematurely", - fd, (void*)ptr, remaining); -@@ -551,43 +728,38 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, - fd, (void*)ptr, remaining); - } - if (rc < remaining) { -- fprintf(stdout, -- "WARNING: Task %d, partial %s, %lld of %lld bytes at offset %lld\n", -+ WARNF("task %d, partial %s, %lld of %lld bytes at offset %lld\n", - rank, - access == WRITE ? "write()" : "read()", - rc, remaining, -- param->offset + length - remaining); -- if (param->singleXferAttempt == TRUE) -- MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), -- "barrier error"); -- if (xferRetries > MAX_RETRY) -+ offset + length - remaining); -+ if (xferRetries > MAX_RETRY || hints->singleXferAttempt) - ERR("too many retries -- aborting"); - } - assert(rc >= 0); - assert(rc <= remaining); - remaining -= rc; - ptr += rc; -+ mem_offset += rc; - xferRetries++; - } - #ifdef HAVE_GPFS_FCNTL_H -- if (param->gpfs_hint_access) { -- gpfs_access_end(fd, length, param, access); -+ if (o->gpfs_hint_access) { -+ gpfs_access_end(fd, length, offset, access); - } - #endif - return (length); - } - --/* -- * Perform fsync(). -- */ --static void POSIX_Fsync(void *fd, IOR_param_t * param) -+void POSIX_Fsync(aiori_fd_t *afd, aiori_mod_opt_t * param) - { -- if (fsync(*(int *)fd) != 0) -- EWARNF("fsync(%d) failed", *(int *)fd); -+ int fd = ((posix_fd*) afd)->fd; -+ if (fsync(fd) != 0) -+ WARNF("fsync(%d) failed", fd); - } - - --static void POSIX_Sync(IOR_param_t * param) -+void POSIX_Sync(aiori_mod_opt_t * param) - { - int ret = system("sync"); - if (ret != 0){ -@@ -599,35 +771,52 @@ static void POSIX_Sync(IOR_param_t * param) - /* - * Close a file through the POSIX interface. - */ --void POSIX_Close(void *fd, IOR_param_t * param) -+void POSIX_Close(aiori_fd_t *afd, aiori_mod_opt_t * param) - { -- if(param->dryRun) -+ if(hints->dryRun) - return; -- if (close(*(int *)fd) != 0) -- ERRF("close(%d) failed", *(int *)fd); -- free(fd); -+ posix_options_t * o = (posix_options_t*) param; -+ int fd = ((posix_fd*) afd)->fd; -+#ifdef HAVE_GPU_DIRECT -+ if(o->gpuDirect){ -+ cuFileHandleDeregister(((posix_fd*) afd)->cf_handle); -+ } -+#endif -+ if (close(fd) != 0){ -+ ERRF("close(%d) failed", fd); -+ } -+ free(afd); - } - - /* - * Delete a file through the POSIX interface. - */ --void POSIX_Delete(char *testFileName, IOR_param_t * param) -+void POSIX_Delete(char *testFileName, aiori_mod_opt_t * param) - { -- if(param->dryRun) -+ if(hints->dryRun) - return; - if (unlink(testFileName) != 0){ -- EWARNF("[RANK %03d]: unlink() of file \"%s\" failed\n", -- rank, testFileName); -+ WARNF("[RANK %03d]: unlink() of file \"%s\" failed", rank, testFileName); - } - } - -+int POSIX_Rename(const char * oldfile, const char * newfile, aiori_mod_opt_t * module_options){ -+ if(hints->dryRun) -+ return 0; -+ -+ if(rename(oldfile, newfile) != 0){ -+ WARNF("[RANK %03d]: rename() of file \"%s\" to \"%s\" failed", rank, oldfile, newfile); -+ return -1; -+ } -+ return 0; -+} -+ - /* - * Use POSIX stat() to return aggregate file size. - */ --IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, -- char *testFileName) -+IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName) - { -- if(test->dryRun) -+ if(hints->dryRun) - return 0; - struct stat stat_buf; - IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; -@@ -637,26 +826,17 @@ IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, - } - aggFileSizeFromStat = stat_buf.st_size; - -- if (test->filePerProc == TRUE) { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, -- MPI_LONG_LONG_INT, MPI_SUM, testComm), -- "cannot total data moved"); -- aggFileSizeFromStat = tmpSum; -- } else { -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, -- MPI_LONG_LONG_INT, MPI_MIN, testComm), -- "cannot total data moved"); -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, -- MPI_LONG_LONG_INT, MPI_MAX, testComm), -- "cannot total data moved"); -- if (tmpMin != tmpMax) { -- if (rank == 0) { -- WARN("inconsistent file size by different tasks"); -- } -- /* incorrect, but now consistent across tasks */ -- aggFileSizeFromStat = tmpMin; -- } -- } -- - return (aggFileSizeFromStat); - } -+ -+void POSIX_Initialize(aiori_mod_opt_t * options){ -+#ifdef HAVE_GPU_DIRECT -+ CUfileError_t err = cuFileDriverOpen(); -+#endif -+} -+ -+void POSIX_Finalize(aiori_mod_opt_t * options){ -+#ifdef HAVE_GPU_DIRECT -+ CUfileError_t err = cuFileDriverClose(); -+#endif -+} -diff --git a/src/aiori-POSIX.h b/src/aiori-POSIX.h -new file mode 100644 -index 0000000..9a223b0 ---- /dev/null -+++ b/src/aiori-POSIX.h -@@ -0,0 +1,46 @@ -+#ifndef AIORI_POSIX_H -+#define AIORI_POSIX_H -+ -+#include "aiori.h" -+ -+/************************** O P T I O N S *****************************/ -+typedef struct{ -+ /* in case of a change, please update depending MMAP module too */ -+ int direct_io; -+ -+ /* Lustre variables */ -+ int lustre_set_striping; /* flag that we need to set lustre striping */ -+ int lustre_stripe_count; -+ int lustre_stripe_size; -+ int lustre_start_ost; -+ int lustre_ignore_locks; -+ -+ /* gpfs variables */ -+ int gpfs_hint_access; /* use gpfs "access range" hint */ -+ int gpfs_release_token; /* immediately release GPFS tokens after -+ creating or opening a file */ -+ int gpfs_finegrain_writesharing; /* Enable fine grain write sharing */ -+ int gpfs_finegrain_readsharing; /* Enable fine grain read sharing */ -+ -+ /* beegfs variables */ -+ int beegfs_numTargets; /* number storage targets to use */ -+ int beegfs_chunkSize; /* srtipe pattern for new files */ -+ int gpuDirect; -+} posix_options_t; -+ -+void POSIX_Sync(aiori_mod_opt_t * param); -+int POSIX_check_params(aiori_mod_opt_t * param); -+void POSIX_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -+int POSIX_check_params(aiori_mod_opt_t * options); -+aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * module_options); -+int POSIX_Mknod(char *testFileName); -+aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options); -+IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName); -+void POSIX_Delete(char *testFileName, aiori_mod_opt_t * module_options); -+int POSIX_Rename(const char *oldfile, const char *newfile, aiori_mod_opt_t * module_options); -+void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * module_options); -+option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); -+void POSIX_xfer_hints(aiori_xfer_hint_t * params); -+ -+ -+#endif -diff --git a/src/aiori-S3.c b/src/aiori-S3-4c.c -similarity index 74% -rename from src/aiori-S3.c -rename to src/aiori-S3-4c.c -index 2c9a9af..fa32837 100755 ---- a/src/aiori-S3.c -+++ b/src/aiori-S3-4c.c -@@ -91,16 +91,6 @@ - - #include - #include --/* --#ifdef HAVE_LUSTRE_LUSTRE_USER_H --#include --#endif --*/ -- --#include "ior.h" --#include "aiori.h" --#include "iordef.h" -- - #include - - #include // from libxml2 -@@ -109,28 +99,28 @@ - #include "aws4c.h" // extended vers of "aws4c" lib for S3 via libcurl - #include "aws4c_extra.h" // utilities, e.g. for parsing XML in responses - -+#include "ior.h" -+#include "aiori.h" -+#include "aiori-debug.h" - -+extern int rank; -+extern MPI_Comm testComm; - -- --/* buffer is used to generate URLs, err_msgs, etc */ - #define BUFF_SIZE 1024 --static char buff[BUFF_SIZE]; -- - const int ETAG_SIZE = 32; -- - CURLcode rc; - --/* Any objects we create or delete will be under this bucket */ --const char* bucket_name = "ior"; -- - /* TODO: The following stuff goes into options! */ - /* REST/S3 variables */ - // CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */ --# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */ -+# define IOR_CURL_INIT 0x01 /* curl top-level inits were performed once? */ - # define IOR_CURL_NOCONTINUE 0x02 - # define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */ - --#ifdef USE_S3_AIORI -+#define MAX_UPLOAD_ID_SIZE 256 /* TODO don't know the actual value */ -+ -+ -+#ifdef USE_S3_4C_AIORI - # include - # include "aws4c.h" - #else -@@ -138,41 +128,60 @@ const char* bucket_name = "ior"; - typedef void IOBuf; /* unused, but needs a type */ - #endif - -- IOBuf* io_buf; /* aws4c places parsed header values here */ -- IOBuf* etags; /* accumulate ETags for N:1 parts */ - --/////////////////////////////////////////////// -+typedef struct { -+ /* Any objects we create or delete will be under this bucket */ -+ char* bucket_name; -+ char* user; -+ char* host; -+ /* Runtime data, this data isn't yet safe to allow concurrent access to multiple files, only open one file at a time */ -+ int curl_flags; -+ IOBuf* io_buf; /* aws4c places parsed header values here */ -+ IOBuf* etags; /* accumulate ETags for N:1 parts */ -+ size_t part_number; -+ char UploadId[MAX_UPLOAD_ID_SIZE]; /* key for multi-part-uploads */ -+ int written; /* did we write to the file */ -+} s3_options_t; - --/**************************** P R O T O T Y P E S *****************************/ --static void* S3_Create(char*, IOR_param_t*); --static void* S3_Open(char*, IOR_param_t*); --static IOR_offset_t S3_Xfer(int, void*, IOR_size_t*, IOR_offset_t, IOR_param_t*); --static void S3_Close(void*, IOR_param_t*); -+/////////////////////////////////////////////// - --static void* EMC_Create(char*, IOR_param_t*); --static void* EMC_Open(char*, IOR_param_t*); --static IOR_offset_t EMC_Xfer(int, void*, IOR_size_t*, IOR_offset_t, IOR_param_t*); --static void EMC_Close(void*, IOR_param_t*); -+static aiori_xfer_hint_t * hints = NULL; - --static void S3_Delete(char*, IOR_param_t*); --static void S3_Fsync(void*, IOR_param_t*); --static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*); --static void S3_init(); --static void S3_finalize(); --static int S3_check_params(IOR_param_t *); -+static void S3_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} - -+/**************************** P R O T O T Y P E S *****************************/ -+static aiori_fd_t* S3_Create(char *path, int iorflags, aiori_mod_opt_t * options); -+static aiori_fd_t* S3_Open(char *path, int flags, aiori_mod_opt_t * options); -+static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options); -+static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options); -+ -+static aiori_fd_t* EMC_Create(char *path, int iorflags, aiori_mod_opt_t * options); -+static aiori_fd_t* EMC_Open(char *path, int flags, aiori_mod_opt_t * options); -+static IOR_offset_t EMC_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options); -+static void EMC_Close(aiori_fd_t * afd, aiori_mod_opt_t * options); -+ -+static void S3_Delete(char *path, aiori_mod_opt_t * options); -+static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options); -+static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName); -+static void S3_init(aiori_mod_opt_t * options); -+static void S3_finalize(aiori_mod_opt_t * options); -+static int S3_check_params(aiori_mod_opt_t * options); -+static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); - - /************************** D E C L A R A T I O N S ***************************/ - - // "Pure S3" - // N:1 writes use multi-part upload - // N:N fails if "transfer-size" != "block-size" (because that requires "append") --ior_aiori_t s3_aiori = { -- .name = "S3", -+ior_aiori_t s3_4c_aiori = { -+ .name = "S3-4c", - .name_legacy = NULL, - .create = S3_Create, - .open = S3_Open, - .xfer = S3_Xfer, -+ .xfer_hints = S3_xfer_hints, - .close = S3_Close, - .delete = S3_Delete, - .get_version = aiori_get_version, -@@ -180,7 +189,9 @@ ior_aiori_t s3_aiori = { - .get_file_size = S3_GetFileSize, - .initialize = S3_init, - .finalize = S3_finalize, -- .check_params = S3_check_params -+ .check_params = S3_check_params, -+ .get_options = S3_options, -+ .enable_mdtest = true - }; - - // "S3", plus EMC-extensions enabled -@@ -193,7 +204,7 @@ ior_aiori_t s3_plus_aiori = { - .xfer = S3_Xfer, - .close = S3_Close, - .delete = S3_Delete, -- .set_version = S3_SetVersion, -+ .get_version = aiori_get_version, - .fsync = S3_Fsync, - .get_file_size = S3_GetFileSize, - .initialize = S3_init, -@@ -210,7 +221,7 @@ ior_aiori_t s3_emc_aiori = { - .xfer = EMC_Xfer, - .close = EMC_Close, - .delete = S3_Delete, -- .set_version = S3_SetVersion, -+ .get_version = aiori_get_version, - .fsync = S3_Fsync, - .get_file_size = S3_GetFileSize, - .initialize = S3_init, -@@ -218,33 +229,57 @@ ior_aiori_t s3_emc_aiori = { - }; - - --static void S3_init(){ -+static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ s3_options_t * o = malloc(sizeof(s3_options_t)); -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(s3_options_t)); -+ }else{ -+ memset(o, 0, sizeof(s3_options_t)); -+ } -+ -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ o->bucket_name = "ior"; -+ -+ option_help h [] = { -+ {0, "S3-4c.user", "The username (in ~/.awsAuth).", OPTION_OPTIONAL_ARGUMENT, 's', & o->user}, -+ {0, "S3-4C.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, -+ {0, "S3-4c.bucket-name", "The name of the bucket.", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_name}, -+ LAST_OPTION -+ }; -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} -+ -+ -+static void S3_init(aiori_mod_opt_t * options){ - /* This is supposed to be done before *any* threads are created. - * Could MPI_Init() create threads (or call multi-threaded - * libraries)? We'll assume so. */ - AWS4C_CHECK( aws_init() ); - } - --static void S3_finalize(){ -+static void S3_finalize(aiori_mod_opt_t * options){ - /* done once per program, after exiting all threads. - * NOTE: This fn doesn't return a value that can be checked for success. */ - aws_cleanup(); - } - --static int S3_check_params(IOR_param_t * test){ -+static int S3_check_params(aiori_mod_opt_t * test){ -+ if(! hints) return 0; - /* N:1 and N:N */ -- IOR_offset_t NtoN = test->filePerProc; -+ IOR_offset_t NtoN = hints->filePerProc; - IOR_offset_t Nto1 = ! NtoN; -- IOR_offset_t s = test->segmentCount; -- IOR_offset_t t = test->transferSize; -- IOR_offset_t b = test->blockSize; -+ IOR_offset_t s = hints->segmentCount; -+ IOR_offset_t t = hints->transferSize; -+ IOR_offset_t b = hints->blockSize; - - if (Nto1 && (s != 1) && (b != t)) { - ERR("N:1 (strided) requires xfer-size == block-size"); -- return 0; -+ return 1; - } - -- return 1; -+ return 0; - } - - /* modelled on similar macros in iordef.h */ -@@ -286,21 +321,21 @@ static int S3_check_params(IOR_param_t * test){ - * NOTE: Our custom version of aws4c can be configured so that connections - * are reused, instead of opened and closed on every operation. We - * do configure it that way, but you still need to call these -- * connect/disconnet functions, in order to insure that aws4c has -+ * connect/disconnect functions, in order to insure that aws4c has - * been configured. - * --------------------------------------------------------------------------- - */ - - --static void s3_connect( IOR_param_t* param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> s3_connect\n"); /* DEBUGGING */ -- } -+static void s3_connect( s3_options_t* param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> s3_connect\n"); /* DEBUGGING */ -+ //} - - if ( param->curl_flags & IOR_CURL_INIT ) { -- if (param->verbose >= VERBOSE_2) { -- printf("<- s3_connect [nothing to do]\n"); /* DEBUGGING */ -- } -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- s3_connect [nothing to do]\n"); /* DEBUGGING */ -+ //} - return; - } - -@@ -318,11 +353,11 @@ static void s3_connect( IOR_param_t* param ) { - // NOTE: These inits could be done in init_IORParam_t(), in ior.c, but - // would require conditional compilation, there. - -- aws_set_debug(param->verbose >= 4); -- aws_read_config(getenv("USER")); // requires ~/.awsAuth -+ aws_set_debug(0); // param->verbose >= 4 -+ aws_read_config(param->user); // requires ~/.awsAuth - aws_reuse_connections(1); - -- // initalize IOBufs. These are basically dynamically-extensible -+ // initialize IOBufs. These are basically dynamically-extensible - // linked-lists. "growth size" controls the increment of new memory - // allocated, whenever storage is used up. - param->io_buf = aws_iobuf_new(); -@@ -346,8 +381,8 @@ static void s3_connect( IOR_param_t* param ) { - // snprintf(buff, BUFF_SIZE, "10.140.0.%d", 15 + (rank % 4)); - // s3_set_host(buff); - -- snprintf(buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4)); -- s3_set_host(buff); -+ //snprintf(options->buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4)); -+ //s3_set_host(options->buff); - - #else - /* -@@ -366,23 +401,25 @@ static void s3_connect( IOR_param_t* param ) { - // s3_set_host( "10.143.0.1:80"); - #endif - -+ s3_set_host(param->host); -+ - // make sure test-bucket exists -- s3_set_bucket((char*)bucket_name); -+ s3_set_bucket((char*) param->bucket_name); - - if (rank == 0) { - AWS4C_CHECK( s3_head(param->io_buf, "") ); - if ( param->io_buf->code == 404 ) { // "404 Not Found" -- printf(" bucket '%s' doesn't exist\n", bucket_name); -+ printf(" bucket '%s' doesn't exist\n", param->bucket_name); - - AWS4C_CHECK( s3_put(param->io_buf, "") ); /* creates URL as bucket + obj */ - AWS4C_CHECK_OK( param->io_buf ); // assure "200 OK" -- printf("created bucket '%s'\n", bucket_name); -+ printf("created bucket '%s'\n", param->bucket_name); - } - else { // assure "200 OK" - AWS4C_CHECK_OK( param->io_buf ); - } - } -- MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); -+ MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - - - // Maybe allow EMC extensions to S3 -@@ -391,24 +428,22 @@ static void s3_connect( IOR_param_t* param ) { - // don't perform these inits more than once - param->curl_flags |= IOR_CURL_INIT; - -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- s3_connect [success]\n"); -- } -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- s3_connect [success]\n"); -+ //} - } - - static - void --s3_disconnect( IOR_param_t* param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> s3_disconnect\n"); -- } -- -+s3_disconnect( s3_options_t* param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> s3_disconnect\n"); -+ //} - // nothing to do here, if using new aws4c ... - -- if (param->verbose >= VERBOSE_2) { -- printf("<- s3_disconnect\n"); -- } -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- s3_disconnect\n"); -+ //} - } - - -@@ -416,8 +451,7 @@ s3_disconnect( IOR_param_t* param ) { - // After finalizing an S3 multi-part-upload, you must reset some things - // before you can use multi-part-upload again. This will also avoid (one - // particular set of) memory-leaks. --void --s3_MPU_reset(IOR_param_t* param) { -+void s3_MPU_reset(s3_options_t* param) { - aws_iobuf_reset(param->io_buf); - aws_iobuf_reset(param->etags); - param->part_number = 0; -@@ -453,46 +487,44 @@ s3_MPU_reset(IOR_param_t* param) { - * - */ - --static --void * --S3_Create_Or_Open_internal(char* testFileName, -- IOR_param_t* param, -- unsigned char createFile, -- int multi_part_upload_p ) { -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n", -- testFileName, createFile, multi_part_upload_p); -- } -+static aiori_fd_t * S3_Create_Or_Open_internal(char* testFileName, int openFlags, s3_options_t* param, int multi_part_upload_p ) { -+ unsigned char createFile = openFlags & IOR_CREAT; -+ -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n", -+ // testFileName, createFile, multi_part_upload_p); -+ //} - - /* initialize curl, if needed */ - s3_connect( param ); - - /* Check for unsupported flags */ -- if ( param->openFlags & IOR_EXCL ) { -- fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" ); -- } -- if ( param->useO_DIRECT == TRUE ) { -- fprintf( stdout, "Direct I/O mode is not implemented in S3\n" ); -- } -+ //if ( param->openFlags & IOR_EXCL ) { -+ // fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" ); -+ //} -+ //if ( param->useO_DIRECT == TRUE ) { -+ // fprintf( stdout, "Direct I/O mode is not implemented in S3\n" ); -+ //} - - // easier to think -- int n_to_n = param->filePerProc; -+ int n_to_n = hints->filePerProc; - int n_to_1 = ! n_to_n; - - /* check whether object needs reset to zero-length */ - int needs_reset = 0; - if (! multi_part_upload_p) - needs_reset = 1; /* so "append" can work */ -- else if ( param->openFlags & IOR_TRUNC ) -+ else if ( openFlags & IOR_TRUNC ) - needs_reset = 1; /* so "append" can work */ - else if (createFile) { - // AWS4C_CHECK( s3_head(param->io_buf, testFileName) ); - // if ( ! AWS4C_OK(param->io_buf) ) - needs_reset = 1; - } -- -- if ( param->open == WRITE ) { -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ -+ param->written = 0; -+ if ( openFlags & IOR_WRONLY || openFlags & IOR_RDWR ) { -+ param->written = 1; - - /* initializations for N:1 or N:N writes using multi-part upload */ - if (multi_part_upload_p) { -@@ -522,23 +554,21 @@ S3_Create_Or_Open_internal(char* testFileName, - response->first->len, - NULL, NULL, 0); - if (doc == NULL) -- ERR_SIMPLE("Rank0 Failed to find POST response\n"); -+ ERR("Rank0 Failed to find POST response\n"); - - // navigate parsed XML-tree to find UploadId - xmlNode* root_element = xmlDocGetRootElement(doc); - const char* upload_id = find_element_named(root_element, (char*)"UploadId"); - if (! upload_id) -- ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n"); -+ ERR("couldn't find 'UploadId' in returned XML\n"); - -- if (param->verbose >= VERBOSE_3) -- printf("got UploadId = '%s'\n", upload_id); -+ //if (param->verbose >= VERBOSE_3) -+ // printf("got UploadId = '%s'\n", upload_id); - - const size_t upload_id_len = strlen(upload_id); - if (upload_id_len > MAX_UPLOAD_ID_SIZE) { -- snprintf(buff, BUFF_SIZE, -- "UploadId length %d exceeds expected max (%d)", -- upload_id_len, MAX_UPLOAD_ID_SIZE); -- ERR_SIMPLE(buff); -+ snprintf(buff, BUFF_SIZE, "UploadId length %zd exceeds expected max (%d)", upload_id_len, MAX_UPLOAD_ID_SIZE); -+ ERR(buff); - } - - // save the UploadId we found -@@ -551,16 +581,15 @@ S3_Create_Or_Open_internal(char* testFileName, - - // For N:1, share UploadId across all ranks - if (n_to_1) -- MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm); -+ MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, testComm); - } - else - // N:1, and we're not rank0. recv UploadID from Rank 0 -- MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm); -+ MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, testComm); - } - - /* initializations for N:N or N:1 writes using EMC byte-range extensions */ - else { -- - /* maybe reset to zero-length, so "append" can work */ - if (needs_reset) { - -@@ -576,84 +605,48 @@ S3_Create_Or_Open_internal(char* testFileName, - } - } - -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Create_Or_Open\n"); -- } -- return ((void *) testFileName ); -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- S3_Create_Or_Open\n"); -+ //} -+ return ((aiori_fd_t *) testFileName ); - } - -+static aiori_fd_t * S3_Create( char *testFileName, int iorflags, aiori_mod_opt_t * param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Create\n"); -+ //} - -- --static --void * --S3_Create( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Create\n"); -- } -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Create\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, TRUE, TRUE ); -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- S3_Create\n"); -+ //} -+ return S3_Create_Or_Open_internal( testFileName, iorflags, (s3_options_t*) param, TRUE ); - } --static --void * --EMC_Create( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> EMC_Create\n"); -- } -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- EMC_Create\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, TRUE, FALSE ); --} -- -- - -+static aiori_fd_t * EMC_Create( char *testFileName, int iorflags, aiori_mod_opt_t * param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> EMC_Create\n"); -+ //} - -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- EMC_Create\n"); -+ //} -+ return S3_Create_Or_Open_internal( testFileName, iorflags, (s3_options_t*) param, FALSE ); -+} - -+static aiori_fd_t * S3_Open( char *testFileName, int flags, aiori_mod_opt_t * param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Open\n"); -+ //} - --static --void * --S3_Open( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Open\n"); -- } -- -- if ( param->openFlags & IOR_CREAT ) { -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Open( ... TRUE)\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, TRUE, TRUE ); -- } -- else { -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Open( ... FALSE)\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, FALSE, TRUE ); -- } -+ return S3_Create_Or_Open_internal( testFileName, flags, (s3_options_t*) param, TRUE ); - } --static --void * --EMC_Open( char *testFileName, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Open\n"); -- } - -- if ( param->openFlags & IOR_CREAT ) { -- if (param->verbose >= VERBOSE_2) { -- printf("<- EMC_Open( ... TRUE)\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, TRUE, FALSE ); -- } -- else { -- if (param->verbose >= VERBOSE_2) { -- printf("<- EMC_Open( ... FALSE)\n"); -- } -- return S3_Create_Or_Open_internal( testFileName, param, FALSE, FALSE ); -- } -+static aiori_fd_t * EMC_Open( char *testFileName, int flags, aiori_mod_opt_t * param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Open\n"); -+ //} -+ -+ return S3_Create_Or_Open_internal( testFileName, flags, (s3_options_t*) param, FALSE ); - } - - -@@ -714,7 +707,7 @@ EMC_Open( char *testFileName, IOR_param_t * param ) { - * impose two scaling problems: (1) requires all ETags to be shipped at - * the BW available to a single process, (1) requires either that they - * all fit into memory of a single process, or be written to disk -- * (imposes additional BW contraints), or make a more-complex -+ * (imposes additional BW constraints), or make a more-complex - * interaction with a threaded curl writefunction, to present the - * appearance of a single thread to curl, whilst allowing streaming - * reception of non-local ETags. -@@ -730,39 +723,35 @@ EMC_Open( char *testFileName, IOR_param_t * param ) { - */ - - --static --IOR_offset_t --S3_Xfer_internal(int access, -- void* file, -+static IOR_offset_t S3_Xfer_internal(int access, -+ aiori_fd_t* file, - IOR_size_t* buffer, - IOR_offset_t length, -- IOR_param_t* param, -+ IOR_offset_t offset, -+ s3_options_t* param, - int multi_part_upload_p ) { -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n", -- access, (char*)file, buffer, length, param); -- } -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n", -+ // access, (char*)file, buffer, length, param); -+ //} - - char* fname = (char*)file; /* see NOTE above S3_Create_Or_Open() */ - size_t remaining = (size_t)length; - char* data_ptr = (char *)buffer; -- off_t offset = param->offset; - - // easier to think -- int n_to_n = param->filePerProc; -+ int n_to_n = hints->filePerProc; - int n_to_1 = (! n_to_n); -- int segmented = (param->segmentCount == 1); -+ int segmented = (hints->segmentCount == 1); - - - if (access == WRITE) { /* WRITE */ -- -- if (verbose >= VERBOSE_3) { -- fprintf( stdout, "rank %d writing length=%lld to offset %lld\n", -- rank, -- remaining, -- param->offset + length - remaining); -- } -+ //if (verbose >= VERBOSE_3) { -+ // fprintf( stdout, "rank %d writing length=%lld to offset %lld\n", -+ // rank, -+ // remaining, -+ // param->offset + length - remaining); -+ //} - - - if (multi_part_upload_p) { -@@ -777,7 +766,7 @@ S3_Xfer_internal(int access, - // - // In the N:1 case, the global order of part-numbers we're writing - // depends on whether wer're writing strided or segmented, in -- // other words, how and are acutally -+ // other words, how and are actually - // positioning the parts being written. [See discussion at - // S3_Close_internal().] - // -@@ -790,11 +779,11 @@ S3_Xfer_internal(int access, - size_t part_number; - if (n_to_1) { - if (segmented) { // segmented -- size_t parts_per_rank = param->blockSize / param->transferSize; -+ size_t parts_per_rank = hints->blockSize / hints->transferSize; - part_number = (rank * parts_per_rank) + param->part_number; - } - else // strided -- part_number = (param->part_number * param->numTasks) + rank; -+ part_number = (param->part_number * hints->numTasks) + rank; - } - else - part_number = param->part_number; -@@ -804,14 +793,15 @@ S3_Xfer_internal(int access, - // if (verbose >= VERBOSE_3) { - // fprintf( stdout, "rank %d of %d writing (%s,%s) part_number %lld\n", - // rank, -- // param->numTasks, -+ // hints->numTasks, - // (n_to_1 ? "N:1" : "N:N"), - // (segmented ? "segmented" : "strided"), - // part_number); - // } - -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ - snprintf(buff, BUFF_SIZE, -- "%s?partNumber=%d&uploadId=%s", -+ "%s?partNumber=%zd&uploadId=%s", - fname, part_number, param->UploadId); - - // For performance, we append directly into the linked list -@@ -838,20 +828,20 @@ S3_Xfer_internal(int access, - // } - // } - -- if (verbose >= VERBOSE_3) { -- fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n", -- rank, -- param->numTasks, -- (n_to_1 ? "N:1" : "N:N"), -- (segmented ? "segmented" : "strided"), -- offset, -- part_number, -- param->io_buf->eTag); // incl quote-marks at [0] and [len-1] -- } -+ //if (verbose >= VERBOSE_3) { -+ // fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n", -+ // rank, -+ // hints->numTasks, -+ // (n_to_1 ? "N:1" : "N:N"), -+ // (segmented ? "segmented" : "strided"), -+ // offset, -+ // part_number, -+ // param->io_buf->eTag); // incl quote-marks at [0] and [len-1] -+ //} - if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */ - fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n", - rank, ETAG_SIZE); -- exit(1); -+ exit(EXIT_FAILURE); - } - - // save the eTag for later -@@ -862,9 +852,9 @@ S3_Xfer_internal(int access, - param->io_buf->eTag +1, - strlen(param->io_buf->eTag) -2); - // DEBUGGING -- if (verbose >= VERBOSE_4) { -- printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag); -- } -+ //if (verbose >= VERBOSE_4) { -+ // printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag); -+ //} - - // drop ptrs to , in param->io_buf - aws_iobuf_reset(param->io_buf); -@@ -885,7 +875,7 @@ S3_Xfer_internal(int access, - // than empty storage. - aws_iobuf_reset(param->io_buf); - aws_iobuf_append_static(param->io_buf, data_ptr, remaining); -- AWS4C_CHECK ( s3_put(param->io_buf, file) ); -+ AWS4C_CHECK ( s3_put(param->io_buf, (char*) file) ); - AWS4C_CHECK_OK( param->io_buf ); - - // drop ptrs to , in param->io_buf -@@ -893,18 +883,18 @@ S3_Xfer_internal(int access, - } - - -- if ( param->fsyncPerWrite == TRUE ) { -+ if ( hints->fsyncPerWrite == TRUE ) { - WARN("S3 doesn't support 'fsync'" ); /* does it? */ - } - - } - else { /* READ or CHECK */ - -- if (verbose >= VERBOSE_3) { -- fprintf( stdout, "rank %d reading from offset %lld\n", -- rank, -- param->offset + length - remaining ); -- } -+ //if (verbose >= VERBOSE_3) { -+ // fprintf( stdout, "rank %d reading from offset %lld\n", -+ // rank, -+ // hints->offset + length - remaining ); -+ //} - - // read specific byte-range from the object - // [This is included in the "pure" S3 spec.] -@@ -917,43 +907,45 @@ S3_Xfer_internal(int access, - // libcurl writefunction, invoked via aws4c. - aws_iobuf_reset(param->io_buf); - aws_iobuf_extend_static(param->io_buf, data_ptr, remaining); -- AWS4C_CHECK( s3_get(param->io_buf, file) ); -+ AWS4C_CHECK( s3_get(param->io_buf, (char*) file) ); - if (param->io_buf->code != 206) { /* '206 Partial Content' */ -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ - snprintf(buff, BUFF_SIZE, - "Unexpected result (%d, '%s')", - param->io_buf->code, param->io_buf->result); -- ERR_SIMPLE(buff); -+ ERR(buff); - } - - // drop refs to , in param->io_buf - aws_iobuf_reset(param->io_buf); - } - -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Xfer\n"); -- } -+ //if (verbose >= VERBOSE_2) { -+ // printf("<- S3_Xfer\n"); -+ //} - return ( length ); - } - - --static --IOR_offset_t --S3_Xfer(int access, -- void* file, -+static IOR_offset_t S3_Xfer(int access, -+ aiori_fd_t* file, - IOR_size_t* buffer, - IOR_offset_t length, -- IOR_param_t* param ) { -- S3_Xfer_internal(access, file, buffer, length, param, TRUE); -+ IOR_offset_t offset, -+ aiori_mod_opt_t* param ) { -+ S3_Xfer_internal(access, file, buffer, length, offset, (s3_options_t*) param, TRUE); - } -+ -+ - static - IOR_offset_t - EMC_Xfer(int access, -- void* file, -+ aiori_fd_t* file, - IOR_size_t* buffer, - IOR_offset_t length, -- IOR_param_t* param ) { -- S3_Xfer_internal(access, file, buffer, length, param, FALSE); -+ IOR_offset_t offset, -+ aiori_mod_opt_t* param ) { -+ S3_Xfer_internal(access, file, buffer, length, offset, (s3_options_t*) param, FALSE); - } - - -@@ -992,16 +984,10 @@ EMC_Xfer(int access, - * MPI_COMM_WORLD. - */ - --static --void --S3_Fsync( void *fd, IOR_param_t * param ) { -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Fsync [no-op]\n"); -- } -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Fsync\n"); -- } -+static void S3_Fsync( aiori_fd_t *fd, aiori_mod_opt_t * param ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Fsync [no-op]\n"); -+ //} - } - - -@@ -1014,7 +1000,7 @@ S3_Fsync( void *fd, IOR_param_t * param ) { - * - * ISSUE: The S3 spec says that a multi-part upload can have at most 10,000 - * parts. Does EMC allow more than this? (NOTE the spec also says -- * parts must be at leaast 5MB, but EMC definitely allows smaller -+ * parts must be at least 5MB, but EMC definitely allows smaller - * parts than that.) - * - * ISSUE: All Etags must be sent from a single rank, in a single -@@ -1030,29 +1016,17 @@ S3_Fsync( void *fd, IOR_param_t * param ) { - * See S3_Fsync() for some possible considerations. - */ - --static --void --S3_Close_internal( void* fd, -- IOR_param_t* param, -- int multi_part_upload_p ) { -+static void S3_Close_internal(aiori_fd_t* fd, s3_options_t* param, int multi_part_upload_p) { - - char* fname = (char*)fd; /* see NOTE above S3_Create_Or_Open() */ - - // easier to think -- int n_to_n = param->filePerProc; -+ int n_to_n = hints->filePerProc; - int n_to_1 = (! n_to_n); -- int segmented = (param->segmentCount == 1); -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Close('%s', ,%d) %s\n", -- fname, -- multi_part_upload_p, -- ((n_to_n) ? "N:N" : ((segmented) ? "N:1(seg)" : "N:1(str)"))); -- } -- -- if (param->open == WRITE) { -+ int segmented = (hints->segmentCount == 1); - - -+ if (param->written) { - // finalizing Multi-Part Upload (for N:1 or N:N) - if (multi_part_upload_p) { - -@@ -1078,11 +1052,11 @@ S3_Close_internal( void* fd, - // Everybody should have the same number of ETags (?) - size_t etag_count_max = 0; /* highest number on any proc */ - MPI_Allreduce(&etags_per_rank, &etag_count_max, -- 1, mpi_size_t, MPI_MAX, param->testComm); -+ 1, mpi_size_t, MPI_MAX, testComm); - if (etags_per_rank != etag_count_max) { -- printf("Rank %d: etag count mismatch: max:%d, mine:%d\n", -+ printf("Rank %d: etag count mismatch: max:%zd, mine:%zd\n", - rank, etag_count_max, etags_per_rank); -- MPI_Abort(param->testComm, 1); -+ MPI_Abort(testComm, 1); - } - - // collect ETag data at Rank0 -@@ -1095,26 +1069,25 @@ S3_Close_internal( void* fd, - int j; - int rnk; - -- char* etag_vec = (char*)malloc((param->numTasks * etag_data_size) +1); -+ char* etag_vec = (char*)malloc((hints->numTasks * etag_data_size) +1); - if (! etag_vec) { -- fprintf(stderr, "rank 0 failed to malloc %d bytes\n", -- param->numTasks * etag_data_size); -- MPI_Abort(param->testComm, 1); -+ fprintf(stderr, "rank 0 failed to malloc %zd bytes\n", -+ hints->numTasks * etag_data_size); -+ MPI_Abort(testComm, 1); - } - MPI_Gather(etag_data, etag_data_size, MPI_BYTE, -- etag_vec, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); -+ etag_vec, etag_data_size, MPI_BYTE, 0, testComm); - - // --- debugging: show the gathered etag data - // (This shows the raw concatenated etag-data from each node.) -- if (param->verbose >= VERBOSE_4) { -- -- printf("rank 0: gathered %d etags from all ranks:\n", etags_per_rank); -+ if (verbose >= VERBOSE_4) { -+ printf("rank 0: gathered %zd etags from all ranks:\n", etags_per_rank); - etag_ptr=etag_vec; -- for (rnk=0; rnknumTasks; ++rnk) { -+ for (rnk=0; rnk < hints->numTasks; ++rnk) { - printf("\t[%d]: '", rnk); - - int ii; -- for (ii=0; ii parts, - // locally. At rank0, the etags for each rank are now -- // stored as a continguous block of text, with the blocks -+ // stored as a contiguous block of text, with the blocks - // stored in rank order in etag_vec. In other words, our - // internal rep at rank 0 matches the "segmented" format. - // From this, we must select etags in an order matching how -@@ -1173,14 +1146,14 @@ S3_Close_internal( void* fd, - size_t stride; // in etag_vec - - if (segmented) { // segmented -- i_max = param->numTasks; -+ i_max = hints->numTasks; - j_max = etags_per_rank; - start_multiplier = etag_data_size; /* one rank's-worth of Etag data */ - stride = ETAG_SIZE; /* one ETag */ - } - else { // strided - i_max = etags_per_rank; -- j_max = param->numTasks; -+ j_max = hints->numTasks; - start_multiplier = ETAG_SIZE; /* one ETag */ - stride = etag_data_size; /* one rank's-worth of Etag data */ - } -@@ -1203,7 +1176,7 @@ S3_Close_internal( void* fd, - char etag[ETAG_SIZE +1]; - memcpy(etag, etag_ptr, ETAG_SIZE); - etag[ETAG_SIZE] = 0; -- -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ - // write XML for next part, with Etag ... - snprintf(buff, BUFF_SIZE, - " \n" -@@ -1221,15 +1194,11 @@ S3_Close_internal( void* fd, - - // write XML tail ... - aws_iobuf_append_str(xml, "\n"); -- } -- -- else { -+ } else { - MPI_Gather(etag_data, etag_data_size, MPI_BYTE, -- NULL, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); -+ NULL, etag_data_size, MPI_BYTE, 0, testComm); - } -- } -- -- else { /* N:N */ -+ } else { /* N:N */ - - xml = aws_iobuf_new(); - aws_iobuf_growth_size(xml, 1024 * 8); -@@ -1241,6 +1210,7 @@ S3_Close_internal( void* fd, - char etag[ETAG_SIZE +1]; - int part = 0; - int i; -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ - for (i=0; i\n"); - } - -- -- - // send request to finalize MPU - if (n_to_n || (rank == 0)) { - - // DEBUGGING: show the XML we constructed -- if (param->verbose >= VERBOSE_3) -+ if (verbose >= VERBOSE_3) - debug_iobuf(xml, 1, 1); -- -+ char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ - // --- POST our XML to the server. - snprintf(buff, BUFF_SIZE, - "%s?uploadId=%s", -@@ -1300,42 +1268,36 @@ S3_Close_internal( void* fd, - // N:1 file until rank0 has finished the S3 multi-part finalize. - // The object will not appear to exist, until then. - if (n_to_1) -- MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); -- } -- else { -+ MPI_CHECK(MPI_Barrier(testComm), "barrier error"); -+ } else { - - // No finalization is needed, when using EMC's byte-range writing - // support. However, we do need to make sure everyone has - // finished writing, before anyone starts reading. - if (n_to_1) { -- MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); -- if (param->verbose >= VERBOSE_2) -- printf("rank %d: passed barrier\n", rank); -- } -- } -+ MPI_CHECK(MPI_Barrier(testComm), "barrier error"); -+ //if (verbose >= VERBOSE_2) -+ // printf("rank %d: passed barrier\n", rank); -+ //} -+ } -+ } - - // After writing, reset the CURL connection, so that caches won't be - // used for reads. - aws_reset_connection(); - } - -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_Close\n"); -- } -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("<- S3_Close\n"); -+ //} - } - --static --void --S3_Close( void* fd, -- IOR_param_t* param ) { -- S3_Close_internal(fd, param, TRUE); -+static void S3_Close( aiori_fd_t* fd, aiori_mod_opt_t* param ) { -+ S3_Close_internal(fd, (s3_options_t*) param, TRUE); - } --static --void --EMC_Close( void* fd, -- IOR_param_t* param ) { -- S3_Close_internal(fd, param, FALSE); -+ -+static void EMC_Close( aiori_fd_t* fd, aiori_mod_opt_t* param ) { -+ S3_Close_internal(fd, (s3_options_t*) param, FALSE); - } - - -@@ -1349,16 +1311,13 @@ EMC_Close( void* fd, - * successfully read. - */ - --static --void --S3_Delete( char *testFileName, IOR_param_t * param ) { -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_Delete(%s)\n", testFileName); -- } -- -+static void S3_Delete( char *testFileName, aiori_mod_opt_t * options ) { -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_Delete(%s)\n", testFileName); -+ //} - /* maybe initialize curl */ -- s3_connect( param ); -+ s3_options_t * param = (s3_options_t*) options; -+ s3_connect(param ); - - #if 0 - // EMC BUG: If file was written with appends, and is deleted, -@@ -1372,19 +1331,16 @@ S3_Delete( char *testFileName, IOR_param_t * param ) { - #endif - - AWS4C_CHECK_OK( param->io_buf ); -- -- if (param->verbose >= VERBOSE_2) -- printf("<- S3_Delete\n"); -+ //if (verbose >= VERBOSE_2) -+ // printf("<- S3_Delete\n"); - } - - --static --void --EMC_Delete( char *testFileName, IOR_param_t * param ) { -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> EMC_Delete(%s)\n", testFileName); -- } -+static void EMC_Delete( char *testFileName, aiori_mod_opt_t * options ) { -+ s3_options_t * param = (s3_options_t*) options; -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> EMC_Delete(%s)\n", testFileName); -+ //} - - /* maybe initialize curl */ - s3_connect( param ); -@@ -1401,16 +1357,10 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) { - #endif - - AWS4C_CHECK_OK( param->io_buf ); -- -- if (param->verbose >= VERBOSE_2) -- printf("<- EMC_Delete\n"); -+ //if (param->verbose >= VERBOSE_2) -+ // printf("<- EMC_Delete\n"); - } - -- -- -- -- -- - /* - * HTTP HEAD returns meta-data for a "file". - * -@@ -1420,15 +1370,11 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) { - * request more data than the header actually takes? - */ - --static --IOR_offset_t --S3_GetFileSize(IOR_param_t * param, -- MPI_Comm testComm, -- char * testFileName) { -- -- if (param->verbose >= VERBOSE_2) { -- printf("-> S3_GetFileSize(%s)\n", testFileName); -- } -+static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char * testFileName) { -+ s3_options_t * param = (s3_options_t*) options; -+ //if (param->verbose >= VERBOSE_2) { -+ // printf("-> S3_GetFileSize(%s)\n", testFileName); -+ //} - - IOR_offset_t aggFileSizeFromStat; /* i.e. "long long int" */ - IOR_offset_t tmpMin, tmpMax, tmpSum; -@@ -1442,63 +1388,9 @@ S3_GetFileSize(IOR_param_t * param, - if ( ! AWS4C_OK(param->io_buf) ) { - fprintf(stderr, "rank %d: couldn't stat '%s': %s\n", - rank, testFileName, param->io_buf->result); -- MPI_Abort(param->testComm, 1); -+ MPI_Abort(testComm, 1); - } - aggFileSizeFromStat = param->io_buf->contentLen; - -- if (param->verbose >= VERBOSE_2) { -- printf("\trank %d: file-size %llu\n", rank, aggFileSizeFromStat); -- } -- -- if ( param->filePerProc == TRUE ) { -- if (param->verbose >= VERBOSE_2) { -- printf("\tall-reduce (1)\n"); -- } -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, -- &tmpSum, /* sum */ -- 1, -- MPI_LONG_LONG_INT, -- MPI_SUM, -- testComm ), -- "cannot total data moved" ); -- -- aggFileSizeFromStat = tmpSum; -- } -- else { -- if (param->verbose >= VERBOSE_2) { -- printf("\tall-reduce (2a)\n"); -- } -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, -- &tmpMin, /* min */ -- 1, -- MPI_LONG_LONG_INT, -- MPI_MIN, -- testComm ), -- "cannot total data moved" ); -- -- if (param->verbose >= VERBOSE_2) { -- printf("\tall-reduce (2b)\n"); -- } -- MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, -- &tmpMax, /* max */ -- 1, -- MPI_LONG_LONG_INT, -- MPI_MAX, -- testComm ), -- "cannot total data moved" ); -- -- if ( tmpMin != tmpMax ) { -- if ( rank == 0 ) { -- WARN( "inconsistent file size by different tasks" ); -- } -- -- /* incorrect, but now consistent across tasks */ -- aggFileSizeFromStat = tmpMin; -- } -- } -- -- if (param->verbose >= VERBOSE_2) { -- printf("<- S3_GetFileSize [%llu]\n", aggFileSizeFromStat); -- } - return ( aggFileSizeFromStat ); - } -diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c -new file mode 100644 -index 0000000..fdb3aaf ---- /dev/null -+++ b/src/aiori-S3-libs3.c -@@ -0,0 +1,586 @@ -+/* -+* S3 implementation using the newer libs3 -+* https://github.com/bji/libs3 -+* Use one object per file chunk -+*/ -+ -+#ifdef HAVE_CONFIG_H -+# include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "ior.h" -+#include "aiori.h" -+#include "aiori-debug.h" -+#include "utilities.h" -+ -+ -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void s3_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+} -+ -+/************************** O P T I O N S *****************************/ -+typedef struct { -+ int bucket_per_file; -+ char * access_key; -+ char * secret_key; -+ char * host; -+ char * bucket_prefix; -+ char * bucket_prefix_cur; -+ char * locationConstraint; -+ char * authRegion; -+ -+ int timeout; -+ int dont_suffix; -+ int s3_compatible; -+ int use_ssl; -+ S3BucketContext bucket_context; -+ S3Protocol s3_protocol; -+} s3_options_t; -+ -+static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ s3_options_t * o = malloc(sizeof(s3_options_t)); -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(s3_options_t)); -+ }else{ -+ memset(o, 0, sizeof(s3_options_t)); -+ } -+ -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ o->bucket_prefix = "ior"; -+ o->bucket_prefix_cur = "b"; -+ -+ option_help h [] = { -+ {0, "S3-libs3.bucket-per-file", "Use one bucket to map one file/directory, otherwise one bucket is used to store all dirs/files.", OPTION_FLAG, 'd', & o->bucket_per_file}, -+ {0, "S3-libs3.bucket-name-prefix", "The prefix of the bucket(s).", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, -+ {0, "S3-libs3.dont-suffix-bucket", "By default a hash will be added to the bucket name to increase uniqueness, this disables the option.", OPTION_FLAG, 'd', & o->dont_suffix }, -+ {0, "S3-libs3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, -+ {0, "S3-libs3.use-ssl", "used to specify that SSL is needed for the connection", OPTION_FLAG, 'd', & o->use_ssl }, -+ {0, "S3-libs3.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, -+ {0, "S3-libs3.secret-key", "The secret key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->secret_key}, -+ {0, "S3-libs3.access-key", "The access key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->access_key}, -+ {0, "S3-libs3.region", "The region used for the authorization signature.", OPTION_OPTIONAL_ARGUMENT, 's', & o->authRegion}, -+ {0, "S3-libs3.location", "The bucket geographic location.", OPTION_OPTIONAL_ARGUMENT, 's', & o->locationConstraint}, -+ LAST_OPTION -+ }; -+ option_help * help = malloc(sizeof(h)); -+ memcpy(help, h, sizeof(h)); -+ return help; -+} -+ -+static void def_file_name(s3_options_t * o, char * out_name, char const * path){ -+ if(o->bucket_per_file){ -+ out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); -+ } -+ // duplicate path except "/" -+ while(*path != 0){ -+ char c = *path; -+ if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') )){ -+ *out_name = *path; -+ out_name++; -+ }else if(c >= 'A' && c <= 'Z'){ -+ *out_name = *path + ('a' - 'A'); -+ out_name++; -+ }else if(c == '/'){ -+ *out_name = '_'; -+ out_name++; -+ }else{ -+ // encode special characters -+ *out_name = 'a' + (c / 26); -+ out_name++; -+ *out_name = 'a' + (c % 26); -+ out_name++; -+ } -+ path++; -+ } -+ *out_name = 'b'; -+ out_name++; -+ *out_name = '\0'; -+} -+ -+static void def_bucket_name(s3_options_t * o, char * out_name, char const * path){ -+ // S3_MAX_BUCKET_NAME_SIZE -+ if(o->bucket_per_file){ -+ out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); -+ } -+ // duplicate path except "/" -+ while(*path != 0){ -+ char c = *path; -+ if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') )){ -+ *out_name = *path; -+ out_name++; -+ }else if(c >= 'A' && c <= 'Z'){ -+ *out_name = *path + ('a' - 'A'); -+ out_name++; -+ } -+ path++; -+ } -+ *out_name = '\0'; -+ -+ // S3Status S3_validate_bucket_name(const char *bucketName, S3UriStyle uriStyle); -+} -+ -+struct data_handling{ -+ IOR_size_t * buf; -+ int64_t size; -+}; -+ -+static S3Status s3status = S3StatusInterrupted; -+static S3ErrorDetails s3error = {NULL}; -+ -+static S3Status responsePropertiesCallback(const S3ResponseProperties *properties, void *callbackData){ -+ s3status = S3StatusOK; -+ return s3status; -+} -+ -+static void responseCompleteCallback(S3Status status, const S3ErrorDetails *error, void *callbackData) { -+ s3status = status; -+ if (error == NULL){ -+ s3error.message = NULL; -+ }else{ -+ s3error = *error; -+ } -+ return; -+} -+ -+#define CHECK_ERROR(p) \ -+if (s3status != S3StatusOK){ \ -+ WARNF("S3 %s:%d (path:%s) \"%s\": %s %s", __FUNCTION__, __LINE__, p, S3_get_status_name(s3status), s3error.message, s3error.furtherDetails ? s3error.furtherDetails : ""); \ -+} -+ -+ -+static S3ResponseHandler responseHandler = { &responsePropertiesCallback, &responseCompleteCallback }; -+ -+static char * S3_getVersion() -+{ -+ return "0.5"; -+} -+ -+static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options) -+{ -+ // Not needed -+} -+ -+ -+static void S3_Sync(aiori_mod_opt_t * options) -+{ -+ // Not needed -+} -+ -+static S3Status S3ListResponseCallback(const char *ownerId, const char *ownerDisplayName, const char *bucketName, int64_t creationDateSeconds, void *callbackData){ -+ uint64_t * count = (uint64_t*) callbackData; -+ *count += 1; -+ return S3StatusOK; -+} -+ -+static S3ListServiceHandler listhandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & S3ListResponseCallback}; -+ -+static int S3_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ -+ stat->f_bsize = 1; -+ stat->f_blocks = 1; -+ stat->f_bfree = 1; -+ stat->f_bavail = 1; -+ stat->f_ffree = 1; -+ s3_options_t * o = (s3_options_t*) options; -+ -+ // use the number of bucket as files -+ uint64_t buckets = 0; -+ S3_list_service(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, -+ o->authRegion, NULL, o->timeout, & listhandler, & buckets); -+ stat->f_files = buckets; -+ CHECK_ERROR(o->authRegion); -+ -+ return 0; -+} -+ -+static S3Status S3multipart_handler(const char *upload_id, void *callbackData){ -+ *((char const**)(callbackData)) = upload_id; -+ return S3StatusOK; -+} -+ -+static S3MultipartInitialHandler multipart_handler = { {&responsePropertiesCallback, &responseCompleteCallback }, & S3multipart_handler}; -+ -+typedef struct{ -+ char * object; -+} S3_fd_t; -+ -+static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData){ -+ struct data_handling * dh = (struct data_handling *) callbackData; -+ const int64_t size = dh->size > bufferSize ? bufferSize : dh->size; -+ if(size == 0) return 0; -+ memcpy(buffer, dh->buf, size); -+ dh->buf = (IOR_size_t*) ((char*)(dh->buf) + size); -+ dh->size -= size; -+ -+ return size; -+} -+ -+static S3PutObjectHandler putObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & putObjectDataCallback }; -+ -+static aiori_fd_t *S3_Create(char *path, int iorflags, aiori_mod_opt_t * options) -+{ -+ char * upload_id; -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ def_file_name(o, p, path); -+ -+ -+ if(iorflags & IOR_CREAT){ -+ if(o->bucket_per_file){ -+ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); -+ }else{ -+ struct data_handling dh = { .buf = NULL, .size = 0 }; -+ S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, &putObjectHandler, & dh); -+ } -+ if (s3status != S3StatusOK){ -+ CHECK_ERROR(p); -+ return NULL; -+ } -+ } -+ -+ S3_fd_t * fd = malloc(sizeof(S3_fd_t)); -+ fd->object = strdup(p); -+ return (aiori_fd_t*) fd; -+} -+ -+ -+static S3Status statResponsePropertiesCallback(const S3ResponseProperties *properties, void *callbackData){ -+ // check the size -+ struct stat *buf = (struct stat*) callbackData; -+ if(buf != NULL){ -+ buf->st_size = properties->contentLength; -+ buf->st_mtime = properties->lastModified; -+ } -+ s3status = S3StatusOK; -+ return s3status; -+} -+ -+static S3ResponseHandler statResponseHandler = { &statResponsePropertiesCallback, &responseCompleteCallback }; -+ -+static aiori_fd_t *S3_Open(char *path, int flags, aiori_mod_opt_t * options) -+{ -+ if(flags & IOR_CREAT){ -+ return S3_Create(path, flags, options); -+ } -+ if(flags & IOR_WRONLY){ -+ WARN("S3 IOR_WRONLY is not supported"); -+ } -+ if(flags & IOR_RDWR){ -+ WARN("S3 IOR_RDWR is not supported"); -+ } -+ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ def_file_name(o, p, path); -+ -+ if (o->bucket_per_file){ -+ S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, -+ NULL, o->host, p, o->authRegion, 0, NULL, -+ NULL, o->timeout, & responseHandler, NULL); -+ }else{ -+ struct stat buf; -+ S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); -+ } -+ if (s3status != S3StatusOK){ -+ CHECK_ERROR(p); -+ return NULL; -+ } -+ -+ S3_fd_t * fd = malloc(sizeof(S3_fd_t)); -+ fd->object = strdup(p); -+ return (aiori_fd_t*) fd; -+} -+ -+static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData){ -+ struct data_handling * dh = (struct data_handling *) callbackData; -+ const int64_t size = dh->size > bufferSize ? bufferSize : dh->size; -+ memcpy(dh->buf, buffer, size); -+ dh->buf = (IOR_size_t*) ((char*)(dh->buf) + size); -+ dh->size -= size; -+ -+ return S3StatusOK; -+} -+ -+static S3GetObjectHandler getObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & getObjectDataCallback }; -+ -+static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ -+ S3_fd_t * fd = (S3_fd_t *) afd; -+ struct data_handling dh = { .buf = buffer, .size = length }; -+ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ -+ if(o->bucket_per_file){ -+ o->bucket_context.bucketName = fd->object; -+ if(offset != 0){ -+ sprintf(p, "%ld-%ld", (long) offset, (long) length); -+ }else{ -+ sprintf(p, "0"); -+ } -+ }else{ -+ if(offset != 0){ -+ sprintf(p, "%s-%ld-%ld", fd->object, (long) offset, (long) length); -+ }else{ -+ sprintf(p, "%s", fd->object); -+ } -+ } -+ if(access == WRITE){ -+ S3_put_object(& o->bucket_context, p, length, NULL, NULL, o->timeout, &putObjectHandler, & dh); -+ }else{ -+ S3_get_object(& o->bucket_context, p, NULL, 0, length, NULL, o->timeout, &getObjectHandler, & dh); -+ } -+ if (! o->s3_compatible){ -+ CHECK_ERROR(p); -+ } -+ return length; -+} -+ -+ -+static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options) -+{ -+ S3_fd_t * fd = (S3_fd_t *) afd; -+ free(fd->object); -+ free(afd); -+} -+ -+typedef struct { -+ int status; // do not reorder! -+ s3_options_t * o; -+ int truncated; -+ char const *nextMarker; -+} s3_delete_req; -+ -+S3Status list_delete_cb(int isTruncated, const char *nextMarker, int contentsCount, const S3ListBucketContent *contents, int commonPrefixesCount, const char **commonPrefixes, void *callbackData){ -+ s3_delete_req * req = (s3_delete_req*) callbackData; -+ for(int i=0; i < contentsCount; i++){ -+ S3_delete_object(& req->o->bucket_context, contents[i].key, NULL, req->o->timeout, & responseHandler, NULL); -+ } -+ req->truncated = isTruncated; -+ if(isTruncated){ -+ req->nextMarker = nextMarker; -+ } -+ return S3StatusOK; -+} -+ -+static S3ListBucketHandler list_delete_handler = {{&responsePropertiesCallback, &responseCompleteCallback }, list_delete_cb}; -+ -+static void S3_Delete(char *path, aiori_mod_opt_t * options) -+{ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ def_file_name(o, p, path); -+ -+ -+ if(o->bucket_per_file){ -+ o->bucket_context.bucketName = p; -+ s3_delete_req req = {0, o, 0, NULL}; -+ do{ -+ S3_list_bucket(& o->bucket_context, NULL, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); -+ }while(req.truncated); -+ S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); -+ }else{ -+ char * del_heuristics = getenv("S3LIB_DELETE_HEURISTICS"); -+ if(del_heuristics){ -+ struct stat buf; -+ S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); -+ if(s3status != S3StatusOK){ -+ // As the file does not exist, can return safely -+ CHECK_ERROR(p); -+ return; -+ } -+ int threshold = atoi(del_heuristics); -+ if (buf.st_size > threshold){ -+ // there may exist fragments, so try to delete them -+ s3_delete_req req = {0, o, 0, NULL}; -+ do{ -+ S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); -+ }while(req.truncated); -+ } -+ S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); -+ }else{ -+ // Regular deletion, must remove all created fragments -+ S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); -+ if(s3status != S3StatusOK){ -+ // As the file does not exist, can return savely -+ CHECK_ERROR(p); -+ return; -+ } -+ s3_delete_req req = {0, o, 0, NULL}; -+ do{ -+ S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); -+ }while(req.truncated); -+ } -+ } -+ CHECK_ERROR(p); -+} -+ -+static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ def_bucket_name(o, p, path); -+ -+ -+ if (o->bucket_per_file){ -+ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); -+ CHECK_ERROR(p); -+ return 0; -+ }else{ -+ struct data_handling dh = { .buf = NULL, .size = 0 }; -+ S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, & putObjectHandler, & dh); -+ if (! o->s3_compatible){ -+ CHECK_ERROR(p); -+ } -+ return 0; -+ } -+} -+ -+static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ -+ def_bucket_name(o, p, path); -+ if (o->bucket_per_file){ -+ S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); -+ CHECK_ERROR(p); -+ return 0; -+ }else{ -+ S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); -+ CHECK_ERROR(p); -+ return 0; -+ } -+} -+ -+static int S3_stat(const char *path, struct stat *buf, aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ char p[FILENAME_MAX]; -+ def_file_name(o, p, path); -+ memset(buf, 0, sizeof(struct stat)); -+ // TODO count the individual file fragment sizes together -+ if (o->bucket_per_file){ -+ S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, -+ NULL, o->host, p, o->authRegion, 0, NULL, -+ NULL, o->timeout, & responseHandler, NULL); -+ }else{ -+ S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, buf); -+ } -+ if (s3status != S3StatusOK){ -+ return -1; -+ } -+ return 0; -+} -+ -+static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ -+ struct stat buf; -+ return S3_stat(path, & buf, options); -+} -+ -+static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName) -+{ -+ struct stat buf; -+ if(S3_stat(testFileName, & buf, options) != 0) return -1; -+ return buf.st_size; -+} -+ -+ -+static int S3_check_params(aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ if(o->access_key == NULL){ -+ o->access_key = ""; -+ } -+ if(o->secret_key == NULL){ -+ o->secret_key = ""; -+ } -+ if(o->host == NULL){ -+ WARN("The S3 hostname should be specified"); -+ } -+ return 0; -+} -+ -+static void S3_init(aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ int ret = S3_initialize(NULL, S3_INIT_ALL, o->host); -+ if(ret != S3StatusOK) -+ FAIL("Could not initialize S3 library"); -+ -+ // create a bucket id based on access-key using a trivial checksumming -+ if(! o->dont_suffix){ -+ uint64_t c = 0; -+ char * r = o->access_key; -+ for(uint64_t pos = 1; (*r) != '\0' ; r++, pos*=10) { -+ c += (*r) * pos; -+ } -+ int count = snprintf(NULL, 0, "%s%lu", o->bucket_prefix, c % 1000); -+ char * old_prefix = o->bucket_prefix; -+ o->bucket_prefix_cur = malloc(count + 1); -+ sprintf(o->bucket_prefix_cur, "%s%lu", old_prefix, c % 1000); -+ }else{ -+ o->bucket_prefix_cur = o->bucket_prefix; -+ } -+ -+ // init bucket context -+ memset(& o->bucket_context, 0, sizeof(o->bucket_context)); -+ o->bucket_context.hostName = o->host; -+ o->bucket_context.bucketName = o->bucket_prefix_cur; -+ if (o->use_ssl){ -+ o->s3_protocol = S3ProtocolHTTPS; -+ }else{ -+ o->s3_protocol = S3ProtocolHTTP; -+ } -+ o->bucket_context.protocol = o->s3_protocol; -+ o->bucket_context.uriStyle = S3UriStylePath; -+ o->bucket_context.accessKeyId = o->access_key; -+ o->bucket_context.secretAccessKey = o->secret_key; -+ -+ if (! o->bucket_per_file && rank == 0){ -+ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); -+ CHECK_ERROR(o->bucket_context.bucketName); -+ } -+ -+ if ( ret != S3StatusOK ){ -+ FAIL("S3 error %s", S3_get_status_name(ret)); -+ } -+} -+ -+static void S3_final(aiori_mod_opt_t * options){ -+ s3_options_t * o = (s3_options_t*) options; -+ if (! o->bucket_per_file && rank == 0){ -+ S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, NULL, o->timeout, & responseHandler, NULL); -+ CHECK_ERROR(o->bucket_context.bucketName); -+ } -+ -+ S3_deinitialize(); -+} -+ -+ -+ior_aiori_t S3_libS3_aiori = { -+ .name = "S3-libs3", -+ .name_legacy = NULL, -+ .create = S3_Create, -+ .open = S3_Open, -+ .xfer = S3_Xfer, -+ .close = S3_Close, -+ .delete = S3_Delete, -+ .get_version = S3_getVersion, -+ .fsync = S3_Fsync, -+ .xfer_hints = s3_xfer_hints, -+ .get_file_size = S3_GetFileSize, -+ .statfs = S3_statfs, -+ .mkdir = S3_mkdir, -+ .rmdir = S3_rmdir, -+ .access = S3_access, -+ .stat = S3_stat, -+ .initialize = S3_init, -+ .finalize = S3_final, -+ .get_options = S3_options, -+ .check_params = S3_check_params, -+ .sync = S3_Sync, -+ .enable_mdtest = true -+}; -diff --git a/src/aiori-aio.c b/src/aiori-aio.c -new file mode 100644 -index 0000000..fcf5dc7 ---- /dev/null -+++ b/src/aiori-aio.c -@@ -0,0 +1,257 @@ -+/* -+ This backend uses linux-aio -+ Requires: libaio-dev -+ */ -+ -+#ifdef HAVE_CONFIG_H -+# include "config.h" -+#endif -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "ior.h" -+#include "aiori.h" -+#include "iordef.h" -+#include "utilities.h" -+ -+#include "aiori-POSIX.h" -+ -+/************************** O P T I O N S *****************************/ -+typedef struct{ -+ aiori_mod_opt_t * p; // posix options -+ int max_pending; -+ int granularity; // how frequent to submit, submit ever granularity elements -+ -+ // runtime data -+ io_context_t ioctx; // one context per fs -+ struct iocb ** iocbs; -+ int iocbs_pos; // how many are pending in iocbs -+ -+ int in_flight; // total pending ops -+ IOR_offset_t pending_bytes; // track pending IO volume for error checking -+} aio_options_t; -+ -+option_help * aio_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ -+ aio_options_t * o = malloc(sizeof(aio_options_t)); -+ -+ if (init_values != NULL){ -+ memcpy(o, init_values, sizeof(aio_options_t)); -+ }else{ -+ memset(o, 0, sizeof(aio_options_t)); -+ o->max_pending = 128; -+ o->granularity = 16; -+ } -+ option_help * p_help = POSIX_options((aiori_mod_opt_t**)& o->p, init_values == NULL ? NULL : (aiori_mod_opt_t*) ((aio_options_t*)init_values)->p); -+ *init_backend_options = (aiori_mod_opt_t*) o; -+ -+ option_help h [] = { -+ {0, "aio.max-pending", "Max number of pending ops", OPTION_OPTIONAL_ARGUMENT, 'd', & o->max_pending}, -+ {0, "aio.granularity", "How frequent to submit pending IOs, submit every *granularity* elements", OPTION_OPTIONAL_ARGUMENT, 'd', & o->granularity}, -+ LAST_OPTION -+ }; -+ option_help * help = option_merge(h, p_help); -+ free(p_help); -+ return help; -+} -+ -+ -+/************************** D E C L A R A T I O N S ***************************/ -+ -+typedef struct{ -+ aiori_fd_t * pfd; // the underlying POSIX fd -+} aio_fd_t; -+ -+/***************************** F U N C T I O N S ******************************/ -+ -+static aiori_xfer_hint_t * hints = NULL; -+ -+static void aio_xfer_hints(aiori_xfer_hint_t * params){ -+ hints = params; -+ POSIX_xfer_hints(params); -+} -+ -+static void aio_initialize(aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ if(io_setup(o->max_pending, & o->ioctx) != 0){ -+ ERRF("Couldn't initialize io context %s", strerror(errno)); -+ } -+ printf("%d\n", (o->max_pending)); -+ -+ o->iocbs = malloc(sizeof(struct iocb *) * o->granularity); -+ o->iocbs_pos = 0; -+ o->in_flight = 0; -+} -+ -+static void aio_finalize(aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ io_destroy(o->ioctx); -+} -+ -+static int aio_check_params(aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ POSIX_check_params((aiori_mod_opt_t*) o->p); -+ if(o->max_pending < 8){ -+ ERRF("AIO max-pending = %d < 8", o->max_pending); -+ } -+ if(o->granularity > o->max_pending){ -+ ERRF("AIO granularity must be < max-pending, is %d > %d", o->granularity, o->max_pending); -+ } -+ return 0; -+} -+ -+static aiori_fd_t *aio_Open(char *testFileName, int flags, aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ aio_fd_t * fd = malloc(sizeof(aio_fd_t)); -+ fd->pfd = POSIX_Open(testFileName, flags, o->p); -+ return (aiori_fd_t*) fd; -+} -+ -+static aiori_fd_t *aio_create(char *testFileName, int flags, aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ aio_fd_t * fd = malloc(sizeof(aio_fd_t)); -+ fd->pfd = POSIX_Create(testFileName, flags, o->p); -+ return (aiori_fd_t*) fd; -+} -+ -+/* called whenever the granularity is met */ -+static void submit_pending(aio_options_t * o){ -+ if(o->iocbs_pos == 0){ -+ return; -+ } -+ int res; -+ res = io_submit(o->ioctx, o->iocbs_pos, o->iocbs); -+ //printf("AIO submit %d jobs\n", o->iocbs_pos); -+ if(res != o->iocbs_pos){ -+ if(errno == EAGAIN){ -+ ERR("AIO: errno == EAGAIN; this should't happen"); -+ } -+ ERRF("AIO: submitted %d, error: \"%s\" ; this should't happen", res, strerror(errno)); -+ } -+ o->iocbs_pos = 0; -+} -+ -+/* complete all pending ops */ -+static void complete_all(aio_options_t * o){ -+ submit_pending(o); -+ -+ struct io_event events[o->in_flight]; -+ int num_events; -+ num_events = io_getevents(o->ioctx, o->in_flight, o->in_flight, events, NULL); -+ for (int i = 0; i < num_events; i++) { -+ struct io_event event = events[i]; -+ if(event.res == -1){ -+ ERR("AIO, error in io_getevents(), IO incomplete!"); -+ }else{ -+ o->pending_bytes -= event.res; -+ } -+ free(event.obj); -+ } -+ if(o->pending_bytes != 0){ -+ ERRF("AIO, error in flushing data, pending bytes: %lld", o->pending_bytes); -+ } -+ o->in_flight = 0; -+} -+ -+/* called if we must make *some* progress */ -+static void process_some(aio_options_t * o){ -+ if(o->in_flight == 0){ -+ return; -+ } -+ struct io_event events[o->in_flight]; -+ int num_events; -+ int mn = o->in_flight < o->granularity ? o->in_flight : o->granularity; -+ num_events = io_getevents(o->ioctx, mn, o->in_flight, events, NULL); -+ //printf("Completed: %d\n", num_events); -+ for (int i = 0; i < num_events; i++) { -+ struct io_event event = events[i]; -+ if(event.res == -1){ -+ ERR("AIO, error in io_getevents(), IO incomplete!"); -+ }else{ -+ o->pending_bytes -= event.res; -+ } -+ free(event.obj); -+ } -+ o->in_flight -= num_events; -+} -+ -+static IOR_offset_t aio_Xfer(int access, aiori_fd_t *fd, IOR_size_t * buffer, -+ IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ aio_fd_t * afd = (aio_fd_t*) fd; -+ -+ if(o->in_flight >= o->max_pending){ -+ process_some(o); -+ } -+ o->pending_bytes += length; -+ -+ struct iocb * iocb = malloc(sizeof(struct iocb)); -+ if(access == WRITE){ -+ io_prep_pwrite(iocb, *(int*)afd->pfd, buffer, length, offset); -+ }else{ -+ io_prep_pread(iocb, *(int*)afd->pfd, buffer, length, offset); -+ } -+ o->iocbs[o->iocbs_pos] = iocb; -+ o->iocbs_pos++; -+ o->in_flight++; -+ -+ if(o->iocbs_pos == o->granularity){ -+ submit_pending(o); -+ } -+ return length; -+} -+ -+static void aio_Close(aiori_fd_t *fd, aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ aio_fd_t * afd = (aio_fd_t*) fd; -+ complete_all(o); -+ POSIX_Close(afd->pfd, o->p); -+} -+ -+static void aio_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ complete_all(o); -+ aio_fd_t * afd = (aio_fd_t*) fd; -+ POSIX_Fsync(afd->pfd, o->p); -+} -+ -+static void aio_Sync(aiori_mod_opt_t * param){ -+ aio_options_t * o = (aio_options_t*) param; -+ complete_all(o); -+ POSIX_Sync((aiori_mod_opt_t*) o->p); -+} -+ -+ -+ -+ior_aiori_t aio_aiori = { -+ .name = "AIO", -+ .name_legacy = NULL, -+ .create = aio_create, -+ .get_options = aio_options, -+ .initialize = aio_initialize, -+ .finalize = aio_finalize, -+ .xfer_hints = aio_xfer_hints, -+ .fsync = aio_Fsync, -+ .open = aio_Open, -+ .xfer = aio_Xfer, -+ .close = aio_Close, -+ .sync = aio_Sync, -+ .check_params = aio_check_params, -+ .delete = POSIX_Delete, -+ .get_version = aiori_get_version, -+ .get_file_size = POSIX_GetFileSize, -+ .statfs = aiori_posix_statfs, -+ .mkdir = aiori_posix_mkdir, -+ .rmdir = aiori_posix_rmdir, -+ .access = aiori_posix_access, -+ .stat = aiori_posix_stat, -+ .enable_mdtest = true -+}; -diff --git a/src/aiori-debug.h b/src/aiori-debug.h -new file mode 100644 -index 0000000..9affb71 ---- /dev/null -+++ b/src/aiori-debug.h -@@ -0,0 +1,101 @@ -+#ifndef _AIORI_UTIL_H -+#define _AIORI_UTIL_H -+ -+/* This file contains only debug relevant helpers */ -+ -+#include -+#include -+ -+/* output log file */ -+extern FILE * out_logfile; -+/* verbosity level */ -+extern int verbose; -+/* treat warnings as errors */ -+extern int aiori_warning_as_errors; -+ -+#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__) -+void FailMessage(int rank, const char *location, char *format, ...); -+ -+/* display simple warning message and reset member value to default */ -+#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER) do { \ -+ (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ -+ if (rank == 0) { \ -+ fprintf(out_logfile, "WARNING: %s. Using value of %d.\n", \ -+ MSG, (TO_STRUCT_PTR)->MEMBER); \ -+ } \ -+ fflush(out_logfile); \ -+} while (0) -+ -+/* display warning message with format string */ -+#define WARNF(FORMAT, ...) do { \ -+ if(aiori_warning_as_errors){ \ -+ ERRF(FORMAT, __VA_ARGS__); \ -+ } \ -+ if (verbose > VERBOSE_2) { \ -+ fprintf(out_logfile, "WARNING: " FORMAT ", (%s:%d).\n", \ -+ __VA_ARGS__, __FILE__, __LINE__); \ -+ } else { \ -+ fprintf(out_logfile, "WARNING: " FORMAT "\n", \ -+ __VA_ARGS__); \ -+ } \ -+ fflush(out_logfile); \ -+} while (0) -+ -+/* display simple warning message */ -+#define WARN(MSG) do { \ -+ WARNF("%s", MSG); \ -+} while (0) -+ -+/* display info message with format string */ -+#define INFOF(FORMAT, ...) do { \ -+ if (verbose > VERBOSE_2) { \ -+ fprintf(out_logfile, "INFO: " FORMAT ", (%s:%d).\n", \ -+ __VA_ARGS__, __FILE__, __LINE__); \ -+ } else { \ -+ fprintf(out_logfile, "INFO: " FORMAT "\n", \ -+ __VA_ARGS__); \ -+ } \ -+ fflush(out_logfile); \ -+} while (0) -+ -+/* display simple info message */ -+#define INFO(MSG) do { \ -+ INFOF("%s", MSG); \ -+} while (0) -+ -+/* display error message with format string and terminate execution */ -+#define ERRF(FORMAT, ...) do { \ -+ fprintf(out_logfile, "ERROR: " FORMAT ", (%s:%d)\n", \ -+ __VA_ARGS__, __FILE__, __LINE__); \ -+ fflush(out_logfile); \ -+ MPI_Abort(MPI_COMM_WORLD, -1); \ -+} while (0) -+ -+/* display simple error message and terminate execution */ -+#define ERR(MSG) do { \ -+ ERRF("%s", MSG); \ -+} while (0) -+ -+/* if MPI_STATUS indicates error, display error message with format */ -+/* string and error string from MPI_STATUS and terminate execution */ -+#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ -+ char resultString[MPI_MAX_ERROR_STRING]; \ -+ int resultLength; \ -+ int _MPI_STATUS = (MPI_STATUS); \ -+ \ -+ if (_MPI_STATUS != MPI_SUCCESS) { \ -+ MPI_Error_string(_MPI_STATUS, resultString, &resultLength); \ -+ fprintf(out_logfile, "ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ -+ __VA_ARGS__, resultString, __FILE__, __LINE__); \ -+ fflush(out_logfile); \ -+ MPI_Abort(MPI_COMM_WORLD, -1); \ -+ } \ -+} while(0) -+ -+/* if MPI_STATUS indicates error, display simple error message with */ -+/* error string from MPI_STATUS and terminate execution */ -+#define MPI_CHECK(MPI_STATUS, MSG) do { \ -+ MPI_CHECKF(MPI_STATUS, "%s", MSG); \ -+} while(0) -+ -+#endif -diff --git a/src/aiori.c b/src/aiori.c -index 71f99d1..6c9a971 100644 ---- a/src/aiori.c -+++ b/src/aiori.c -@@ -42,8 +42,13 @@ ior_aiori_t *available_aiori[] = { - #ifdef USE_POSIX_AIORI - &posix_aiori, - #endif -+#ifdef USE_AIO_AIORI -+ &aio_aiori, -+#endif -+#ifdef USE_PMDK_AIORI -+ &pmdk_aiori, -+#endif - #ifdef USE_DAOS_AIORI -- &daos_aiori, - &dfs_aiori, - #endif - & dummy_aiori, -@@ -65,8 +70,11 @@ ior_aiori_t *available_aiori[] = { - #ifdef USE_MMAP_AIORI - &mmap_aiori, - #endif --#ifdef USE_S3_AIORI -- &s3_aiori, -+#ifdef USE_S3_LIBS3_AIORI -+ &S3_libS3_aiori, -+#endif -+#ifdef USE_S3_4C_AIORI -+ &s3_4c_aiori, - &s3_plus_aiori, - &s3_emc_aiori, - #endif -@@ -97,6 +105,7 @@ void * airoi_update_module_options(const ior_aiori_t * backend, options_all_t * - } - - options_all_t * airoi_create_all_module_options(option_help * global_options){ -+ if(! out_logfile) out_logfile = stdout; - int airoi_c = aiori_count(); - options_all_t * opt = malloc(sizeof(options_all_t)); - opt->module_count = airoi_c + 1; -@@ -119,6 +128,8 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) - { - ior_aiori_t **tmp = available_aiori; - char delimiter = ' '; -+ *APIs = 0; -+ *APIs_legacy = 0; - - while (*tmp != NULL) - { -@@ -127,7 +138,6 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) - tmp++; - continue; - } -- - if (delimiter == ' ') - { - APIs += sprintf(APIs, "%s", (*tmp)->name); -@@ -139,6 +149,7 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) - if ((*tmp)->name_legacy != NULL) - APIs_legacy += sprintf(APIs_legacy, "%c%s", - delimiter, (*tmp)->name_legacy); -+ - tmp++; - } - } -@@ -152,47 +163,68 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) - * This function provides a AIORI statfs for POSIX-compliant filesystems. It - * uses statvfs is available and falls back on statfs. - */ --int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param) -+int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, aiori_mod_opt_t * module_options) - { -- int ret; -+ // find the parent directory -+ char * fileName = strdup(path); -+ int i; -+ int directoryFound = FALSE; -+ -+ /* get directory for outfile */ -+ i = strlen(fileName); -+ while (i-- > 0) { -+ if (fileName[i] == '/') { -+ fileName[i] = '\0'; -+ directoryFound = TRUE; -+ break; -+ } -+ } -+ /* if no directory/, use '.' */ -+ if (directoryFound == FALSE) { -+ strcpy(fileName, "."); -+ } -+ -+ int ret; - #if defined(HAVE_STATVFS) -- struct statvfs statfs_buf; -+ struct statvfs statfs_buf; - -- ret = statvfs (path, &statfs_buf); -+ ret = statvfs (fileName, &statfs_buf); - #else -- struct statfs statfs_buf; -+ struct statfs statfs_buf; - -- ret = statfs (path, &statfs_buf); -+ ret = statfs (fileName, &statfs_buf); - #endif -- if (-1 == ret) { -- return -1; -- } -+ if (-1 == ret) { -+ perror("POSIX couldn't call statvfs"); -+ return -1; -+ } - -- stat_buf->f_bsize = statfs_buf.f_bsize; -- stat_buf->f_blocks = statfs_buf.f_blocks; -- stat_buf->f_bfree = statfs_buf.f_bfree; -- stat_buf->f_files = statfs_buf.f_files; -- stat_buf->f_ffree = statfs_buf.f_ffree; -+ stat_buf->f_bsize = statfs_buf.f_bsize; -+ stat_buf->f_blocks = statfs_buf.f_blocks; -+ stat_buf->f_bfree = statfs_buf.f_bfree; -+ stat_buf->f_files = statfs_buf.f_files; -+ stat_buf->f_ffree = statfs_buf.f_ffree; - -- return 0; -+ free(fileName); -+ return 0; - } - --int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param) -+int aiori_posix_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * module_options) - { - return mkdir (path, mode); - } - --int aiori_posix_rmdir (const char *path, IOR_param_t * param) -+int aiori_posix_rmdir (const char *path, aiori_mod_opt_t * module_options) - { - return rmdir (path); - } - --int aiori_posix_access (const char *path, int mode, IOR_param_t * param) -+int aiori_posix_access (const char *path, int mode, aiori_mod_opt_t * module_options) - { - return access (path, mode); - } - --int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param) -+int aiori_posix_stat (const char *path, struct stat *buf, aiori_mod_opt_t * module_options) - { - return stat (path, buf); - } -@@ -202,92 +234,6 @@ char* aiori_get_version() - return ""; - } - --static bool is_initialized = false; -- --static void init_or_fini_internal(const ior_aiori_t *test_backend, -- const bool init) --{ -- if (init) -- { -- if (test_backend->initialize) -- test_backend->initialize(); -- } -- else -- { -- if (test_backend->finalize) -- test_backend->finalize(); -- } --} -- --static void init_or_fini(IOR_test_t *tests, const bool init) --{ -- /* Sanity check, we were compiled with SOME backend, right? */ -- if (0 == aiori_count ()) { -- ERR("No IO backends compiled into aiori. " -- "Run 'configure --with-', and recompile."); -- } -- -- /* Pointer to the initialize of finalize function */ -- -- -- /* if tests is NULL, initialize or finalize all available backends */ -- if (tests == NULL) -- { -- for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) -- init_or_fini_internal(*tmp, init); -- -- return; -- } -- -- for (IOR_test_t *t = tests; t != NULL; t = t->next) -- { -- IOR_param_t *params = &t->params; -- assert(params != NULL); -- -- const ior_aiori_t *test_backend = params->backend; -- assert(test_backend != NULL); -- -- init_or_fini_internal(test_backend, init); -- } --} -- -- --/** -- * Initialize IO backends. -- * -- * @param[in] tests Pointers to the first test -- * -- * This function initializes all backends which will be used. If tests is NULL -- * all available backends are initialized. -- */ --void aiori_initialize(IOR_test_t *tests) --{ -- if (is_initialized) -- return; -- -- init_or_fini(tests, true); -- -- is_initialized = true; --} -- --/** -- * Finalize IO backends. -- * -- * @param[in] tests Pointers to the first test -- * -- * This function finalizes all backends which were used. If tests is NULL -- * all available backends are finialized. -- */ --void aiori_finalize(IOR_test_t *tests) --{ -- if (!is_initialized) -- return; -- -- is_initialized = false; -- -- init_or_fini(tests, false); --} -- - const ior_aiori_t *aiori_select (const char *api) - { - char warn_str[256] = {0}; -diff --git a/src/aiori.h b/src/aiori.h -index 4d416c2..e0756d0 100755 ---- a/src/aiori.h -+++ b/src/aiori.h -@@ -15,17 +15,11 @@ - #ifndef _AIORI_H - #define _AIORI_H - --#include -- --#ifndef MPI_FILE_NULL --# include --#endif /* not MPI_FILE_NULL */ -- - #include - #include - --#include "ior.h" - #include "iordef.h" /* IOR Definitions */ -+#include "aiori-debug.h" - #include "option.h" - - /*************************** D E F I N I T I O N S ****************************/ -@@ -63,31 +57,62 @@ typedef struct ior_aiori_statfs { - uint64_t f_ffree; - } ior_aiori_statfs_t; - -+/* -+ This structure contains information about the expected IO pattern that may be used to optimize data access. Optimally, it should be stored for each file descriptor, at the moment it can only be set globally per aiori backend module. -+ */ -+typedef struct aiori_xfer_hint_t{ -+ int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ -+ int filePerProc; /* single file or file-per-process */ -+ int collective; /* collective I/O */ -+ int numTasks; /* number of tasks for test */ -+ int numNodes; /* number of nodes for test */ -+ int randomOffset; /* access is to random offsets */ -+ int fsyncPerWrite; /* fsync() after each write */ -+ IOR_offset_t segmentCount; /* number of segments (or HDF5 datasets) */ -+ IOR_offset_t blockSize; /* contiguous bytes to write per task */ -+ IOR_offset_t transferSize; /* size of transfer in bytes */ -+ IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */ -+ int singleXferAttempt; /* do not retry transfer if incomplete */ -+} aiori_xfer_hint_t; -+ -+/* this is a dummy structure to create some type safety */ -+struct aiori_mod_opt_t{ -+ void * dummy; -+}; -+ -+typedef struct aiori_fd_t{ -+ void * dummy; -+} aiori_fd_t; - - typedef struct ior_aiori { - char *name; - char *name_legacy; -- void *(*create)(char *, IOR_param_t *); -+ aiori_fd_t *(*create)(char *, int iorflags, aiori_mod_opt_t *); - int (*mknod)(char *); -- void *(*open)(char *, IOR_param_t *); -- IOR_offset_t (*xfer)(int, void *, IOR_size_t *, -- IOR_offset_t, IOR_param_t *); -- void (*close)(void *, IOR_param_t *); -- void (*delete)(char *, IOR_param_t *); -+ aiori_fd_t *(*open)(char *, int iorflags, aiori_mod_opt_t *); -+ /* -+ Allow to set generic transfer options that shall be applied to any subsequent IO call. -+ */ -+ void (*xfer_hints)(aiori_xfer_hint_t * params); -+ IOR_offset_t (*xfer)(int access, aiori_fd_t *, IOR_size_t *, -+ IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t * module_options); -+ void (*close)(aiori_fd_t *, aiori_mod_opt_t * module_options); -+ void (*delete)(char *, aiori_mod_opt_t * module_options); - char* (*get_version)(void); -- void (*fsync)(void *, IOR_param_t *); -- IOR_offset_t (*get_file_size)(IOR_param_t *, MPI_Comm, char *); -- int (*statfs) (const char *, ior_aiori_statfs_t *, IOR_param_t * param); -- int (*mkdir) (const char *path, mode_t mode, IOR_param_t * param); -- int (*rmdir) (const char *path, IOR_param_t * param); -- int (*access) (const char *path, int mode, IOR_param_t * param); -- int (*stat) (const char *path, struct stat *buf, IOR_param_t * param); -- void (*initialize)(void); /* called once per program before MPI is started */ -- void (*finalize)(void); /* called once per program after MPI is shutdown */ -- option_help * (*get_options)(void ** init_backend_options, void* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ -+ void (*fsync)(aiori_fd_t *, aiori_mod_opt_t * module_options); -+ IOR_offset_t (*get_file_size)(aiori_mod_opt_t * module_options, char * filename); -+ int (*statfs) (const char *, ior_aiori_statfs_t *, aiori_mod_opt_t * module_options); -+ int (*mkdir) (const char *path, mode_t mode, aiori_mod_opt_t * module_options); -+ int (*rmdir) (const char *path, aiori_mod_opt_t * module_options); -+ int (*access) (const char *path, int mode, aiori_mod_opt_t * module_options); -+ int (*stat) (const char *path, struct stat *buf, aiori_mod_opt_t * module_options); -+ void (*initialize)(aiori_mod_opt_t * options); /* called once per program before MPI is started */ -+ void (*finalize)(aiori_mod_opt_t * options); /* called once per program after MPI is shutdown */ -+ int (*rename) (const char *oldpath, const char *newpath, aiori_mod_opt_t * module_options); -+ option_help * (*get_options)(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ -+ int (*check_params)(aiori_mod_opt_t *); /* check if the provided module_optionseters for the given test and the module options are correct, if they aren't print a message and exit(1) or return 1*/ -+ void (*sync)(aiori_mod_opt_t * ); /* synchronize every pending operation for this storage */ - bool enable_mdtest; -- int (*check_params)(IOR_param_t *); /* check if the provided parameters for the given test and the module options are correct, if they aren't print a message and exit(1) or return 1*/ -- void (*sync)(IOR_param_t * ); /* synchronize every pending operation for this storage */ - } ior_aiori_t; - - enum bench_type { -@@ -96,6 +121,7 @@ enum bench_type { - }; - - extern ior_aiori_t dummy_aiori; -+extern ior_aiori_t aio_aiori; - extern ior_aiori_t daos_aiori; - extern ior_aiori_t dfs_aiori; - extern ior_aiori_t hdf5_aiori; -@@ -104,16 +130,16 @@ extern ior_aiori_t ime_aiori; - extern ior_aiori_t mpiio_aiori; - extern ior_aiori_t ncmpi_aiori; - extern ior_aiori_t posix_aiori; -+extern ior_aiori_t pmdk_aiori; - extern ior_aiori_t mmap_aiori; --extern ior_aiori_t s3_aiori; -+extern ior_aiori_t S3_libS3_aiori; -+extern ior_aiori_t s3_4c_aiori; - extern ior_aiori_t s3_plus_aiori; - extern ior_aiori_t s3_emc_aiori; - extern ior_aiori_t rados_aiori; - extern ior_aiori_t cephfs_aiori; - extern ior_aiori_t gfarm_aiori; - --void aiori_initialize(IOR_test_t * tests); --void aiori_finalize(IOR_test_t * tests); - const ior_aiori_t *aiori_select (const char *api); - int aiori_count (void); - void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type); -@@ -125,25 +151,27 @@ const char *aiori_default (void); - - /* some generic POSIX-based backend calls */ - char * aiori_get_version (void); --int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param); --int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param); --int aiori_posix_rmdir (const char *path, IOR_param_t * param); --int aiori_posix_access (const char *path, int mode, IOR_param_t * param); --int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param); -- --void *POSIX_Create(char *testFileName, IOR_param_t * param); --int POSIX_Mknod(char *testFileName); --void *POSIX_Open(char *testFileName, IOR_param_t * param); --IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); --void POSIX_Delete(char *testFileName, IOR_param_t * param); --void POSIX_Close(void *fd, IOR_param_t * param); --option_help * POSIX_options(void ** init_backend_options, void * init_values); -- -- --/* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ --void MPIIO_Delete(char *testFileName, IOR_param_t * param); --IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, -- char *testFileName); --int MPIIO_Access(const char *, int, IOR_param_t *); -+int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, aiori_mod_opt_t * module_options); -+int aiori_posix_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * module_options); -+int aiori_posix_rmdir (const char *path, aiori_mod_opt_t * module_options); -+int aiori_posix_access (const char *path, int mode, aiori_mod_opt_t * module_options); -+int aiori_posix_stat (const char *path, struct stat *buf, aiori_mod_opt_t * module_options); -+ -+ -+/* NOTE: these MPI-IO pro are exported for reuse by HDF5/PNetCDF */ -+ -+typedef struct { /* if you change this datatype, e.g., adding more options, make sure that all depending modules are updated */ -+ int showHints; /* show hints */ -+ int useFileView; /* use MPI_File_set_view */ -+ int preallocate; /* preallocate file size */ -+ int useSharedFilePointer; /* use shared file pointer */ -+ int useStridedDatatype; /* put strided access into datatype */ -+ char * hintsFileName; /* full name for hints file */ -+} mpiio_options_t; -+ -+void MPIIO_Delete(char *testFileName, aiori_mod_opt_t * module_options); -+IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * options, char *testFileName); -+int MPIIO_Access(const char *, int, aiori_mod_opt_t * module_options); -+void MPIIO_xfer_hints(aiori_xfer_hint_t * params); - - #endif /* not _AIORI_H */ -diff --git a/src/ior-internal.h b/src/ior-internal.h -index 9cc8406..c0af544 100644 ---- a/src/ior-internal.h -+++ b/src/ior-internal.h -@@ -17,7 +17,6 @@ void PrintShortSummary(IOR_test_t * test); - void PrintLongSummaryAllTests(IOR_test_t *tests_head); - void PrintLongSummaryHeader(); - void PrintLongSummaryOneTest(IOR_test_t *test); --void DisplayFreespace(IOR_param_t * test); - void GetTestFileName(char *, IOR_param_t *); - void PrintRemoveTiming(double start, double finish, int rep); - void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, -@@ -26,8 +25,7 @@ void PrintTestEnds(); - void PrintTableHeader(); - /* End of ior-output */ - --IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank); --IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access); -+IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count); - - struct results { - double min; -diff --git a/src/ior-output.c b/src/ior-output.c -index 01136fb..3f75e46 100644 ---- a/src/ior-output.c -+++ b/src/ior-output.c -@@ -20,6 +20,8 @@ void PrintTableHeader(){ - fprintf(out_resultfile, "\n"); - fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); - fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n"); -+ }else if(outputFormat == OUTPUT_CSV){ -+ fprintf(out_resultfile, "access,bw(MiB/s),IOPS,Latency,block(KiB),xfer(KiB),open(s),wr/rd(s),close(s),total(s),numTasks,iter\n"); - } - } - -@@ -45,8 +47,6 @@ static void PrintKeyValStart(char * key){ - } - if(outputFormat == OUTPUT_JSON){ - fprintf(out_resultfile, "\"%s\": \"", key); -- }else if(outputFormat == OUTPUT_CSV){ -- - } - } - -@@ -84,7 +84,7 @@ static void PrintKeyVal(char * key, char * value){ - if(outputFormat == OUTPUT_JSON){ - fprintf(out_resultfile, "\"%s\": \"%s\"", key, value); - }else if(outputFormat == OUTPUT_CSV){ -- fprintf(out_resultfile, "%s", value); -+ fprintf(out_resultfile, "%s,", value); - } - } - -@@ -96,9 +96,13 @@ static void PrintKeyValDouble(char * key, double value){ - return; - } - if(outputFormat == OUTPUT_JSON){ -- fprintf(out_resultfile, "\"%s\": %.4f", key, value); -+ if(isinf(value)){ -+ fprintf(out_resultfile, "\"%s\": \"%.4f\"", key, value); -+ }else{ -+ fprintf(out_resultfile, "\"%s\": %.4f", key, value); -+ } - }else if(outputFormat == OUTPUT_CSV){ -- fprintf(out_resultfile, "%.4f", value); -+ fprintf(out_resultfile, "%.4f,", value); - } - } - -@@ -113,7 +117,7 @@ static void PrintKeyValInt(char * key, int64_t value){ - if(outputFormat == OUTPUT_JSON){ - fprintf(out_resultfile, "\"%s\": %lld", key, (long long) value); - }else if(outputFormat == OUTPUT_CSV){ -- fprintf(out_resultfile, "%lld", (long long) value); -+ fprintf(out_resultfile, "%lld,", (long long) value); - } - } - -@@ -203,14 +207,17 @@ void PrintRepeatEnd(){ - void PrintRepeatStart(){ - if (rank != 0) - return; -- if( outputFormat == OUTPUT_DEFAULT){ -+ if(outputFormat == OUTPUT_DEFAULT){ - return; - } - PrintArrayStart(); - } - - void PrintTestEnds(){ -- if (rank != 0 || verbose < VERBOSE_0) { -+ if (outputFormat == OUTPUT_CSV){ -+ return; -+ } -+ if (rank != 0 || verbose <= VERBOSE_0) { - PrintEndSection(); - return; - } -@@ -246,7 +253,21 @@ void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, do - PrintKeyValDouble("closeTime", diff_subset[2]); - PrintKeyValDouble("totalTime", totalTime); - PrintEndSection(); -+ }else if (outputFormat == OUTPUT_CSV){ -+ PrintKeyVal("access", access == WRITE ? "write" : "read"); -+ PrintKeyValDouble("bwMiB", bw / MEBIBYTE); -+ PrintKeyValDouble("iops", iops); -+ PrintKeyValDouble("latency", latency); -+ PrintKeyValDouble("blockKiB", (double)test->params.blockSize / KIBIBYTE); -+ PrintKeyValDouble("xferKiB", (double)test->params.transferSize / KIBIBYTE); -+ PrintKeyValDouble("openTime", diff_subset[0]); -+ PrintKeyValDouble("wrRdTime", diff_subset[1]); -+ PrintKeyValDouble("closeTime", diff_subset[2]); -+ PrintKeyValDouble("totalTime", totalTime); -+ PrintKeyValInt("Numtasks", test->params.numTasks); -+ fprintf(out_resultfile, "%d\n", rep); - } -+ - fflush(out_resultfile); - } - -@@ -258,6 +279,10 @@ void PrintHeader(int argc, char **argv) - if (rank != 0) - return; - -+ if (outputFormat == OUTPUT_CSV){ -+ return; -+ } -+ - PrintStartSection(); - if (outputFormat != OUTPUT_DEFAULT){ - PrintKeyVal("Version", META_VERSION); -@@ -272,7 +297,7 @@ void PrintHeader(int argc, char **argv) - } - PrintKeyValEnd(); - if (uname(&unamebuf) != 0) { -- EWARN("uname failed"); -+ WARN("uname failed"); - PrintKeyVal("Machine", "Unknown"); - } else { - PrintKeyValStart("Machine"); -@@ -284,23 +309,6 @@ void PrintHeader(int argc, char **argv) - } - PrintKeyValEnd(); - } -- --#ifdef _NO_MPI_TIMER -- if (verbose >= VERBOSE_2) -- fprintf(out_logfile, "Using unsynchronized POSIX timer\n"); --#else /* not _NO_MPI_TIMER */ -- if (MPI_WTIME_IS_GLOBAL) { -- if (verbose >= VERBOSE_2) -- fprintf(out_logfile, "Using synchronized MPI timer\n"); -- } else { -- if (verbose >= VERBOSE_2) -- fprintf(out_logfile, "Using unsynchronized MPI timer\n"); -- } --#endif /* _NO_MPI_TIMER */ -- if (verbose >= VERBOSE_1) { -- fprintf(out_logfile, "Start time skew across all tasks: %.02f sec\n", -- wall_clock_deviation); -- } - if (verbose >= VERBOSE_3) { /* show env */ - fprintf(out_logfile, "STARTING ENVIRON LOOP\n"); - for (i = 0; environ[i] != NULL; i++) { -@@ -319,14 +327,16 @@ void PrintHeader(int argc, char **argv) - */ - void ShowTestStart(IOR_param_t *test) - { -+ if (outputFormat == OUTPUT_CSV){ -+ return; -+ } - PrintStartSection(); - PrintKeyValInt("TestID", test->id); - PrintKeyVal("StartTime", CurrentTimeString()); -- /* if pvfs2:, then skip */ -- if (strcasecmp(test->api, "DFS") && -- Regex(test->testFileName, "^[a-z][a-z].*:") == 0) { -- DisplayFreespace(test); -- } -+ -+ char filename[MAX_PATHLEN]; -+ GetTestFileName(filename, test); -+ ShowFileSystemSize(filename, test->backend, test->backend_options); - - if (verbose >= VERBOSE_3 || outputFormat == OUTPUT_JSON) { - char* data_packets[] = {"g","t","o","i"}; -@@ -337,7 +347,6 @@ void ShowTestStart(IOR_param_t *test) - PrintKeyVal("api", test->api); - PrintKeyVal("platform", test->platform); - PrintKeyVal("testFileName", test->testFileName); -- PrintKeyVal("hintsFileName", test->hintsFileName); - PrintKeyValInt("deadlineForStonewall", test->deadlineForStonewalling); - PrintKeyValInt("stoneWallingWearOut", test->stoneWallingWearOut); - PrintKeyValInt("maxTimeDuration", test->maxTimeDuration); -@@ -355,9 +364,7 @@ void ShowTestStart(IOR_param_t *test) - PrintKeyValInt("fsync", test->fsync); - PrintKeyValInt("fsyncperwrite", test->fsyncPerWrite); - PrintKeyValInt("useExistingTestFile", test->useExistingTestFile); -- PrintKeyValInt("showHints", test->showHints); - PrintKeyValInt("uniqueDir", test->uniqueDir); -- PrintKeyValInt("individualDataSets", test->individualDataSets); - PrintKeyValInt("singleXferAttempt", test->singleXferAttempt); - PrintKeyValInt("readFile", test->readFile); - PrintKeyValInt("writeFile", test->writeFile); -@@ -368,24 +375,19 @@ void ShowTestStart(IOR_param_t *test) - PrintKeyValInt("randomOffset", test->randomOffset); - PrintKeyValInt("checkWrite", test->checkWrite); - PrintKeyValInt("checkRead", test->checkRead); -- PrintKeyValInt("preallocate", test->preallocate); -- PrintKeyValInt("useFileView", test->useFileView); -- PrintKeyValInt("setAlignment", test->setAlignment); -- PrintKeyValInt("storeFileOffset", test->storeFileOffset); -- PrintKeyValInt("useSharedFilePointer", test->useSharedFilePointer); -- PrintKeyValInt("useStridedDatatype", test->useStridedDatatype); -+ PrintKeyValInt("dataPacketType", test->dataPacketType); - PrintKeyValInt("keepFile", test->keepFile); - PrintKeyValInt("keepFileWithError", test->keepFileWithError); -- PrintKeyValInt("quitOnError", test->quitOnError); -+ PrintKeyValInt("warningAsErrors", test->warningAsErrors); - PrintKeyValInt("verbose", verbose); - PrintKeyVal("data packet type", data_packets[test->dataPacketType]); - PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* Seed value was copied into setTimeStampSignature as well */ - PrintKeyValInt("collective", test->collective); - PrintKeyValInt("segmentCount", test->segmentCount); -- #ifdef HAVE_GPFS_FCNTL_H -- PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access); -- PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token); -- #endif -+ //#ifdef HAVE_GPFS_FCNTL_H -+ //PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access); -+ //PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token); -+ //#endif - PrintKeyValInt("transferSize", test->transferSize); - PrintKeyValInt("blockSize", test->blockSize); - PrintEndSection(); -@@ -412,6 +414,9 @@ void ShowTestEnd(IOR_test_t *tptr){ - */ - void ShowSetup(IOR_param_t *params) - { -+ if (outputFormat == OUTPUT_CSV){ -+ return; -+ } - if (params->debug) { - fprintf(out_logfile, "\n*** DEBUG MODE ***\n"); - fprintf(out_logfile, "*** %s ***\n\n", params->debug); -@@ -452,14 +457,10 @@ void ShowSetup(IOR_param_t *params) - if(params->dryRun){ - PrintKeyValInt("dryRun", params->dryRun); - } -- --#ifdef HAVE_LUSTRE_LUSTRE_USER_H -- if (params->lustre_set_striping) { -- PrintKeyVal("Lustre stripe size", ((params->lustre_stripe_size == 0) ? "Use default" : -- HumanReadable(params->lustre_stripe_size, BASE_TWO))); -- PrintKeyValInt("Lustre stripe count", params->lustre_stripe_count); -+ if(params->verbose) { -+ PrintKeyValInt("verbose", params->verbose); - } --#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ -+ - if (params->deadlineForStonewalling > 0) { - PrintKeyValInt("stonewallingTime", params->deadlineForStonewalling); - PrintKeyValInt("stoneWallingWearOut", params->stoneWallingWearOut ); -@@ -535,7 +536,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) - struct results *ops; - - int reps; -- if (rank != 0 || verbose < VERBOSE_0) -+ if (rank != 0 || verbose <= VERBOSE_0) - return; - - reps = params->repetitions; -@@ -609,9 +610,6 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) - PrintKeyValInt("taskPerNodeOffset", params->taskPerNodeOffset); - PrintKeyValInt("reorderTasksRandom", params->reorderTasksRandom); - PrintKeyValInt("reorderTasksRandomSeed", params->reorderTasksRandomSeed); -- PrintKeyValInt("segmentCount", params->segmentCount); -- PrintKeyValInt("blockSize", params->blockSize); -- PrintKeyValInt("transferSize", params->transferSize); - PrintKeyValDouble("bwMaxMIB", bw->max / MEBIBYTE); - PrintKeyValDouble("bwMinMIB", bw->min / MEBIBYTE); - PrintKeyValDouble("bwMeanMIB", bw->mean / MEBIBYTE); -@@ -627,8 +625,6 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) - } - PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE); - PrintEndSection(); -- }else if (outputFormat == OUTPUT_CSV){ -- - } - - fflush(out_resultfile); -@@ -650,10 +646,10 @@ void PrintLongSummaryOneTest(IOR_test_t *test) - - void PrintLongSummaryHeader() - { -- if (rank != 0 || verbose < VERBOSE_0) -+ if (rank != 0 || verbose <= VERBOSE_0) - return; - if(outputFormat != OUTPUT_DEFAULT){ -- return; -+ return; - } - - fprintf(out_resultfile, "\n"); -@@ -670,7 +666,7 @@ void PrintLongSummaryHeader() - void PrintLongSummaryAllTests(IOR_test_t *tests_head) - { - IOR_test_t *tptr; -- if (rank != 0 || verbose < VERBOSE_0) -+ if (rank != 0 || verbose <= VERBOSE_0) - return; - - PrintArrayEnd(); -@@ -680,8 +676,6 @@ void PrintLongSummaryAllTests(IOR_test_t *tests_head) - fprintf(out_resultfile, "Summary of all tests:"); - }else if (outputFormat == OUTPUT_JSON){ - PrintNamedArrayStart("summary"); -- }else if (outputFormat == OUTPUT_CSV){ -- - } - - PrintLongSummaryHeader(); -@@ -703,7 +697,7 @@ void PrintShortSummary(IOR_test_t * test) - int reps; - int i; - -- if (rank != 0 || verbose < VERBOSE_0) -+ if (rank != 0 || verbose <= VERBOSE_0) - return; - - PrintArrayEnd(); -@@ -740,41 +734,9 @@ void PrintShortSummary(IOR_test_t * test) - } - } - -- --/* -- * Display freespace (df). -- */ --void DisplayFreespace(IOR_param_t * test) --{ -- char fileName[MAX_STR] = { 0 }; -- int i; -- int directoryFound = FALSE; -- -- /* get outfile name */ -- GetTestFileName(fileName, test); -- -- /* get directory for outfile */ -- i = strlen(fileName); -- while (i-- > 0) { -- if (fileName[i] == '/') { -- fileName[i] = '\0'; -- directoryFound = TRUE; -- break; -- } -- } -- -- /* if no directory/, use '.' */ -- if (directoryFound == FALSE) { -- strcpy(fileName, "."); -- } -- -- ShowFileSystemSize(fileName); --} -- -- - void PrintRemoveTiming(double start, double finish, int rep) - { -- if (rank != 0 || verbose < VERBOSE_0) -+ if (rank != 0 || verbose <= VERBOSE_0) - return; - - if (outputFormat == OUTPUT_DEFAULT){ -diff --git a/src/ior.c b/src/ior.c -index 361a9a4..81ef0e6 100755 ---- a/src/ior.c -+++ b/src/ior.c -@@ -33,6 +33,10 @@ - # include /* uname() */ - #endif - -+#ifdef HAVE_CUDA -+#include -+#endif -+ - #include - - #include "ior.h" -@@ -51,42 +55,120 @@ static const ior_aiori_t *backend; - static void DestroyTests(IOR_test_t *tests_head); - static char *PrependDir(IOR_param_t *, char *); - static char **ParseFileName(char *, int *); --static void InitTests(IOR_test_t * , MPI_Comm); -+static void InitTests(IOR_test_t *); - static void TestIoSys(IOR_test_t *); --static void ValidateTests(IOR_param_t *); -+static void ValidateTests(IOR_param_t * params, MPI_Comm com); - static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, -- void *fd, const int access, -+ aiori_fd_t *fd, const int access, - IOR_io_buffers *ioBuffers); - -+static void ior_set_xfer_hints(IOR_param_t * p){ -+ aiori_xfer_hint_t * hints = & p->hints; -+ hints->dryRun = p->dryRun; -+ hints->filePerProc = p->filePerProc; -+ hints->collective = p->collective; -+ hints->numTasks = p->numTasks; -+ hints->numNodes = p->numNodes; -+ hints->randomOffset = p->randomOffset; -+ hints->fsyncPerWrite = p->fsyncPerWrite; -+ hints->segmentCount = p->segmentCount; -+ hints->blockSize = p->blockSize; -+ hints->transferSize = p->transferSize; -+ hints->expectedAggFileSize = p->expectedAggFileSize; -+ hints->singleXferAttempt = p->singleXferAttempt; -+ -+ if(backend->xfer_hints){ -+ backend->xfer_hints(hints); -+ } -+} -+ -+int aiori_warning_as_errors = 0; -+ -+/* -+ Returns 1 if the process participates in the test -+ */ -+static int test_initialize(IOR_test_t * test){ -+ int range[3]; -+ IOR_param_t *params = &test->params; -+ MPI_Group orig_group, new_group; -+ -+ /* set up communicator for test */ -+ MPI_CHECK(MPI_Comm_group(params->mpi_comm_world, &orig_group), -+ "MPI_Comm_group() error"); -+ range[0] = 0; /* first rank */ -+ range[1] = params->numTasks - 1; /* last rank */ -+ range[2] = 1; /* stride */ -+ MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group), -+ "MPI_Group_range_incl() error"); -+ MPI_CHECK(MPI_Comm_create(params->mpi_comm_world, new_group, & params->testComm), -+ "MPI_Comm_create() error"); -+ MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); -+ MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); -+ -+ -+ if (params->testComm == MPI_COMM_NULL) { -+ /* tasks not in the group do not participate in this test, this matches the proceses in test_finalize() that participate */ -+ MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); -+ return 0; -+ } -+ -+ /* Setup global variables */ -+ testComm = params->testComm; -+ verbose = test->params.verbose; -+ backend = test->params.backend; -+ -+#ifdef HAVE_CUDA -+ cudaError_t cret = cudaSetDevice(test->params.gpuID); -+ if(cret != cudaSuccess){ -+ WARNF("cudaSetDevice(%d) error: %s", test->params.gpuID, cudaGetErrorString(cret)); -+ } -+#endif -+ -+ if(backend->initialize){ -+ backend->initialize(test->params.backend_options); -+ } -+ ior_set_xfer_hints(& test->params); -+ aiori_warning_as_errors = test->params.warningAsErrors; -+ -+ if (rank == 0 && verbose >= VERBOSE_0) { -+ ShowTestStart(& test->params); -+ } -+ return 1; -+} -+ -+static void test_finalize(IOR_test_t * test){ -+ backend = test->params.backend; -+ if(backend->finalize){ -+ backend->finalize(test->params.backend_options); -+ } -+ MPI_CHECK(MPI_Barrier(test->params.mpi_comm_world), "barrier error"); -+ MPI_CHECK(MPI_Comm_free(& testComm), "MPI_Comm_free() error"); -+} -+ -+ - IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){ - IOR_test_t *tests_head; - IOR_test_t *tptr; - out_logfile = world_out; - out_resultfile = world_out; -- mpi_comm_world = world_com; - -- MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); -+ MPI_CHECK(MPI_Comm_rank(world_com, &rank), "cannot get rank"); - - /* setup tests, and validate parameters */ -- tests_head = ParseCommandLine(argc, argv); -- InitTests(tests_head, world_com); -- verbose = tests_head->params.verbose; -+ tests_head = ParseCommandLine(argc, argv, world_com); -+ InitTests(tests_head); - - PrintHeader(argc, argv); - - /* perform each test */ - for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { -- aiori_initialize(tptr); -+ int participate = test_initialize(tptr); -+ if( ! participate ) continue; - totalErrorCount = 0; -- verbose = tptr->params.verbose; -- backend = tptr->params.backend; -- if (rank == 0 && verbose >= VERBOSE_0) { -- ShowTestStart(&tptr->params); -- } - TestIoSys(tptr); - tptr->results->errors = totalErrorCount; - ShowTestEnd(tptr); -- aiori_finalize(tptr); -+ test_finalize(tptr); - } - - PrintLongSummaryAllTests(tests_head); -@@ -109,34 +191,26 @@ int ior_main(int argc, char **argv) - /* - * check -h option from commandline without starting MPI; - */ -- tests_head = ParseCommandLine(argc, argv); -+ tests_head = ParseCommandLine(argc, argv, MPI_COMM_WORLD); - - /* start the MPI code */ - MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI"); - -- mpi_comm_world = MPI_COMM_WORLD; -- MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); -+ MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &rank), "cannot get rank"); - - /* set error-handling */ - /*MPI_CHECK(MPI_Errhandler_set(mpi_comm_world, MPI_ERRORS_RETURN), - "cannot set errhandler"); */ - - /* setup tests, and validate parameters */ -- InitTests(tests_head, mpi_comm_world); -- verbose = tests_head->params.verbose; -- -- aiori_initialize(tests_head); // this is quite suspicious, likely an error when multiple tests need to be executed with different backends and options -+ InitTests(tests_head); - - PrintHeader(argc, argv); - - /* perform each test */ - for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { -- verbose = tptr->params.verbose; -- backend = tptr->params.backend; -- if (rank == 0 && verbose >= VERBOSE_0) { -- backend = tptr->params.backend; -- ShowTestStart(&tptr->params); -- } -+ int participate = test_initialize(tptr); -+ if( ! participate ) continue; - - // This is useful for trapping a running MPI process. While - // this is sleeping, run the script 'testing/hdfs/gdb.attach' -@@ -148,18 +222,17 @@ int ior_main(int argc, char **argv) - - TestIoSys(tptr); - ShowTestEnd(tptr); -+ test_finalize(tptr); - } - -- if (verbose < 0) -+ if (verbose <= VERBOSE_0) - /* always print final summary */ -- verbose = 0; -+ verbose = VERBOSE_1; - PrintLongSummaryAllTests(tests_head); - - /* display finish time */ - PrintTestEnds(); - -- aiori_finalize(tests_head); -- - MPI_CHECK(MPI_Finalize(), "cannot finalize MPI"); - - DestroyTests(tests_head); -@@ -172,18 +245,12 @@ int ior_main(int argc, char **argv) - /* - * Initialize an IOR_param_t structure to the defaults - */ --void init_IOR_Param_t(IOR_param_t * p) -+void init_IOR_Param_t(IOR_param_t * p, MPI_Comm com) - { - const char *default_aiori = aiori_default (); -- char *hdfs_user; -- - assert (NULL != default_aiori); - - memset(p, 0, sizeof(IOR_param_t)); -- -- p->mode = IOR_IRUSR | IOR_IWUSR | IOR_IRGRP | IOR_IWGRP; -- p->openFlags = IOR_RDWR | IOR_CREAT; -- - p->api = strdup(default_aiori); - p->platform = strdup("HOST(OSTYPE)"); - p->testFileName = strdup("testFile"); -@@ -208,25 +275,10 @@ void init_IOR_Param_t(IOR_param_t * p) - p->transferSize = 262144; - p->randomSeed = -1; - p->incompressibleSeed = 573; -- p->testComm = mpi_comm_world; -- p->setAlignment = 1; -- p->lustre_start_ost = -1; -- -- hdfs_user = getenv("USER"); -- if (!hdfs_user) -- hdfs_user = ""; -- p->hdfs_user = strdup(hdfs_user); -- p->hdfs_name_node = "default"; -- p->hdfs_name_node_port = 0; /* ??? */ -- p->hdfs_fs = NULL; -- p->hdfs_replicas = 0; /* invokes the default */ -- p->hdfs_block_size = 0; -+ p->testComm = com; // this com might change for smaller tests -+ p->mpi_comm_world = com; - - p->URI = NULL; -- p->part_number = 0; -- -- p->beegfs_numTargets = -1; -- p->beegfs_chunkSize = -1; - } - - static void -@@ -238,7 +290,7 @@ DisplayOutliers(int numTasks, - double sum, mean, sqrDiff, var, sd; - - /* for local timerVal, don't compensate for wall clock delta */ -- timerVal += wall_clock_delta; -+ //timerVal += wall_clock_delta; - - MPI_CHECK(MPI_Allreduce - (&timerVal, &sum, 1, MPI_DOUBLE, MPI_SUM, testComm), -@@ -262,10 +314,8 @@ DisplayOutliers(int numTasks, - if (ret != 0) - strcpy(hostname, "unknown"); - -- fprintf(out_logfile, "WARNING: for %s, task %d, %s %s is %f\n", -- hostname, rank, accessString, timeString, timerVal); -- fprintf(out_logfile, " (mean=%f, stddev=%f)\n", mean, sd); -- fflush(out_logfile); -+ WARNF("for %s, task %d, %s %s is %f (mean=%f, stddev=%f)\n", -+ hostname, rank, accessString, timeString, timerVal, mean, sd); - } - } - -@@ -295,37 +345,54 @@ CheckForOutliers(IOR_param_t *test, const double *timer, const int access) - * Check if actual file size equals expected size; if not use actual for - * calculating performance rate. - */ --static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, -- const int access) -+static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t dataMoved, int rep, const int access) - { - IOR_param_t *params = &test->params; - IOR_results_t *results = test->results; - IOR_point_t *point = (access == WRITE) ? &results[rep].write : - &results[rep].read; - -+ /* get the size of the file */ -+ IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; -+ aggFileSizeFromStat = backend->get_file_size(params->backend_options, testFilename); -+ -+ if (params->hints.filePerProc == TRUE) { -+ MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, -+ MPI_LONG_LONG_INT, MPI_SUM, testComm), -+ "cannot reduce total data moved"); -+ aggFileSizeFromStat = tmpSum; -+ } else { -+ MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, -+ MPI_LONG_LONG_INT, MPI_MIN, testComm), -+ "cannot reduce total data moved"); -+ MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, -+ MPI_LONG_LONG_INT, MPI_MAX, testComm), -+ "cannot reduce total data moved"); -+ if (tmpMin != tmpMax) { -+ if (rank == 0) { -+ WARN("inconsistent file size by different tasks"); -+ } -+ /* incorrect, but now consistent across tasks */ -+ aggFileSizeFromStat = tmpMin; -+ } -+ } -+ point->aggFileSizeFromStat = aggFileSizeFromStat; -+ - MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer, - 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); - -- if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0 && -- strcasecmp(params->api, "DAOS") != 0) { -+ if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { - if (verbose >= VERBOSE_0 && rank == 0) { - if ((params->expectedAggFileSize - != point->aggFileSizeFromXfer) - || (point->aggFileSizeFromStat - != point->aggFileSizeFromXfer)) { -- fprintf(out_logfile, -- "WARNING: Expected aggregate file size = %lld.\n", -- (long long) params->expectedAggFileSize); -- fprintf(out_logfile, -- "WARNING: Stat() of aggregate file size = %lld.\n", -- (long long) point->aggFileSizeFromStat); -- fprintf(out_logfile, -- "WARNING: Using actual aggregate bytes moved = %lld.\n", -- (long long) point->aggFileSizeFromXfer); -+ WARNF("Expected aggregate file size = %lld", (long long) params->expectedAggFileSize); -+ WARNF("Stat() of aggregate file size = %lld", (long long) point->aggFileSizeFromStat); -+ WARNF("Using actual aggregate bytes moved = %lld", (long long) point->aggFileSizeFromXfer); - if(params->deadlineForStonewalling){ -- fprintf(out_logfile, -- "WARNING: maybe caused by deadlineForStonewalling\n"); -+ WARN("Maybe caused by deadlineForStonewalling"); - } - } - } -@@ -339,101 +406,10 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, - * difference in buffers and returns total errors counted. - */ - static size_t --CompareBuffers(void *expectedBuffer, -- void *unknownBuffer, -- size_t size, -- IOR_offset_t transferCount, IOR_param_t *test, int access) -+CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access) - { -- char testFileName[MAX_PATHLEN]; -- char bufferLabel1[MAX_STR]; -- char bufferLabel2[MAX_STR]; -- size_t i, j, length, first, last; -- size_t errorCount = 0; -- int inError = 0; -- unsigned long long *goodbuf = (unsigned long long *)expectedBuffer; -- unsigned long long *testbuf = (unsigned long long *)unknownBuffer; -- -- if (access == WRITECHECK || access == READCHECK) { -- strcpy(bufferLabel1, "Expected: "); -- strcpy(bufferLabel2, "Actual: "); -- } else { -- ERR("incorrect argument for CompareBuffers()"); -- } -- -- length = size / sizeof(IOR_size_t); -- first = -1; -- if (verbose >= VERBOSE_3) { -- fprintf(out_logfile, -- "[%d] At file byte offset %lld, comparing %llu-byte transfer\n", -- rank, test->offset, (long long)size); -- } -- for (i = 0; i < length; i++) { -- if (testbuf[i] != goodbuf[i]) { -- errorCount++; -- if (verbose >= VERBOSE_2) { -- fprintf(out_logfile, -- "[%d] At transfer buffer #%lld, index #%lld (file byte offset %lld):\n", -- rank, transferCount - 1, (long long)i, -- test->offset + -- (IOR_size_t) (i * sizeof(IOR_size_t))); -- fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1); -- fprintf(out_logfile, "%016llx\n", goodbuf[i]); -- fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel2); -- fprintf(out_logfile, "%016llx\n", testbuf[i]); -- } -- if (!inError) { -- inError = 1; -- first = i; -- last = i; -- } else { -- last = i; -- } -- } else if (verbose >= VERBOSE_5 && i % 4 == 0) { -- fprintf(out_logfile, -- "[%d] PASSED offset = %lld bytes, transfer %lld\n", -- rank, -- ((i * sizeof(unsigned long long)) + -- test->offset), transferCount); -- fprintf(out_logfile, "[%d] GOOD %s0x", rank, bufferLabel1); -- for (j = 0; j < 4; j++) -- fprintf(out_logfile, "%016llx ", goodbuf[i + j]); -- fprintf(out_logfile, "\n[%d] GOOD %s0x", rank, bufferLabel2); -- for (j = 0; j < 4; j++) -- fprintf(out_logfile, "%016llx ", testbuf[i + j]); -- fprintf(out_logfile, "\n"); -- } -- } -- if (inError) { -- inError = 0; -- GetTestFileName(testFileName, test); -- fprintf(out_logfile, -- "[%d] FAILED comparison of buffer containing %d-byte ints:\n", -- rank, (int)sizeof(unsigned long long int)); -- fprintf(out_logfile, "[%d] File name = %s\n", rank, testFileName); -- fprintf(out_logfile, "[%d] In transfer %lld, ", rank, -- transferCount); -- fprintf(out_logfile, -- "%lld errors between buffer indices %lld and %lld.\n", -- (long long)errorCount, (long long)first, -- (long long)last); -- fprintf(out_logfile, "[%d] File byte offset = %lld:\n", rank, -- ((first * sizeof(unsigned long long)) + test->offset)); -- -- fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1); -- for (j = first; j < length && j < first + 4; j++) -- fprintf(out_logfile, "%016llx ", goodbuf[j]); -- if (j == length) -- fprintf(out_logfile, "[end of buffer]"); -- fprintf(out_logfile, "\n[%d] %s0x", rank, bufferLabel2); -- for (j = first; j < length && j < first + 4; j++) -- fprintf(out_logfile, "%016llx ", testbuf[j]); -- if (j == length) -- fprintf(out_logfile, "[end of buffer]"); -- fprintf(out_logfile, "\n"); -- if (test->quitOnError == TRUE) -- ERR("data check error, aborting execution"); -- } -- return (errorCount); -+ assert(access == WRITECHECK || access == READCHECK); -+ return verify_memory_pattern(offset, expectedBuffer, transferCount, test->setTimeStampSignature, fillrank, test->dataPacketType); - } - - /* -@@ -457,7 +433,7 @@ static int CountErrors(IOR_param_t * test, int access, int errors) - WARN("overflow in errors counted"); - allErrors = -1; - } -- fprintf(out_logfile, "WARNING: incorrect data on %s (%d errors found).\n", -+ WARNF("Incorrect data on %s (%d errors found).\n", - access == WRITECHECK ? "write" : "read", allErrors); - fprintf(out_logfile, - "Used Time Stamp %u (0x%x) for Data Signature\n", -@@ -468,44 +444,6 @@ static int CountErrors(IOR_param_t * test, int access, int errors) - return (allErrors); - } - --/* -- * Allocate a page-aligned (required by O_DIRECT) buffer. -- */ --static void *aligned_buffer_alloc(size_t size) --{ -- size_t pageMask; -- char *buf, *tmp; -- char *aligned; -- --#ifdef HAVE_SYSCONF -- long pageSize = sysconf(_SC_PAGESIZE); --#else -- size_t pageSize = getpagesize(); --#endif -- -- pageMask = pageSize - 1; -- buf = malloc(size + pageSize + sizeof(void *)); -- if (buf == NULL) -- ERR("out of memory"); -- /* find the alinged buffer */ -- tmp = buf + sizeof(char *); -- aligned = tmp + pageSize - ((size_t) tmp & pageMask); -- /* write a pointer to the original malloc()ed buffer into the bytes -- preceding "aligned", so that the aligned buffer can later be free()ed */ -- tmp = aligned - sizeof(void *); -- *(void **)tmp = buf; -- -- return (void *)aligned; --} -- --/* -- * Free a buffer allocated by aligned_buffer_alloc(). -- */ --static void aligned_buffer_free(void *buf) --{ -- free(*(void **)((char *)buf - sizeof(char *))); --} -- - void AllocResults(IOR_test_t *test) - { - int reps; -@@ -562,7 +500,7 @@ static void DestroyTests(IOR_test_t *tests_head) - /* - * Distribute IOR_HINTs to all tasks' environments. - */ --void DistributeHints(void) -+static void DistributeHints(MPI_Comm com) - { - char hint[MAX_HINTS][MAX_STR], fullHint[MAX_STR], hintVariable[MAX_STR]; - int hintCount = 0, i; -@@ -584,11 +522,9 @@ void DistributeHints(void) - } - } - -- MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, -- 0, MPI_COMM_WORLD), "cannot broadcast hints"); -+ MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, 0, com), "cannot broadcast hints"); - for (i = 0; i < hintCount; i++) { -- MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, -- 0, MPI_COMM_WORLD), -+ MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, 0, com), - "cannot broadcast hints"); - strcpy(fullHint, hint[i]); - strcpy(hintVariable, strtok(fullHint, "=")); -@@ -600,64 +536,6 @@ void DistributeHints(void) - } - } - --/* -- * Fill buffer, which is transfer size bytes long, with known 8-byte long long -- * int values. In even-numbered 8-byte long long ints, store MPI task in high -- * bits and timestamp signature in low bits. In odd-numbered 8-byte long long -- * ints, store transfer offset. If storeFileOffset option is used, the file -- * (not transfer) offset is stored instead. -- */ -- --static void --FillIncompressibleBuffer(void* buffer, IOR_param_t * test) -- --{ -- size_t i; -- unsigned long long hi, lo; -- unsigned long long *buf = (unsigned long long *)buffer; -- -- for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { -- hi = ((unsigned long long) rand_r(&test->incompressibleSeed) << 32); -- lo = (unsigned long long) rand_r(&test->incompressibleSeed); -- buf[i] = hi | lo; -- } --} -- --unsigned int reseed_incompressible_prng = TRUE; -- --static void --FillBuffer(void *buffer, -- IOR_param_t * test, unsigned long long offset, int fillrank) --{ -- size_t i; -- unsigned long long hi, lo; -- unsigned long long *buf = (unsigned long long *)buffer; -- -- if(test->dataPacketType == incompressible ) { /* Make for some non compressable buffers with randomish data */ -- -- /* In order for write checks to work, we have to restart the psuedo random sequence */ -- if(reseed_incompressible_prng == TRUE) { -- test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */ -- reseed_incompressible_prng = FALSE; -- } -- FillIncompressibleBuffer(buffer, test); -- } -- -- else { -- hi = ((unsigned long long)fillrank) << 32; -- lo = (unsigned long long)test->timeStampSignatureValue; -- for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { -- if ((i % 2) == 0) { -- /* evens contain MPI rank and time in seconds */ -- buf[i] = hi | lo; -- } else { -- /* odds contain offset */ -- buf[i] = offset + (i * sizeof(unsigned long long)); -- } -- } -- } --} -- - /* - * Return string describing machine name and type. - */ -@@ -668,7 +546,7 @@ char * GetPlatformName() - struct utsname name; - - if (uname(&name) != 0) { -- EWARN("cannot get platform name"); -+ WARN("cannot get platform name"); - sprintf(sysName, "%s", "Unknown"); - sprintf(nodeName, "%s", "Unknown"); - } else { -@@ -752,10 +630,16 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) - char initialTestFileName[MAX_PATHLEN]; - char testFileNameRoot[MAX_STR]; - char tmpString[MAX_STR]; -- int count; -+ int count; -+ int socket, core; - - /* parse filename for multiple file systems */ - strcpy(initialTestFileName, test->testFileName); -+ if(test->dualMount){ -+ GetProcessorAndCore(&socket, &core); -+ sprintf(tmpString, "%s%d/%s",initialTestFileName, socket, "data"); -+ strcpy(initialTestFileName, tmpString); -+ } - fileNames = ParseFileName(initialTestFileName, &count); - if (count > 1 && test->uniqueDir == TRUE) - ERR("cannot use multiple file names with unique directories"); -@@ -824,15 +708,15 @@ static char *PrependDir(IOR_param_t * test, char *rootDir) - sprintf(dir + i + 1, "%d", (rank + rankOffset) % test->numTasks); - - /* dir doesn't exist, so create */ -- if (backend->access(dir, F_OK, test) != 0) { -- if (backend->mkdir(dir, S_IRWXU, test) < 0) { -+ if (backend->access(dir, F_OK, test->backend_options) != 0) { -+ if (backend->mkdir(dir, S_IRWXU, test->backend_options) < 0) { - ERRF("cannot create directory: %s", dir); - } - - /* check if correct permissions */ -- } else if (backend->access(dir, R_OK, test) != 0 || -- backend->access(dir, W_OK, test) != 0 || -- backend->access(dir, X_OK, test) != 0) { -+ } else if (backend->access(dir, R_OK, test->backend_options) != 0 || -+ backend->access(dir, W_OK, test->backend_options) != 0 || -+ backend->access(dir, X_OK, test->backend_options) != 0) { - ERRF("invalid directory permissions: %s", dir); - } - -@@ -919,24 +803,24 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) - rankOffset = 0; - GetTestFileName(testFileName, test); - } -- if (backend->access(testFileName, F_OK, test) == 0) { -+ if (backend->access(testFileName, F_OK, test->backend_options) == 0) { - if (verbose >= VERBOSE_3) { - fprintf(out_logfile, "task %d removing %s\n", rank, - testFileName); - } -- backend->delete(testFileName, test); -+ backend->delete(testFileName, test->backend_options); - } - if (test->reorderTasksRandom == TRUE) { - rankOffset = tmpRankOffset; - GetTestFileName(testFileName, test); - } - } else { -- if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) { -+ if ((rank == 0) && (backend->access(testFileName, F_OK, test->backend_options) == 0)) { - if (verbose >= VERBOSE_3) { - fprintf(out_logfile, "task %d removing %s\n", rank, - testFileName); - } -- backend->delete(testFileName, test); -+ backend->delete(testFileName, test->backend_options); - } - } - } -@@ -945,12 +829,19 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) - * Setup tests by parsing commandline and creating test script. - * Perform a sanity-check on the configured parameters. - */ --static void InitTests(IOR_test_t *tests, MPI_Comm com) -+static void InitTests(IOR_test_t *tests) - { -+ if(tests == NULL){ -+ return; -+ } -+ MPI_Comm com = tests->params.mpi_comm_world; - int mpiNumNodes = 0; - int mpiNumTasks = 0; - int mpiNumTasksOnNode0 = 0; - -+ verbose = tests->params.verbose; -+ aiori_warning_as_errors = tests->params.warningAsErrors; -+ - /* - * These default values are the same for every test and expensive to - * retrieve so just do it once. -@@ -964,7 +855,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) - * task 0 has the environment settings for the hints, pass - * the hint=value pair to everyone else in mpi_comm_world - */ -- DistributeHints(); -+ DistributeHints(com); - - /* check validity of tests and create test queue */ - while (tests != NULL) { -@@ -979,11 +870,9 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) - params->numTasks = mpiNumTasks; - } else if (params->numTasks > mpiNumTasks) { - if (rank == 0) { -- fprintf(out_logfile, -- "WARNING: More tasks requested (%d) than available (%d),", -+ WARNF("More tasks requested (%d) than available (%d),", - params->numTasks, mpiNumTasks); -- fprintf(out_logfile, " running with %d tasks.\n", -- mpiNumTasks); -+ WARNF(" running with %d tasks.\n", mpiNumTasks); - } - params->numTasks = mpiNumTasks; - } -@@ -995,14 +884,11 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) - params->expectedAggFileSize = - params->blockSize * params->segmentCount * params->numTasks; - -- ValidateTests(&tests->params); -+ ValidateTests(&tests->params, com); - tests = tests->next; - } - -- init_clock(); -- -- /* seed random number generator */ -- SeedRandGen(mpi_comm_world); -+ init_clock(com); - } - - /* -@@ -1011,16 +897,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) - static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, - int pretendRank) - { -- ioBuffers->buffer = aligned_buffer_alloc(test->transferSize); -- -- if (test->checkWrite || test->checkRead) { -- ioBuffers->checkBuffer = aligned_buffer_alloc(test->transferSize); -- } -- if (test->checkRead || test->checkWrite) { -- ioBuffers->readCheckBuffer = aligned_buffer_alloc(test->transferSize); -- } -- -- return; -+ ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags); - } - - /* -@@ -1029,16 +906,7 @@ static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, - static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test) - - { -- aligned_buffer_free(ioBuffers->buffer); -- -- if (test->checkWrite || test->checkRead) { -- aligned_buffer_free(ioBuffers->checkBuffer); -- } -- if (test->checkRead) { -- aligned_buffer_free(ioBuffers->readCheckBuffer); -- } -- -- return; -+ aligned_buffer_free(ioBuffers->buffer, test->gpuMemoryFlags); - } - - -@@ -1083,7 +951,7 @@ static void file_hits_histogram(IOR_param_t *params) - } - - MPI_CHECK(MPI_Gather(&rankOffset, 1, MPI_INT, rankoffs, -- 1, MPI_INT, 0, mpi_comm_world), -+ 1, MPI_INT, 0, params->testComm), - "MPI_Gather error"); - - if (rank != 0) -@@ -1219,6 +1087,55 @@ WriteTimes(IOR_param_t *test, const double *timer, const int iteration, - timerName); - } - } -+ -+static void StoreRankInformation(IOR_test_t *test, double *timer, const int rep, const int access){ -+ IOR_param_t *params = &test->params; -+ double totalTime = timer[5] - timer[0]; -+ double accessTime = timer[3] - timer[2]; -+ double times[] = {totalTime, accessTime}; -+ -+ if(rank == 0){ -+ FILE* fd = fopen(params->saveRankDetailsCSV, "a"); -+ if (fd == NULL){ -+ FAIL("Cannot open saveRankPerformanceDetailsCSV file for writes!"); -+ } -+ int size; -+ MPI_Comm_size(params->testComm, & size); -+ double *all_times = malloc(2* size * sizeof(double)); -+ MPI_Gather(times, 2, MPI_DOUBLE, all_times, 2, MPI_DOUBLE, 0, params->testComm); -+ IOR_point_t *point = (access == WRITE) ? &test->results[rep].write : &test->results[rep].read; -+ double file_size = ((double) point->aggFileSizeForBW) / size; -+ -+ for(int i=0; i < size; i++){ -+ char buff[1024]; -+ sprintf(buff, "%s,%d,%.10e,%.10e,%.10e,%.10e\n", access==WRITE ? "write" : "read", i, all_times[i*2], all_times[i*2+1], file_size/all_times[i*2], file_size/all_times[i*2+1] ); -+ int ret = fwrite(buff, strlen(buff), 1, fd); -+ if(ret != 1){ -+ WARN("Couln't append to saveRankPerformanceDetailsCSV file\n"); -+ break; -+ } -+ } -+ fclose(fd); -+ }else{ -+ MPI_Gather(& times, 2, MPI_DOUBLE, NULL, 2, MPI_DOUBLE, 0, testComm); -+ } -+} -+ -+static void ProcessIterResults(IOR_test_t *test, double *timer, const int rep, const int access){ -+ IOR_param_t *params = &test->params; -+ -+ if (verbose >= VERBOSE_3) -+ WriteTimes(params, timer, rep, access); -+ ReduceIterResults(test, timer, rep, access); -+ if (params->outlierThreshold) { -+ CheckForOutliers(params, timer, access); -+ } -+ -+ if(params->saveRankDetailsCSV){ -+ StoreRankInformation(test, timer, rep, access); -+ } -+} -+ - /* - * Using the test parameters, run iteration(s) of single test. - */ -@@ -1231,33 +1148,13 @@ static void TestIoSys(IOR_test_t *test) - double startTime; - int pretendRank; - int rep; -- void *fd; -- MPI_Group orig_group, new_group; -- int range[3]; -+ aiori_fd_t *fd; - IOR_offset_t dataMoved; /* for data rate calculation */ - void *hog_buf; - IOR_io_buffers ioBuffers; - -- /* set up communicator for test */ -- MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group), -- "MPI_Comm_group() error"); -- range[0] = 0; /* first rank */ -- range[1] = params->numTasks - 1; /* last rank */ -- range[2] = 1; /* stride */ -- MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group), -- "MPI_Group_range_incl() error"); -- MPI_CHECK(MPI_Comm_create(mpi_comm_world, new_group, &testComm), -- "MPI_Comm_create() error"); -- MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); -- MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); -- params->testComm = testComm; -- if (testComm == MPI_COMM_NULL) { -- /* tasks not in the group do not participate in this test */ -- MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); -- return; -- } - if (rank == 0 && verbose >= VERBOSE_1) { -- fprintf(out_logfile, "Participating tasks: %d\n", params->numTasks); -+ fprintf(out_logfile, "Participating tasks : %d\n", params->numTasks); - fflush(out_logfile); - } - if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) { -@@ -1279,15 +1176,24 @@ static void TestIoSys(IOR_test_t *test) - params->timeStampSignatureValue = (unsigned int) params->setTimeStampSignature; - } - XferBuffersSetup(&ioBuffers, params, pretendRank); -- reseed_incompressible_prng = TRUE; // reset pseudo random generator, necessary to guarantee the next call to FillBuffer produces the same value as it is right now -- -+ - /* Initial time stamp */ - startTime = GetTimeStamp(); - - /* loop over test iterations */ - uint64_t params_saved_wearout = params->stoneWallingWearOutIterations; -+ -+ /* Check if the file exists and warn users */ -+ if((params->writeFile || params->checkWrite) && (params->hints.filePerProc || rank == 0)){ -+ struct stat sb; -+ GetTestFileName(testFileName, params); -+ int ret = backend->stat(testFileName, & sb, params->backend_options); -+ if(ret == 0) { -+ WARNF("The file \"%s\" exists already and will be overwritten", testFileName); -+ } -+ } -+ - for (rep = 0; rep < params->repetitions; rep++) { -- PrintRepeatStart(); - /* Get iteration start time in seconds in task 0 and broadcast to - all tasks */ - if (rank == 0) { -@@ -1297,13 +1203,13 @@ static void TestIoSys(IOR_test_t *test) - ERR("cannot get current time"); - } - params->timeStampSignatureValue = -- (unsigned int) currentTime; -- if (verbose >= VERBOSE_2) { -- fprintf(out_logfile, -- "Using Time Stamp %u (0x%x) for Data Signature\n", -- params->timeStampSignatureValue, -- params->timeStampSignatureValue); -- } -+ (unsigned int)currentTime; -+ } -+ if (verbose >= VERBOSE_2) { -+ fprintf(out_logfile, -+ "Using Time Stamp %u (0x%x) for Data Signature\n", -+ params->timeStampSignatureValue, -+ params->timeStampSignatureValue); - } - if (rep == 0 && verbose >= VERBOSE_0) { - PrintTableHeader(); -@@ -1313,7 +1219,8 @@ static void TestIoSys(IOR_test_t *test) - (¶ms->timeStampSignatureValue, 1, MPI_UNSIGNED, 0, - testComm), "cannot broadcast start time value"); - -- FillBuffer(ioBuffers.buffer, params, 0, pretendRank); -+ generate_memory_pattern((char*) ioBuffers.buffer, params->transferSize, params->setTimeStampSignature, pretendRank, params->dataPacketType); -+ - /* use repetition count for number of multiple files */ - if (params->multiFile) - params->repCounter = rep; -@@ -1338,7 +1245,8 @@ static void TestIoSys(IOR_test_t *test) - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - params->open = WRITE; - timer[0] = GetTimeStamp(); -- fd = backend->create(testFileName, params); -+ fd = backend->create(testFileName, IOR_WRONLY | IOR_CREAT | IOR_TRUNC, params->backend_options); -+ if(fd == NULL) FAIL("Cannot create file"); - timer[1] = GetTimeStamp(); - if (params->intraTestBarriers) - MPI_CHECK(MPI_Barrier(testComm), -@@ -1359,25 +1267,16 @@ static void TestIoSys(IOR_test_t *test) - MPI_CHECK(MPI_Barrier(testComm), - "barrier error"); - timer[4] = GetTimeStamp(); -- backend->close(fd, params); -+ backend->close(fd, params->backend_options); - - timer[5] = GetTimeStamp(); - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - -- /* get the size of the file just written */ -- results[rep].write.aggFileSizeFromStat = -- backend->get_file_size(params, testComm, testFileName); -- - /* check if stat() of file doesn't equal expected file size, - use actual amount of byte moved */ -- CheckFileSize(test, dataMoved, rep, WRITE); -+ CheckFileSize(test, testFileName, dataMoved, rep, WRITE); - -- if (verbose >= VERBOSE_3) -- WriteTimes(params, timer, rep, WRITE); -- ReduceIterResults(test, timer, rep, WRITE); -- if (params->outlierThreshold) { -- CheckForOutliers(params, timer, WRITE); -- } -+ ProcessIterResults(test, timer, rep, WRITE); - - /* check if in this round we run write with stonewalling */ - if(params->deadlineForStonewalling > 0){ -@@ -1404,17 +1303,13 @@ static void TestIoSys(IOR_test_t *test) - } - rankOffset = (2 * shift) % params->numTasks; - } -- -- // update the check buffer -- FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks); -- -- reseed_incompressible_prng = TRUE; /* Re-Seed the PRNG to get same sequence back, if random */ -- -+ - GetTestFileName(testFileName, params); - params->open = WRITECHECK; -- fd = backend->open(testFileName, params); -+ fd = backend->open(testFileName, IOR_RDONLY, params->backend_options); -+ if(fd == NULL) FAIL("Cannot open file"); - dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers); -- backend->close(fd, params); -+ backend->close(fd, params->backend_options); - rankOffset = 0; - } - /* -@@ -1423,9 +1318,9 @@ static void TestIoSys(IOR_test_t *test) - if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) { - /* check for stonewall */ - if(params->stoneWallingStatusFile){ -- params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile); -+ params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile, params->testComm); - if(params->stoneWallingWearOutIterations == -1 && rank == 0){ -- fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n"); -+ WARN("Could not read back the stonewalling status from the file!"); - params->stoneWallingWearOutIterations = 0; - } - } -@@ -1469,10 +1364,6 @@ static void TestIoSys(IOR_test_t *test) - file_hits_histogram(params); - } - } -- if(operation_flag == READCHECK){ -- FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks); -- } -- - /* Using globally passed rankOffset, following function generates testFileName to read */ - GetTestFileName(testFileName, params); - -@@ -1484,7 +1375,8 @@ static void TestIoSys(IOR_test_t *test) - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - params->open = READ; - timer[0] = GetTimeStamp(); -- fd = backend->open(testFileName, params); -+ fd = backend->open(testFileName, IOR_RDONLY, params->backend_options); -+ if(fd == NULL) FAIL("Cannot open file"); - timer[1] = GetTimeStamp(); - if (params->intraTestBarriers) - MPI_CHECK(MPI_Barrier(testComm), -@@ -1501,24 +1393,14 @@ static void TestIoSys(IOR_test_t *test) - MPI_CHECK(MPI_Barrier(testComm), - "barrier error"); - timer[4] = GetTimeStamp(); -- backend->close(fd, params); -+ backend->close(fd, params->backend_options); - timer[5] = GetTimeStamp(); - -- /* get the size of the file just read */ -- results[rep].read.aggFileSizeFromStat = -- backend->get_file_size(params, testComm, -- testFileName); -- - /* check if stat() of file doesn't equal expected file size, - use actual amount of byte moved */ -- CheckFileSize(test, dataMoved, rep, READ); -+ CheckFileSize(test, testFileName, dataMoved, rep, READ); - -- if (verbose >= VERBOSE_3) -- WriteTimes(params, timer, rep, READ); -- ReduceIterResults(test, timer, rep, READ); -- if (params->outlierThreshold) { -- CheckForOutliers(params, timer, READ); -- } -+ ProcessIterResults(test, timer, rep, READ); - } - - if (!params->keepFile -@@ -1536,10 +1418,8 @@ static void TestIoSys(IOR_test_t *test) - params->errorFound = FALSE; - rankOffset = 0; - -- PrintRepeatEnd(); - } -- -- MPI_CHECK(MPI_Comm_free(&testComm), "MPI_Comm_free() error"); -+ PrintRepeatEnd(); - - if (params->summary_every_test) { - PrintLongSummaryHeader(); -@@ -1552,20 +1432,24 @@ static void TestIoSys(IOR_test_t *test) - - if (hog_buf != NULL) - free(hog_buf); -- -- /* Sync with the tasks that did not participate in this test */ -- MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); -- - } - - /* - * Determine if valid tests from parameters. - */ --static void ValidateTests(IOR_param_t * test) -+static void ValidateTests(IOR_param_t * test, MPI_Comm com) - { - IOR_param_t defaults; -- init_IOR_Param_t(&defaults); -- -+ init_IOR_Param_t(&defaults, com); -+ -+ if (test->stoneWallingStatusFile && test->keepFile == 0) -+ ERR("a StoneWallingStatusFile is only sensible when splitting write/read into multiple executions of ior, please use -k"); -+ if (test->stoneWallingStatusFile && test->stoneWallingWearOut == 0 && test->writeFile) -+ ERR("the StoneWallingStatusFile is only sensible for a write test when using stoneWallingWearOut"); -+ if (test->deadlineForStonewalling == 0 && test->stoneWallingWearOut > 0) -+ ERR("the stoneWallingWearOut is only sensible when setting a stonewall deadline with -D"); -+ if (test->stoneWallingStatusFile && test->testscripts) -+ WARN("the StoneWallingStatusFile only preserves the last experiment, make sure that each run uses a separate status file!"); - if (test->repetitions <= 0) - WARN_RESET("too few test repetitions", - test, &defaults, repetitions); -@@ -1587,8 +1471,6 @@ static void ValidateTests(IOR_param_t * test) - ERR("block size must be non-negative integer"); - if ((test->transferSize % sizeof(IOR_size_t)) != 0) - ERR("transfer size must be a multiple of access size"); -- if (test->setAlignment < 0) -- ERR("alignment must be non-negative integer"); - if (test->transferSize < 0) - ERR("transfer size must be non-negative integer"); - if (test->transferSize == 0) { -@@ -1599,7 +1481,12 @@ static void ValidateTests(IOR_param_t * test) - } - if (test->blockSize < test->transferSize) - ERR("block size must not be smaller than transfer size"); -- -+ if (test->randomOffset && test->blockSize == test->transferSize) -+ ERR("IOR will randomize access within a block and repeats the same pattern for all segments, therefore choose blocksize > transferSize"); -+ if (! test->randomOffset && test->randomPrefillBlocksize) -+ ERR("Setting the randomPrefill option without using random is not useful"); -+ if (test->randomPrefillBlocksize && (test->blockSize % test->randomPrefillBlocksize != 0)) -+ ERR("The randomPrefill option must divide the blockSize"); - /* specific APIs */ - if ((strcasecmp(test->api, "MPIIO") == 0) - && (test->blockSize < sizeof(IOR_size_t) -@@ -1613,59 +1500,17 @@ static void ValidateTests(IOR_param_t * test) - && (test->blockSize < sizeof(IOR_size_t) - || test->transferSize < sizeof(IOR_size_t))) - ERR("block/transfer size may not be smaller than IOR_size_t for NCMPI"); -- if ((test->useFileView == TRUE) -- && (sizeof(MPI_Aint) < 8) /* used for 64-bit datatypes */ -- &&((test->numTasks * test->blockSize) > -- (2 * (IOR_offset_t) GIBIBYTE))) -- ERR("segment size must be < 2GiB"); -- if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt) -- WARN_RESET("retry only available in POSIX", -- test, &defaults, singleXferAttempt); - if (((strcasecmp(test->api, "POSIX") != 0) - && (strcasecmp(test->api, "MPIIO") != 0) - && (strcasecmp(test->api, "MMAP") != 0) - && (strcasecmp(test->api, "HDFS") != 0) - && (strcasecmp(test->api, "DFS") != 0) -- && (strcasecmp(test->api, "DAOS") != 0) - && (strcasecmp(test->api, "Gfarm") != 0) - && (strcasecmp(test->api, "RADOS") != 0) - && (strcasecmp(test->api, "CEPHFS") != 0)) && test->fsync) - WARN_RESET("fsync() not supported in selected backend", - test, &defaults, fsync); -- if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate) -- WARN_RESET("preallocation only available in MPIIO", -- test, &defaults, preallocate); -- if ((strcasecmp(test->api, "MPIIO") != 0) && test->useFileView) -- WARN_RESET("file view only available in MPIIO", -- test, &defaults, useFileView); -- if ((strcasecmp(test->api, "MPIIO") != 0) && test->useSharedFilePointer) -- WARN_RESET("shared file pointer only available in MPIIO", -- test, &defaults, useSharedFilePointer); -- if ((strcasecmp(test->api, "MPIIO") == 0) && test->useSharedFilePointer) -- WARN_RESET("shared file pointer not implemented", -- test, &defaults, useSharedFilePointer); -- if ((strcasecmp(test->api, "MPIIO") != 0) && test->useStridedDatatype) -- WARN_RESET("strided datatype only available in MPIIO", -- test, &defaults, useStridedDatatype); -- if ((strcasecmp(test->api, "MPIIO") == 0) && test->useStridedDatatype) -- WARN_RESET("strided datatype not implemented", -- test, &defaults, useStridedDatatype); -- if ((strcasecmp(test->api, "MPIIO") == 0) -- && test->useStridedDatatype && (test->blockSize < sizeof(IOR_size_t) -- || test->transferSize < -- sizeof(IOR_size_t))) -- ERR("need larger file size for strided datatype in MPIIO"); -- if ((strcasecmp(test->api, "POSIX") == 0) && test->showHints) -- WARN_RESET("hints not available in POSIX", -- test, &defaults, showHints); -- if ((strcasecmp(test->api, "POSIX") == 0) && test->collective) -- WARN_RESET("collective not available in POSIX", -- test, &defaults, collective); -- if ((strcasecmp(test->api, "MMAP") == 0) && test->fsyncPerWrite -- && (test->transferSize & (sysconf(_SC_PAGESIZE) - 1))) -- ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite"); -- -- /* parameter consitency */ -+ /* parameter consistency */ - if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE) - ERR("Both Constant and Random task re-ordering specified. Choose one and resubmit"); - if (test->randomOffset && test->reorderTasksRandom -@@ -1674,55 +1519,21 @@ static void ValidateTests(IOR_param_t * test) - if (test->randomOffset && test->reorderTasks - && test->filePerProc == FALSE) - ERR("random offset and constant reorder tasks specified with single-shared-file. Choose one and resubmit"); -- if (test->randomOffset && test->checkRead) -- ERR("random offset not available with read check option (use write check)"); -- if (test->randomOffset && test->storeFileOffset) -- ERR("random offset not available with store file offset option)"); -- -- -- if ((strcasecmp(test->api, "MPIIO") == 0) && test->randomOffset -- && test->collective) -- ERR("random offset not available with collective MPIIO"); -- if ((strcasecmp(test->api, "MPIIO") == 0) && test->randomOffset -- && test->useFileView) -- ERR("random offset not available with MPIIO fileviews"); -+ if (test->randomOffset && test->checkRead && test->randomSeed == -1) -+ ERR("random offset with read check option requires to set the random seed"); - if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset) - ERR("random offset not available with HDF5"); - if ((strcasecmp(test->api, "NCMPI") == 0) && test->randomOffset) - ERR("random offset not available with NCMPI"); -- if ((strcasecmp(test->api, "HDF5") != 0) && test->individualDataSets) -- WARN_RESET("individual datasets only available in HDF5", -- test, &defaults, individualDataSets); -- if ((strcasecmp(test->api, "HDF5") == 0) && test->individualDataSets) -- WARN_RESET("individual data sets not implemented", -- test, &defaults, individualDataSets); - if ((strcasecmp(test->api, "NCMPI") == 0) && test->filePerProc) - ERR("file-per-proc not available in current NCMPI"); -- if (test->noFill) { -- if (strcasecmp(test->api, "HDF5") != 0) { -- ERR("'no fill' option only available in HDF5"); -- } else { -- /* check if hdf5 available */ --#if defined (H5_VERS_MAJOR) && defined (H5_VERS_MINOR) -- /* no-fill option not available until hdf5-1.6.x */ --#if (H5_VERS_MAJOR > 0 && H5_VERS_MINOR > 5) -- ; --#else -- ERRF("'no fill' option not available in %s", -- test->apiVersion); --#endif --#else -- WARN("unable to determine HDF5 version for 'no fill' usage"); --#endif -- } -- } -- if (test->useExistingTestFile && test->lustre_set_striping) -- ERR("Lustre stripe options are incompatible with useExistingTestFile"); - -+ backend = test->backend; -+ ior_set_xfer_hints(test); - /* allow the backend to validate the options */ - if(test->backend->check_params){ -- int check = test->backend->check_params(test); -- if (check == 0){ -+ int check = test->backend->check_params(test->backend_options); -+ if (check){ - ERR("The backend returned that the test parameters are invalid."); - } - } -@@ -1730,158 +1541,112 @@ static void ValidateTests(IOR_param_t * test) - - /** - * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. -- * They are sequential and the last element is set to -1 as end marker. -- * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount -- * @param pretendRank int pretended Rank for shifting the offsest corectly -- * @return IOR_offset_t -- */ --IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) --{ -- IOR_offset_t i, j, k = 0; -- IOR_offset_t offsets; -- IOR_offset_t *offsetArray; -- -- /* count needed offsets */ -- offsets = (test->blockSize / test->transferSize) * test->segmentCount; -- -- /* setup empty array */ -- offsetArray = -- (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); -- if (offsetArray == NULL) -- ERR("malloc() failed"); -- offsetArray[offsets] = -1; /* set last offset with -1 */ -- -- /* fill with offsets */ -- for (i = 0; i < test->segmentCount; i++) { -- for (j = 0; j < (test->blockSize / test->transferSize); j++) { -- offsetArray[k] = j * test->transferSize; -- if (test->filePerProc) { -- offsetArray[k] += i * test->blockSize; -- } else { -- offsetArray[k] += -- (i * test->numTasks * test->blockSize) -- + (pretendRank * test->blockSize); -- } -- k++; -- } -- } -- -- return (offsetArray); --} -- --/** -- * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. -- * They get created sequentially and mixed up in the end. The last array element -- * is set to -1 as end marker. -- * It should be noted that as the seeds get synchronised across all processes -- * every process computes the same random order if used with filePerProc. -+ * They get created sequentially and mixed up in the end. -+ * It should be noted that as the seeds get synchronised across all processes if not FilePerProcess is set -+ * every process computes the same random order. - * For a shared file all transfers get randomly assigned to ranks. The processes - * can also have differen't numbers of transfers. This might lead to a bigger - * diversion in accesse as it dose with filePerProc. This is expected but - * should be mined. - * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount -- * @param pretendRank int pretended Rank for shifting the offsest corectly -+ * @param pretendRank int pretended Rank for shifting the offsets correctly - * @return IOR_offset_t -- * @return - */ --IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access) -+IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count) - { - int seed; -- IOR_offset_t i, value, tmp; -- IOR_offset_t offsets = 0; -+ IOR_offset_t i; -+ IOR_offset_t offsets; - IOR_offset_t offsetCnt = 0; -- IOR_offset_t fileSize; - IOR_offset_t *offsetArray; - -- /* set up seed for random() */ -- if (access == WRITE || access == READ) { -- test->randomSeed = seed = rand(); -- } else { -- seed = test->randomSeed; -- } -- srand(seed); -- -- fileSize = test->blockSize * test->segmentCount; -- if (test->filePerProc == FALSE) { -- fileSize *= test->numTasks; -+ if (test->filePerProc) { -+ /* set up seed, each process can determine which regions to access individually */ -+ if (test->randomSeed == -1) { -+ seed = time(NULL); -+ test->randomSeed = seed; -+ } else { -+ seed = test->randomSeed + pretendRank; -+ } -+ }else{ -+ /* Shared file requires that the seed is synchronized */ -+ if (test->randomSeed == -1) { -+ // all processes need to have the same seed. -+ if(rank == 0){ -+ seed = time(NULL); -+ } -+ MPI_CHECK(MPI_Bcast(& seed, 1, MPI_INT, 0, test->testComm), "cannot broadcast random seed value"); -+ test->randomSeed = seed; -+ }else{ -+ seed = test->randomSeed; -+ } - } -+ srandom(seed); - - /* count needed offsets (pass 1) */ -- for (i = 0; i < fileSize; i += test->transferSize) { -- if (test->filePerProc == FALSE) { -- // this counts which process get how many transferes in -- // a shared file -- if ((rand() % test->numTasks) == pretendRank) { -- offsets++; -- } -- } else { -- offsets++; -- } -+ if (test->filePerProc) { -+ offsets = test->blockSize / test->transferSize; -+ }else{ -+ offsets = 0; -+ for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { -+ // this counts which process get how many transferes in the shared file -+ if ((rand() % test->numTasks) == pretendRank) { -+ offsets++; -+ } -+ } - } - - /* setup empty array */ -- offsetArray = -- (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); -- if (offsetArray == NULL) -- ERR("malloc() failed"); -- offsetArray[offsets] = -1; /* set last offset with -1 */ -+ offsetArray = (IOR_offset_t *) safeMalloc(offsets * sizeof(IOR_offset_t)); -+ -+ *out_count = offsets; - - if (test->filePerProc) { -- /* fill array */ -- for (i = 0; i < offsets; i++) { -- offsetArray[i] = i * test->transferSize; -- } -+ /* fill array */ -+ for (i = 0; i < offsets; i++) { -+ offsetArray[i] = i * test->transferSize; -+ } - } else { -- /* fill with offsets (pass 2) */ -- srand(seed); /* need same seed to get same transfers as counted in the beginning*/ -- for (i = 0; i < fileSize; i += test->transferSize) { -- if ((rand() % test->numTasks) == pretendRank) { -- offsetArray[offsetCnt] = i; -- offsetCnt++; -- } -+ /* fill with offsets (pass 2) */ -+ srandom(seed); /* need same seed to get same transfers as counted in the beginning*/ -+ for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { -+ if ((rand() % test->numTasks) == pretendRank) { -+ offsetArray[offsetCnt] = i; -+ offsetCnt++; - } -+ } - } - /* reorder array */ - for (i = 0; i < offsets; i++) { -+ IOR_offset_t value, tmp; - value = rand() % offsets; - tmp = offsetArray[value]; - offsetArray[value] = offsetArray[i]; - offsetArray[i] = tmp; - } -- SeedRandGen(test->testComm); /* synchronize seeds across tasks */ - - return (offsetArray); - } - --static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offsetArray, int pretendRank, -- IOR_offset_t * transferCount, int * errors, IOR_param_t * test, int * fd, IOR_io_buffers* ioBuffers, int access){ -+static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ - IOR_offset_t amtXferred = 0; -- IOR_offset_t transfer; - - void *buffer = ioBuffers->buffer; -- void *checkBuffer = ioBuffers->checkBuffer; -- void *readCheckBuffer = ioBuffers->readCheckBuffer; -- -- test->offset = offsetArray[pairCnt]; -- -- transfer = test->transferSize; - if (access == WRITE) { - /* fills each transfer with a unique pattern - * containing the offset into the file */ -- if (test->storeFileOffset == TRUE) { -- FillBuffer(buffer, test, test->offset, pretendRank); -- } -- amtXferred = -- backend->xfer(access, fd, buffer, transfer, test); -+ update_write_memory_pattern(offset, ioBuffers->buffer, transfer, test->setTimeStampSignature, pretendRank, test->dataPacketType); -+ amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); - if (amtXferred != transfer) - ERR("cannot write to file"); -+ if (test->fsyncPerWrite) -+ backend->fsync(fd, test->backend_options); - if (test->interIODelay > 0){ - struct timespec wait = {test->interIODelay / 1000 / 1000, 1000l * (test->interIODelay % 1000000)}; - nanosleep( & wait, NULL); - } - } else if (access == READ) { -- amtXferred = -- backend->xfer(access, fd, buffer, transfer, test); -+ amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); - if (amtXferred != transfer) - ERR("cannot read from file"); - if (test->interIODelay > 0){ -@@ -1889,79 +1654,134 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset - nanosleep( & wait, NULL); - } - } else if (access == WRITECHECK) { -- memset(checkBuffer, 'a', transfer); -- -- if (test->storeFileOffset == TRUE) { -- FillBuffer(readCheckBuffer, test, test->offset, pretendRank); -- } -- -- amtXferred = backend->xfer(access, fd, checkBuffer, transfer, test); -+ ((long long int*) buffer)[0] = ~((long long int*) buffer)[0]; // changes the buffer, no memset to reduce the memory pressure -+ amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); - if (amtXferred != transfer) - ERR("cannot read from file write check"); -- (*transferCount)++; -- *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, -- *transferCount, test, -- WRITECHECK); -+ *errors += CompareData(buffer, transfer, *transferCount, test, offset, pretendRank, WRITECHECK); - } else if (access == READCHECK) { -- memset(checkBuffer, 'a', transfer); -- -- amtXferred = backend->xfer(access, fd, checkBuffer, transfer, test); -+ ((long long int*) buffer)[0] = ~((long long int*) buffer)[0]; // changes the buffer, no memset to reduce the memory pressure -+ amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); - if (amtXferred != transfer){ - ERR("cannot read from file"); - } -- if (test->storeFileOffset == TRUE) { -- FillBuffer(readCheckBuffer, test, test->offset, pretendRank); -- } -- *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, *transferCount, test, READCHECK); -+ *errors += CompareData(buffer, transfer, *transferCount, test, offset, pretendRank, READCHECK); - } - return amtXferred; - } - -+static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pretendRank, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int startSegment, int endSegment){ -+ // prefill the whole file already with an invalid pattern -+ int offsets = test->blockSize / test->randomPrefillBlocksize; -+ void * oldBuffer = ioBuffers->buffer; -+ IOR_offset_t transferCount; -+ int errors; -+ ioBuffers->buffer = randomPrefillBuffer; -+ for (IOR_offset_t i = startSegment; i < endSegment; i++){ -+ for (int j = 0; j < offsets; j++) { -+ IOR_offset_t offset = j * test->randomPrefillBlocksize; -+ if (test->filePerProc) { -+ offset += i * test->blockSize; -+ } else { -+ offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); -+ } -+ WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & transferCount, & errors, test, fd, ioBuffers, WRITE); -+ } -+ } -+ ioBuffers->buffer = oldBuffer; -+} -+ - /* - * Write or Read data to file(s). This loops through the strides, writing - * out the data to each block in transfer sizes, until the remainder left is 0. - */ - static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, -- void *fd, const int access, IOR_io_buffers *ioBuffers) -+ aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers) - { - int errors = 0; - IOR_offset_t transferCount = 0; - uint64_t pairCnt = 0; -- IOR_offset_t *offsetArray; - int pretendRank; - IOR_offset_t dataMoved = 0; /* for data rate calculation */ - double startForStonewall; - int hitStonewall; -+ IOR_offset_t i, j; - IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ? - &results->write : &results->read; - - /* initialize values */ - pretendRank = (rank + rankOffset) % test->numTasks; - -+ // offsetArray = GetOffsetArraySequential(test, pretendRank); -+ -+ IOR_offset_t offsets; -+ IOR_offset_t * offsets_rnd; - if (test->randomOffset) { -- offsetArray = GetOffsetArrayRandom(test, pretendRank, access); -- } else { -- offsetArray = GetOffsetArraySequential(test, pretendRank); -+ offsets_rnd = GetOffsetArrayRandom(test, pretendRank, & offsets); -+ }else{ -+ offsets = (test->blockSize / test->transferSize); - } - -+ void * randomPrefillBuffer = NULL; -+ if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){ -+ randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize, test->gpuMemoryFlags); -+ // store invalid data into the buffer -+ memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize); -+ } -+ -+ // start timer after random offset was generated - startForStonewall = GetTimeStamp(); - hitStonewall = 0; - -- /* loop over offsets to access */ -- while ((offsetArray[pairCnt] != -1) && !hitStonewall ) { -- dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); -- pairCnt++; -+ if(randomPrefillBuffer && test->deadlineForStonewalling == 0){ -+ double t_start = GetTimeStamp(); -+ prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, 0, test->segmentCount); -+ if(rank == 0 && verbose > VERBOSE_1){ -+ fprintf(out_logfile, "Random prefill took: %fs\n", GetTimeStamp() - t_start); -+ } -+ // must synchronize processes to ensure they are not running ahead -+ MPI_Barrier(test->testComm); -+ } - -- hitStonewall = ((test->deadlineForStonewalling != 0 -- && (GetTimeStamp() - startForStonewall) -- > test->deadlineForStonewalling)) || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; -+ for (i = 0; i < test->segmentCount && !hitStonewall; i++) { -+ if(randomPrefillBuffer && test->deadlineForStonewalling != 0){ -+ // prefill the whole segment with data, this needs to be done collectively -+ double t_start = GetTimeStamp(); -+ prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, i, i+1); -+ MPI_Barrier(test->testComm); -+ if(rank == 0 && verbose > VERBOSE_1){ -+ fprintf(out_logfile, "Random: synchronizing segment count with barrier and prefill took: %fs\n", GetTimeStamp() - t_start); -+ } -+ } -+ for (j = 0; j < offsets && !hitStonewall ; j++) { -+ IOR_offset_t offset; -+ if (test->randomOffset) { -+ if(test->filePerProc){ -+ offset = offsets_rnd[j] + (i * test->blockSize); -+ }else{ -+ offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); -+ } -+ }else{ -+ offset = j * test->transferSize; -+ if (test->filePerProc) { -+ offset += i * test->blockSize; -+ } else { -+ offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); -+ } -+ } -+ dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); -+ pairCnt++; - -- if ( test->collective && test->deadlineForStonewalling ) { -- // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop -- // it absolutely must be an 'all or none': -- MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); -- } -+ hitStonewall = ((test->deadlineForStonewalling != 0 -+ && (GetTimeStamp() - startForStonewall) > test->deadlineForStonewalling)) -+ || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; - -+ if ( test->collective && test->deadlineForStonewalling ) { -+ // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop -+ // it absolutely must be an 'all or none': -+ MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, testComm), "hitStonewall broadcast failed"); -+ } -+ } - } - if (test->stoneWallingWearOut){ - if (verbose >= VERBOSE_1){ -@@ -1977,32 +1797,57 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, - 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed, - 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); -- MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed, -+ MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_total_data_accessed, - 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved"); - - if(rank == 0){ -+ point->stonewall_avg_data_accessed = point->stonewall_total_data_accessed / test->numTasks; - fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n", - pairs_accessed_min, point->pairs_accessed, -- point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time); -- point->stonewall_min_data_accessed *= test->numTasks; -+ point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 , point->stonewall_time); - } - if(pairCnt != point->pairs_accessed){ -- // some work needs still to be done ! -- for(; pairCnt < point->pairs_accessed; pairCnt++ ) { -- dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); -+ // some work needs still to be done, complete the current block ! -+ i--; -+ if(j == offsets){ -+ j = 0; // current block is completed -+ i++; -+ } -+ for ( ; pairCnt < point->pairs_accessed; i++) { -+ for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { -+ IOR_offset_t offset; -+ if (test->randomOffset) { -+ if(test->filePerProc){ -+ offset = offsets_rnd[j] + (i * test->blockSize); -+ }else{ -+ offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); -+ } -+ }else{ -+ offset = j * test->transferSize; -+ if (test->filePerProc) { -+ offset += i * test->blockSize; -+ } else { -+ offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); -+ } -+ } -+ dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); -+ pairCnt++; -+ } -+ j = 0; - } - } - }else{ - point->pairs_accessed = pairCnt; - } - -- - totalErrorCount += CountErrors(test, access, errors); - -- free(offsetArray); -- - if (access == WRITE && test->fsync == TRUE) { -- backend->fsync(fd, test); /*fsync after all accesses */ -+ backend->fsync(fd, test->backend_options); /*fsync after all accesses */ -+ } -+ if(randomPrefillBuffer){ -+ aligned_buffer_free(randomPrefillBuffer, test->gpuMemoryFlags); - } -+ - return (dataMoved); - } -diff --git a/src/ior.h b/src/ior.h -index 758b048..2effa9a 100755 ---- a/src/ior.h -+++ b/src/ior.h -@@ -36,20 +36,22 @@ - typedef void *rados_ioctx_t; - #endif - #include "option.h" -- - #include "iordef.h" --/******************** DATA Packet Type ***************************************/ --/* Holds the types of data packets: generic, offset, timestamp, incompressible */ -+#include "aiori.h" - --enum PACKET_TYPE --{ -- generic = 0, /* No packet type specified */ -- timestamp=1, /* Timestamp packet set with -l */ -- offset=2, /* Offset packet set with -l */ -- incompressible=3 /* Incompressible packet set with -l */ -+#include - --}; -+#ifndef MPI_FILE_NULL -+# include -+#endif /* not MPI_FILE_NULL */ - -+#define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1))) -+ -+typedef enum{ -+ IOR_MEMORY_TYPE_CPU = 0, -+ IOR_MEMORY_TYPE_GPU_MANAGED = 1, -+ IOR_MEMORY_TYPE_GPU_DEVICE_ONLY = 2, -+} ior_memory_flags; - - - /***************** IOR_BUFFERS *************************************************/ -@@ -78,24 +80,25 @@ typedef struct IO_BUFFERS - * USER_GUIDE - */ - --struct ior_aiori; -- - typedef struct - { - const struct ior_aiori * backend; - char * debug; /* debug info string */ -- unsigned int mode; /* file permissions */ -- unsigned int openFlags; /* open flags (see also ) */ - int referenceNumber; /* user supplied reference number */ - char * api; /* API for I/O */ - char * apiVersion; /* API version */ - char * platform; /* platform type */ - char * testFileName; /* full name for test */ -- char * testFileName_fppReadCheck;/* filename for fpp read check */ -- char * hintsFileName; /* full name for hints file */ - char * options; /* options string */ - // intermediate options -+ int collective; /* collective I/O */ -+ MPI_Comm testComm; /* Current MPI communicator */ -+ MPI_Comm mpi_comm_world; /* The global MPI communicator */ - int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ -+ int dualMount; /* dual mount points */ -+ ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */ -+ int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */ -+ int gpuID; /* the GPU to use for gpuDirect or memory options */ - int numTasks; /* number of tasks for test */ - int numNodes; /* number of nodes for test */ - int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */ -@@ -118,24 +121,18 @@ typedef struct - int keepFile; /* don't delete the testfile on exit */ - int keepFileWithError; /* don't delete the testfile with errors */ - int errorFound; /* error found in data check */ -- int quitOnError; /* quit code when error in check */ -- int collective; /* collective I/O */ - IOR_offset_t segmentCount; /* number of segments (or HDF5 datasets) */ - IOR_offset_t blockSize; /* contiguous bytes to write per task */ - IOR_offset_t transferSize; /* size of transfer in bytes */ -- IOR_offset_t offset; /* offset for read/write */ - IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */ -- int preallocate; /* preallocate file size */ -- int useFileView; /* use MPI_File_set_view */ -- int useSharedFilePointer; /* use shared file pointer */ -- int useStridedDatatype; /* put strided access into datatype */ -- int showHints; /* show hints */ -+ IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */ -+ -+ char * saveRankDetailsCSV; /* save the details about the performance to a file */ - int summary_every_test; /* flag to print summary every test, not just at end */ - int uniqueDir; /* use unique directory for each fpp */ - int useExistingTestFile; /* do not delete test file before access */ -- int storeFileOffset; /* use file offset as stored signature */ - int deadlineForStonewalling; /* max time in seconds to run any test phase */ -- int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */ -+ int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */ - uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */ - char * stoneWallingStatusFile; - -@@ -144,7 +141,6 @@ typedef struct - int verbose; /* verbosity */ - int setTimeStampSignature; /* set time stamp signature */ - unsigned int timeStampSignatureValue; /* value for time stamp signature */ -- void * fd_fppReadCheck; /* additional fd for fpp read check */ - int randomSeed; /* random seed for write/read check */ - unsigned int incompressibleSeed; /* random seed for incompressible file creation */ - int randomOffset; /* access is to random offsets */ -@@ -153,7 +149,7 @@ typedef struct - char * memoryPerNodeStr; /* for parsing */ - char * testscripts; /* for parsing */ - char * buffer_type; /* for parsing */ -- enum PACKET_TYPE dataPacketType; /* The type of data packet. */ -+ ior_dataPacketType_e dataPacketType; /* The type of data packet. */ - - void * backend_options; /* Backend-specific options */ - -@@ -162,52 +158,17 @@ typedef struct - int fsyncPerWrite; /* fsync() after each write */ - int fsync; /* fsync() after write */ - -- /* MPI variables */ -- MPI_Comm testComm; /* MPI communicator */ -- MPI_Datatype transferType; /* datatype for transfer */ -- MPI_Datatype fileType; /* filetype for file view */ -- -- /* HDF5 variables */ -- int individualDataSets; /* datasets not shared by all procs */ -- int noFill; /* no fill in file creation */ -- IOR_offset_t setAlignment; /* alignment in bytes */ -- -- /* HDFS variables */ -- char * hdfs_user; /* copied from ENV, for now */ -- const char* hdfs_name_node; -- tPort hdfs_name_node_port; /* (uint16_t) */ -- hdfsFS hdfs_fs; /* file-system handle */ -- int hdfs_replicas; /* n block replicas. (0 gets default) */ -- int hdfs_block_size; /* internal blk-size. (0 gets default) */ -- - char* URI; /* "path" to target object */ -- size_t part_number; /* multi-part upload increment (PER-RANK!) */ -- char* UploadId; /* key for multi-part-uploads */ - - /* RADOS variables */ - rados_t rados_cluster; /* RADOS cluster handle */ - rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ - -- /* NCMPI variables */ -- int var_id; /* variable id handle for data set */ -- -- /* Lustre variables */ -- int lustre_stripe_count; -- int lustre_stripe_size; -- int lustre_start_ost; -- int lustre_set_striping; /* flag that we need to set lustre striping */ -- int lustre_ignore_locks; -- -- /* gpfs variables */ -- int gpfs_hint_access; /* use gpfs "access range" hint */ -- int gpfs_release_token; /* immediately release GPFS tokens after -- creating or opening a file */ -- /* beegfs variables */ -- int beegfs_numTargets; /* number storage targets to use */ -- int beegfs_chunkSize; /* srtipe pattern for new files */ -- - int id; /* test's unique ID */ - int intraTestBarriers; /* barriers between open/op and op/close */ -+ int warningAsErrors; /* treat any warning as an error */ -+ -+ aiori_xfer_hint_t hints; - } IOR_param_t; - - /* each pointer for a single test */ -@@ -216,8 +177,9 @@ typedef struct { - size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling - - double stonewall_time; -- long long stonewall_min_data_accessed; -- long long stonewall_avg_data_accessed; -+ long long stonewall_min_data_accessed; // of all processes -+ long long stonewall_avg_data_accessed; // across all processes -+ long long stonewall_total_data_accessed; // sum accross all processes - - IOR_offset_t aggFileSizeFromStat; - IOR_offset_t aggFileSizeFromXfer; -@@ -241,7 +203,7 @@ IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num); - void AllocResults(IOR_test_t *test); - - char * GetPlatformName(void); --void init_IOR_Param_t(IOR_param_t *p); -+void init_IOR_Param_t(IOR_param_t *p, MPI_Comm global_com); - - /* - * This function runs IOR given by command line, useful for testing -diff --git a/src/iordef.h b/src/iordef.h -index 78cf1d3..6b4e57d 100755 ---- a/src/iordef.h -+++ b/src/iordef.h -@@ -18,8 +18,13 @@ - #include - #include - #include --#include --#include -+ -+typedef enum { -+ DATA_TIMESTAMP, /* Will not include any offset, hence each buffer will be the same */ -+ DATA_OFFSET, -+ DATA_INCOMPRESSIBLE, /* Will include the offset as well */ -+ DATA_RANDOM /* fully scrambled blocks */ -+} ior_dataPacketType_e; - - #ifdef _WIN32 - # define _CRT_SECURE_NO_WARNINGS -@@ -52,13 +57,6 @@ - # include - #endif - --/************************** D E C L A R A T I O N S ***************************/ -- --extern int numTasks; /* MPI variables */ --extern int rank; --extern int rankOffset; --extern int verbose; /* verbose output */ -- - /*************************** D E F I N I T I O N S ****************************/ - - enum OutputFormat_t{ -@@ -115,117 +113,11 @@ enum OutputFormat_t{ - #define DELIMITERS " \t\r\n=" /* ReadScript() */ - #define FILENAME_DELIMITER '@' /* ParseFileName() */ - --/* MACROs for debugging */ --#define HERE fprintf(stdout, "** LINE %d (TASK=%d) **\n", \ -- __LINE__, rank); -- - typedef long long int IOR_offset_t; - typedef long long int IOR_size_t; - - #define IOR_format "%016llx" - -- --/******************************** M A C R O S *********************************/ -- --/******************************************************************************/ --/* -- * WARN_RESET will display a custom error message and set value to default -- */ --#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER) do { \ -- (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ -- if (rank == 0) { \ -- fprintf(stdout, "ior WARNING: %s. Using value of %d.\n", \ -- MSG, (TO_STRUCT_PTR)->MEMBER); \ -- } \ -- fflush(stdout); \ --} while (0) -- -- --#define WARN(MSG) do { \ -- if (verbose > VERBOSE_2) { \ -- fprintf(stdout, "ior WARNING: %s, (%s:%d).\n", \ -- MSG, __FILE__, __LINE__); \ -- } else { \ -- fprintf(stdout, "ior WARNING: %s.\n", MSG); \ -- } \ -- fflush(stdout); \ --} while (0) -- -- --/* warning with format string and errno printed */ --#define EWARNF(FORMAT, ...) do { \ -- if (verbose > VERBOSE_2) { \ -- fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s (%s:%d).\n", \ -- __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ -- } else { \ -- fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s \n", \ -- __VA_ARGS__, errno, strerror(errno)); \ -- } \ -- fflush(stdout); \ --} while (0) -- -- --/* warning with errno printed */ --#define EWARN(MSG) do { \ -- EWARNF("%s", MSG); \ --} while (0) -- -- --/* display error message with format string and terminate execution */ --#define ERRF(FORMAT, ...) do { \ -- fprintf(stdout, "ior ERROR: " FORMAT ", errno %d, %s (%s:%d)\n", \ -- __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ -- fflush(stdout); \ -- MPI_Abort(MPI_COMM_WORLD, -1); \ --} while (0) -- -- --/* display error message and terminate execution */ --#define ERR(MSG) do { \ -- ERRF("%s", MSG); \ --} while (0) -- -- --/* display a simple error message (i.e. errno is not set) and terminate execution */ --#define ERR_SIMPLE(MSG) do { \ -- fprintf(stdout, "ior ERROR: %s, (%s:%d)\n", \ -- MSG, __FILE__, __LINE__); \ -- fflush(stdout); \ -- MPI_Abort(MPI_COMM_WORLD, -1); \ --} while (0) -- -- --/******************************************************************************/ --/* -- * MPI_CHECKF will display a custom format string as well as an error string -- * from the MPI_STATUS and then exit the program -- */ -- --#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ -- char resultString[MPI_MAX_ERROR_STRING]; \ -- int resultLength; \ -- \ -- if (MPI_STATUS != MPI_SUCCESS) { \ -- MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ -- fprintf(stdout, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ -- __VA_ARGS__, resultString, __FILE__, __LINE__); \ -- fflush(stdout); \ -- MPI_Abort(MPI_COMM_WORLD, -1); \ -- } \ --} while(0) -- -- --/******************************************************************************/ --/* -- * MPI_CHECK will display a custom error message as well as an error string -- * from the MPI_STATUS and then exit the program -- */ -- --#define MPI_CHECK(MPI_STATUS, MSG) do { \ -- MPI_CHECKF(MPI_STATUS, "%s", MSG); \ --} while(0) -- -- - /******************************************************************************/ - /* - * System info for Windows. -diff --git a/src/md-workbench-main.c b/src/md-workbench-main.c -new file mode 100644 -index 0000000..bb94126 ---- /dev/null -+++ b/src/md-workbench-main.c -@@ -0,0 +1,13 @@ -+#include -+ -+#include "md-workbench.h" -+ -+int main(int argc, char ** argv){ -+ MPI_Init(& argc, & argv); -+ //phase_stat_t* results = -+ md_workbench_run(argc, argv, MPI_COMM_WORLD, stdout); -+ // API check, access the results of the first phase which is precrate. -+ //printf("Max op runtime: %f\n", results->max_op_time); -+ MPI_Finalize(); -+ return 0; -+} -diff --git a/src/md-workbench.c b/src/md-workbench.c -new file mode 100644 -index 0000000..48998a5 ---- /dev/null -+++ b/src/md-workbench.c -@@ -0,0 +1,1055 @@ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "md-workbench.h" -+#include "config.h" -+#include "aiori.h" -+#include "utilities.h" -+#include "parse_options.h" -+ -+/* -+This is the modified version md-workbench-fs that can utilize AIORI. -+It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics. -+ */ -+ -+#define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH -+ -+#define CHECK_MPI_RET(ret) if (ret != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);} -+#define LLU (long long unsigned) -+#define min(a,b) (a < b ? a : b) -+ -+#define oprintf(...) do { fprintf(o.logfile, __VA_ARGS__); fflush(o.logfile); } while(0); -+ -+// successfull, errors -+typedef struct { -+ int suc; -+ int err; -+} op_stat_t; -+ -+// A runtime for an operation and when the operation was started -+typedef struct{ -+ float time_since_app_start; -+ float runtime; -+} time_result_t; -+ -+ -+// statistics for running a single phase -+typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! -+ double t; // maximum time -+ double * t_all; -+ -+ op_stat_t dset_create; -+ op_stat_t dset_delete; -+ -+ op_stat_t obj_create; -+ op_stat_t obj_read; -+ op_stat_t obj_stat; -+ op_stat_t obj_delete; -+ -+ // time measurements of individual runs, these are not returned for now by the API! -+ uint64_t repeats; -+ time_result_t * time_create; -+ time_result_t * time_read; -+ time_result_t * time_stat; -+ time_result_t * time_delete; -+ -+ time_statistics_t stats_create; -+ time_statistics_t stats_read; -+ time_statistics_t stats_stat; -+ time_statistics_t stats_delete; -+ -+ // the maximum time for any single operation -+ double max_op_time; -+ double phase_start_timer; -+ int stonewall_iterations; -+} phase_stat_t; -+ -+struct benchmark_options{ -+ ior_aiori_t const * backend; -+ void * backend_options; -+ aiori_xfer_hint_t hints; -+ MPI_Comm com; -+ FILE * logfile; -+ -+ char * interface; -+ int num; -+ int precreate; -+ int dset_count; -+ -+ mdworkbench_results_t * results; // the results -+ -+ ior_dataPacketType_e dataPacketType; -+ char * packetTypeStr; -+ int offset; -+ int iterations; -+ int global_iteration; -+ int file_size; -+ int read_only; -+ int stonewall_timer; -+ int stonewall_timer_wear_out; -+ int gpu_memory_flags; /* use the GPU to store the data */ -+ -+ char * latency_file_prefix; -+ int latency_keep_all; -+ -+ int phase_cleanup; -+ int phase_precreate; -+ int phase_benchmark; -+ -+ //int limit_memory; -+ //int limit_memory_between_phases; -+ -+ int verbosity; -+ int process_report; -+ -+ int print_detailed_stats; -+ int quiet_output; -+ -+ char * run_info_file; -+ char * prefix; // directory to work on -+ -+ int ignore_precreate_errors; -+ int rank; -+ int size; -+ int verify_read; -+ int random_seed; -+ -+ float relative_waiting_factor; -+ int adaptive_waiting_mode; -+ -+ uint64_t start_item_number; -+}; -+ -+struct benchmark_options o; -+ -+static void def_dset_name(char * out_name, int n, int d){ -+ sprintf(out_name, "%s/%d_%d", o.prefix, n, d); -+} -+ -+static void def_obj_name(char * out_name, int n, int d, int i){ -+ sprintf(out_name, "%s/%d_%d/file-%d", o.prefix, n, d, i); -+} -+ -+void init_options(){ -+ o = (struct benchmark_options){ -+ .interface = "POSIX", -+ .prefix = "./out", -+ .num = 1000, -+ .random_seed = -1, -+ .precreate = 3000, -+ .dset_count = 10, -+ .offset = 1, -+ .iterations = 3, -+ .file_size = 3901, -+ .packetTypeStr = "t", -+ .run_info_file = "md-workbench.status"}; -+} -+ -+static void mdw_wait(double runtime){ -+ double waittime = runtime * o.relative_waiting_factor; -+ //printf("waittime: %e\n", waittime); -+ if(waittime < 0.01){ -+ double start; -+ start = GetTimeStamp(); -+ double cur = GetTimeStamp(); -+ double end = cur + waittime; -+ while (cur < end){ -+ cur = GetTimeStamp(); -+ } -+ }else{ -+ struct timespec w; -+ w.tv_sec = (time_t) (waittime); -+ w.tv_nsec = (long) ((waittime - w.tv_sec) * 1000 * 1000 * 1000); -+ nanosleep(& w, NULL); -+ } -+} -+ -+static void init_stats(phase_stat_t * p, size_t repeats){ -+ memset(p, 0, sizeof(phase_stat_t)); -+ p->repeats = repeats; -+ size_t timer_size = repeats * sizeof(time_result_t); -+ p->time_create = (time_result_t *) malloc(timer_size); -+ p->time_read = (time_result_t *) malloc(timer_size); -+ p->time_stat = (time_result_t *) malloc(timer_size); -+ p->time_delete = (time_result_t *) malloc(timer_size); -+} -+ -+static float add_timed_result(double start, double phase_start_timer, time_result_t * results, size_t pos, double * max_time, double * out_op_time){ -+ float curtime = start - phase_start_timer; -+ double op_time = GetTimeStamp() - start; -+ results[pos].runtime = (float) op_time; -+ results[pos].time_since_app_start = curtime; -+ if (op_time > *max_time){ -+ *max_time = op_time; -+ } -+ *out_op_time = op_time; -+ return curtime; -+} -+ -+static void print_detailed_stat_header(){ -+ printf("phase\t\td name\tcreate\tdelete\tob nam\tcreate\tread\tstat\tdelete\tt_inc_b\tt_no_bar\tthp\tmax_t\n"); -+} -+ -+static int sum_err(phase_stat_t * p){ -+ return p->dset_create.err + p->dset_delete.err + p->obj_create.err + p->obj_read.err + p->obj_stat.err + p->obj_delete.err; -+} -+ -+static double statistics_mean(int count, double * arr){ -+ double sum = 0; -+ for(int i=0; i < o.size; i++){ -+ sum += arr[i]; -+ } -+ return sum / o.size; -+} -+ -+static double statistics_std_dev(int count, double * arr){ -+ double mean = statistics_mean(count, arr); -+ double sum = 0; -+ for(int i=0; i < o.size; i++){ -+ sum += (mean - arr[i])*(mean - arr[i]); -+ } -+ return sqrt(sum / (o.size-1)); -+} -+ -+static void statistics_minmax(int count, double * arr, double * out_min, double * out_max){ -+ double min = 1e308; -+ double max = 0; -+ for(int i=0; i < o.size; i++){ -+ min = (arr[i] < min) ? arr[i] : min; -+ max = (arr[i] > max) ? arr[i] : max; -+ } -+ *out_min = min; -+ *out_max = max; -+} -+ -+static void print_p_stat(char * buff, const char * name, phase_stat_t * p, double t, int print_global){ -+ const double tp = (double)(p->obj_create.suc + p->obj_read.suc) * o.file_size / t / 1024 / 1024; -+ -+ const int errs = sum_err(p); -+ double r_min = 0; -+ double r_max = 0; -+ double r_mean = 0; -+ double r_std = 0; -+ -+ if(p->t_all){ -+ // we can compute several derived values that provide insight about quality of service, latency distribution and load balancing -+ statistics_minmax(o.size, p->t_all, & r_min, & r_max); -+ r_mean = statistics_mean(o.size, p->t_all); -+ r_std = statistics_std_dev(o.size, p->t_all); -+ } -+ -+ if (o.print_detailed_stats){ -+ sprintf(buff, "%s \t%d\t%d\t%d\t%d\t%d\t%d\t%.3fs\t%.3fs\t%.2f MiB/s %.4e", name, p->dset_create.suc, p->dset_delete.suc, p->obj_create.suc, p->obj_read.suc, p->obj_stat.suc, p->obj_delete.suc, p->t, t, tp, p->max_op_time); -+ -+ if (errs > 0){ -+ sprintf(buff, "%s err\t%d\t%d\t%d\t%d\t%d\t%d", name, p->dset_create.err, p->dset_delete.err, p->obj_create.err, p->obj_read.err, p->obj_stat.err, p->obj_delete.err); -+ } -+ }else{ -+ int pos = 0; -+ // single line -+ pos += sprintf(buff, "%s process max:%.2fs ", name, t); -+ if(print_global){ -+ pos += sprintf(buff + pos, "min:%.2fs mean: %.2fs balance:%.1f stddev:%.1f ", r_min, r_mean, r_min/r_max * 100.0, r_std); -+ } -+ int ioops_per_iter = 4; -+ if(o.read_only){ -+ ioops_per_iter = 2; -+ } -+ -+ double rate; -+ -+ switch(name[0]){ -+ case('b'): -+ rate = p->obj_read.suc * ioops_per_iter / t; -+ pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", -+ rate, // write, stat, read, delete -+ p->obj_read.suc, -+ p->obj_read.suc / t, -+ tp, -+ p->max_op_time); -+ -+ if(o.relative_waiting_factor > 1e-9){ -+ pos += sprintf(buff + pos, " waiting_factor:%.2f", o.relative_waiting_factor); -+ } -+ break; -+ case('p'): -+ rate = (p->dset_create.suc + p->obj_create.suc) / t; -+ pos += sprintf(buff + pos, "rate:%.1f iops/s dsets: %d objects:%d rate:%.3f dset/s rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", -+ rate, -+ p->dset_create.suc, -+ p->obj_create.suc, -+ p->dset_create.suc / t, -+ p->obj_create.suc / t, -+ tp, -+ p->max_op_time); -+ break; -+ case('c'): -+ rate = (p->obj_delete.suc + p->dset_delete.suc) / t; -+ pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d dsets: %d rate:%.1f obj/s rate:%.3f dset/s op-max:%.4es", -+ rate, -+ p->obj_delete.suc, -+ p->dset_delete.suc, -+ p->obj_delete.suc / t, -+ p->dset_delete.suc / t, -+ p->max_op_time); -+ break; -+ default: -+ pos = sprintf(buff, "%s: unknown phase", name); -+ break; -+ } -+ -+ if(print_global){ -+ mdworkbench_result_t * res = & o.results->result[o.results->count]; -+ res->errors = errs; -+ o.results->errors += errs; -+ res->rate = rate; -+ res->max_op_time = p->max_op_time; -+ res->runtime = t; -+ res->iterations_done = p->repeats; -+ } -+ -+ if(! o.quiet_output || errs > 0){ -+ pos += sprintf(buff + pos, " (%d errs", errs); -+ if(errs > 0){ -+ pos += sprintf(buff + pos, "!!!)" ); -+ }else{ -+ pos += sprintf(buff + pos, ")" ); -+ } -+ } -+ if(! o.quiet_output && p->stonewall_iterations){ -+ pos += sprintf(buff + pos, " stonewall-iter:%d", p->stonewall_iterations); -+ } -+ -+ if(p->stats_read.max > 1e-9){ -+ time_statistics_t stat = p->stats_read; -+ pos += sprintf(buff + pos, " read(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); -+ } -+ if(p->stats_stat.max > 1e-9){ -+ time_statistics_t stat = p->stats_stat; -+ pos += sprintf(buff + pos, " stat(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); -+ } -+ if(p->stats_create.max > 1e-9){ -+ time_statistics_t stat = p->stats_create; -+ pos += sprintf(buff + pos, " create(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); -+ } -+ if(p->stats_delete.max > 1e-9){ -+ time_statistics_t stat = p->stats_delete; -+ pos += sprintf(buff + pos, " delete(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); -+ } -+ } -+} -+ -+static int compare_floats(time_result_t * x, time_result_t * y){ -+ return x->runtime < y->runtime ? -1 : (x->runtime > y->runtime ? +1 : 0); -+} -+ -+static double runtime_quantile(int repeats, time_result_t * times, float quantile){ -+ int pos = round(quantile * (repeats - 1) + 0.49); -+ assert(pos < repeats); -+ return times[pos].runtime; -+} -+ -+static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * times, time_result_t * global_times){ -+ uint64_t count = 0; -+ int ret; -+ // due to stonewall, the number of repeats may be different per process -+ if(o.rank == 0){ -+ MPI_Status status; -+ memcpy(global_times, times, repeats * 2 * sizeof(float)); -+ count += repeats; -+ for(int i=1; i < o.size; i++){ -+ int cnt; -+ ret = MPI_Recv(& global_times[count], max_repeats*2, MPI_FLOAT, i, 888, o.com, & status); -+ CHECK_MPI_RET(ret) -+ MPI_Get_count(& status, MPI_FLOAT, & cnt); -+ count += cnt / 2; -+ } -+ }else{ -+ ret = MPI_Send(times, repeats * 2, MPI_FLOAT, 0, 888, o.com); -+ CHECK_MPI_RET(ret) -+ } -+ -+ return count; -+} -+ -+static void compute_histogram(const char * name, time_result_t * times, time_statistics_t * stats, size_t repeats, int writeLatencyFile){ -+ if(writeLatencyFile && o.latency_file_prefix ){ -+ char file[MAX_PATHLEN]; -+ sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name); -+ FILE * f = fopen(file, "w+"); -+ if(f == NULL){ -+ ERRF("%d: Error writing to latency file: %s", o.rank, file); -+ return; -+ } -+ fprintf(f, "time,runtime\n"); -+ for(size_t i = 0; i < repeats; i++){ -+ fprintf(f, "%.7f,%.4e\n", times[i].time_since_app_start, times[i].runtime); -+ } -+ fclose(f); -+ } -+ // now sort the times and pick the quantiles -+ qsort(times, repeats, sizeof(time_result_t), (int (*)(const void *, const void *)) compare_floats); -+ stats->min = times[0].runtime; -+ stats->q1 = runtime_quantile(repeats, times, 0.25); -+ if(repeats % 2 == 0){ -+ stats->median = (times[repeats/2].runtime + times[repeats/2 - 1].runtime)/2.0; -+ }else{ -+ stats->median = times[repeats/2].runtime; -+ } -+ stats->q3 = runtime_quantile(repeats, times, 0.75); -+ stats->q90 = runtime_quantile(repeats, times, 0.90); -+ stats->q99 = runtime_quantile(repeats, times, 0.99); -+ stats->max = times[repeats - 1].runtime; -+} -+ -+static void end_phase(const char * name, phase_stat_t * p){ -+ int ret; -+ char buff[MAX_PATHLEN]; -+ -+ //char * limit_memory_P = NULL; -+ MPI_Barrier(o.com); -+ -+ int max_repeats = o.precreate * o.dset_count; -+ if(strcmp(name,"benchmark") == 0){ -+ max_repeats = o.num * o.dset_count; -+ } -+ -+ // prepare the summarized report -+ phase_stat_t g_stat; -+ init_stats(& g_stat, (o.rank == 0 ? 1 : 0) * ((size_t) max_repeats) * o.size); -+ // reduce timers -+ ret = MPI_Reduce(& p->t, & g_stat.t, 2, MPI_DOUBLE, MPI_MAX, 0, o.com); -+ CHECK_MPI_RET(ret) -+ if(o.rank == 0) { -+ g_stat.t_all = (double*) malloc(sizeof(double) * o.size); -+ } -+ ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, o.com); -+ CHECK_MPI_RET(ret) -+ ret = MPI_Reduce(& p->dset_create, & g_stat.dset_create, 2*(2+4), MPI_INT, MPI_SUM, 0, o.com); -+ CHECK_MPI_RET(ret) -+ ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, o.com); -+ CHECK_MPI_RET(ret) -+ if( p->stonewall_iterations ){ -+ ret = MPI_Reduce(& p->repeats, & g_stat.repeats, 1, MPI_UINT64_T, MPI_MIN, 0, o.com); -+ CHECK_MPI_RET(ret) -+ g_stat.stonewall_iterations = p->stonewall_iterations; -+ } -+ int write_rank0_latency_file = (o.rank == 0) && ! o.latency_keep_all; -+ -+ if(strcmp(name,"precreate") == 0){ -+ uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create); -+ if(o.rank == 0){ -+ compute_histogram("precreate-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all); -+ } -+ compute_histogram("precreate", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file); -+ }else if(strcmp(name,"cleanup") == 0){ -+ uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete); -+ if(o.rank == 0) { -+ compute_histogram("cleanup-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all); -+ } -+ compute_histogram("cleanup", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file); -+ }else if(strcmp(name,"benchmark") == 0){ -+ uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_read, g_stat.time_read); -+ if(o.rank == 0) { -+ compute_histogram("read-all", g_stat.time_read, & g_stat.stats_read, repeats, o.latency_keep_all); -+ } -+ compute_histogram("read", p->time_read, & p->stats_read, p->repeats, write_rank0_latency_file); -+ -+ repeats = aggregate_timers(p->repeats, max_repeats, p->time_stat, g_stat.time_stat); -+ if(o.rank == 0) { -+ compute_histogram("stat-all", g_stat.time_stat, & g_stat.stats_stat, repeats, o.latency_keep_all); -+ } -+ compute_histogram("stat", p->time_stat, & p->stats_stat, p->repeats, write_rank0_latency_file); -+ -+ if(! o.read_only){ -+ repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create); -+ if(o.rank == 0) { -+ compute_histogram("create-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all); -+ } -+ compute_histogram("create", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file); -+ -+ repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete); -+ if(o.rank == 0) { -+ compute_histogram("delete-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all); -+ } -+ compute_histogram("delete", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file); -+ } -+ } -+ -+ if (o.rank == 0){ -+ //print the stats: -+ print_p_stat(buff, name, & g_stat, g_stat.t, 1); -+ oprintf("%s\n", buff); -+ } -+ -+ if(o.process_report){ -+ if(o.rank == 0){ -+ print_p_stat(buff, name, p, p->t, 0); -+ oprintf("0: %s\n", buff); -+ for(int i=1; i < o.size; i++){ -+ MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, o.com, MPI_STATUS_IGNORE); -+ oprintf("%d: %s\n", i, buff); -+ } -+ }else{ -+ print_p_stat(buff, name, p, p->t, 0); -+ MPI_Send(buff, MAX_PATHLEN, MPI_CHAR, 0, 4711, o.com); -+ } -+ } -+ -+ if(g_stat.t_all){ -+ free(g_stat.t_all); -+ } -+ if(p->time_create){ -+ free(p->time_create); -+ free(p->time_read); -+ free(p->time_stat); -+ free(p->time_delete); -+ } -+ if(g_stat.time_create){ -+ free(g_stat.time_create); -+ free(g_stat.time_read); -+ free(g_stat.time_stat); -+ free(g_stat.time_delete); -+ } -+ -+ // copy the result back for the API -+ mdworkbench_result_t * res = & o.results->result[o.results->count]; -+ memcpy(& res->stats_create, & g_stat.stats_create, sizeof(time_statistics_t)); -+ memcpy(& res->stats_read, & g_stat.stats_read, sizeof(time_statistics_t)); -+ memcpy(& res->stats_stat, & g_stat.stats_stat, sizeof(time_statistics_t)); -+ memcpy(& res->stats_delete, & g_stat.stats_delete, sizeof(time_statistics_t)); -+ -+ o.results->count++; -+ -+ // allocate memory if necessary -+ // ret = mem_preallocate(& limit_memory_P, o.limit_memory_between_phases, o.verbosity >= 3); -+ // if( ret != 0){ -+ // printf("%d: Error allocating memory!\n", o.rank); -+ // } -+ // mem_free_preallocated(& limit_memory_P); -+} -+ -+void run_precreate(phase_stat_t * s, int current_index){ -+ char dset[MAX_PATHLEN]; -+ char obj_name[MAX_PATHLEN]; -+ int ret; -+ -+ for(int i=0; i < o.dset_count; i++){ -+ def_dset_name(dset, o.rank, i); -+ -+ ret = o.backend->mkdir(dset, DIRMODE, o.backend_options); -+ if (ret == 0){ -+ s->dset_create.suc++; -+ }else{ -+ s->dset_create.err++; -+ if (! o.ignore_precreate_errors){ -+ ERRF("%d: Error while creating the dset: %s", o.rank, dset); -+ } -+ } -+ } -+ -+ char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags); -+ generate_memory_pattern(buf, o.file_size, o.random_seed, o.rank, o.dataPacketType); -+ double op_timer; // timer for individual operations -+ size_t pos = -1; // position inside the individual measurement array -+ double op_time; -+ -+ // create the obj -+ for(int f=current_index; f < o.precreate; f++){ -+ for(int d=0; d < o.dset_count; d++){ -+ pos++; -+ def_obj_name(obj_name, o.rank, d, f); -+ -+ op_timer = GetTimeStamp(); -+ aiori_fd_t * aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); -+ if (NULL == aiori_fh){ -+ FAIL("Unable to open file %s", obj_name); -+ } -+ update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_seed, o.rank, o.dataPacketType); -+ if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { -+ s->obj_create.suc++; -+ }else{ -+ s->obj_create.err++; -+ if (! o.ignore_precreate_errors){ -+ ERRF("%d: Error while creating the obj: %s", o.rank, obj_name); -+ } -+ } -+ o.backend->close(aiori_fh, o.backend_options); -+ -+ add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: write %s:%s (%d) pretend: %d\n", o.rank, dset, obj_name, ret, o.rank); -+ } -+ } -+ } -+ aligned_buffer_free(buf, o.gpu_memory_flags); -+} -+ -+/* FIFO: create a new file, write to it. Then read from the first created file, delete it... */ -+void run_benchmark(phase_stat_t * s, int * current_index_p){ -+ char obj_name[MAX_PATHLEN]; -+ int ret; -+ char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags); -+ memset(buf, o.rank % 256, o.file_size); -+ double op_timer; // timer for individual operations -+ size_t pos = -1; // position inside the individual measurement array -+ int start_index = *current_index_p; -+ int total_num = o.num; -+ int armed_stone_wall = (o.stonewall_timer > 0); -+ int f; -+ double phase_allreduce_time = 0; -+ aiori_fd_t * aiori_fh; -+ -+ for(f=0; f < total_num; f++){ -+ float bench_runtime = 0; // the time since start -+ for(int d=0; d < o.dset_count; d++){ -+ double op_time; -+ struct stat stat_buf; -+ const int prevFile = f + start_index; -+ pos++; -+ -+ int readRank = (o.rank - o.offset * (d+1)) % o.size; -+ readRank = readRank < 0 ? readRank + o.size : readRank; -+ def_obj_name(obj_name, readRank, d, prevFile); -+ -+ op_timer = GetTimeStamp(); -+ -+ ret = o.backend->stat(obj_name, & stat_buf, o.backend_options); -+ // TODO potentially check return value must be identical to o.file_size -+ -+ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time); -+ if(o.relative_waiting_factor > 1e-9) { -+ mdw_wait(op_time); -+ } -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: stat %s (%d)\n", o.rank, obj_name, ret); -+ } -+ -+ if(ret != 0){ -+ if (o.verbosity) -+ ERRF("%d: Error while stating the obj: %s", o.rank, obj_name); -+ s->obj_stat.err++; -+ continue; -+ } -+ s->obj_stat.suc++; -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: read %s pretend: %d\n", o.rank, obj_name, readRank); -+ } -+ -+ op_timer = GetTimeStamp(); -+ aiori_fh = o.backend->open(obj_name, IOR_RDONLY, o.backend_options); -+ if (NULL == aiori_fh){ -+ FAIL("Unable to open file %s", obj_name); -+ } -+ if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) { -+ if(o.verify_read){ -+ if(verify_memory_pattern(prevFile * o.dset_count + d, buf, o.file_size, o.random_seed, readRank, o.dataPacketType) == 0){ -+ s->obj_read.suc++; -+ }else{ -+ s->obj_read.err++; -+ } -+ }else{ -+ s->obj_read.suc++; -+ } -+ }else{ -+ s->obj_read.err++; -+ WARNF("%d: Error while reading the obj: %s", o.rank, obj_name); -+ } -+ o.backend->close(aiori_fh, o.backend_options); -+ -+ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time); -+ if(o.relative_waiting_factor > 1e-9) { -+ mdw_wait(op_time); -+ } -+ if(o.read_only){ -+ continue; -+ } -+ -+ op_timer = GetTimeStamp(); -+ o.backend->delete(obj_name, o.backend_options); -+ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); -+ if(o.relative_waiting_factor > 1e-9) { -+ mdw_wait(op_time); -+ } -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: delete %s\n", o.rank, obj_name); -+ } -+ s->obj_delete.suc++; -+ -+ int writeRank = (o.rank + o.offset * (d+1)) % o.size; -+ const int newFileIndex = o.precreate + prevFile; -+ def_obj_name(obj_name, writeRank, d, newFileIndex); -+ -+ op_timer = GetTimeStamp(); -+ aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); -+ if (NULL != aiori_fh){ -+ generate_memory_pattern(buf, o.file_size, o.random_seed, writeRank, o.dataPacketType); -+ update_write_memory_pattern(newFileIndex * o.dset_count + d, buf, o.file_size, o.random_seed, writeRank, o.dataPacketType); -+ -+ if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { -+ s->obj_create.suc++; -+ }else{ -+ s->obj_create.err++; -+ if (! o.ignore_precreate_errors){ -+ ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); -+ } -+ } -+ o.backend->close(aiori_fh, o.backend_options); -+ }else{ -+ if (! o.ignore_precreate_errors){ -+ ERRF("%d: Error while creating the obj: %s", o.rank, obj_name); -+ } -+ WARNF("Unable to open file %s", obj_name); -+ s->obj_create.err++; -+ } -+ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); -+ if(o.relative_waiting_factor > 1e-9) { -+ mdw_wait(op_time); -+ } -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: write %s (%d) pretend: %d\n", o.rank, obj_name, ret, writeRank); -+ } -+ } // end loop -+ -+ if(armed_stone_wall && bench_runtime >= o.stonewall_timer){ -+ if(o.verbosity){ -+ oprintf("%d: stonewall runtime %fs (%ds)\n", o.rank, bench_runtime, o.stonewall_timer); -+ } -+ if(! o.stonewall_timer_wear_out){ -+ s->stonewall_iterations = f; -+ break; -+ } -+ armed_stone_wall = 0; -+ // wear out mode, now reduce the maximum -+ int cur_pos = f + 1; -+ phase_allreduce_time = GetTimeStamp() - s->phase_start_timer; -+ int ret = MPI_Allreduce(& cur_pos, & total_num, 1, MPI_INT, MPI_MAX, o.com); -+ CHECK_MPI_RET(ret) -+ s->phase_start_timer = GetTimeStamp(); -+ s->stonewall_iterations = total_num; -+ if(o.rank == 0){ -+ oprintf("stonewall wear out %fs (%d iter)\n", bench_runtime, total_num); -+ } -+ if(f == total_num){ -+ break; -+ } -+ } -+ } -+ s->t = GetTimeStamp() - s->phase_start_timer + phase_allreduce_time; -+ if(armed_stone_wall && o.stonewall_timer_wear_out){ -+ int f = total_num; -+ int ret = MPI_Allreduce(& f, & total_num, 1, MPI_INT, MPI_MAX, o.com); -+ CHECK_MPI_RET(ret) -+ s->stonewall_iterations = total_num; -+ } -+ if(o.stonewall_timer && ! o.stonewall_timer_wear_out){ -+ // TODO FIXME -+ int sh = s->stonewall_iterations; -+ int ret = MPI_Allreduce(& sh, & s->stonewall_iterations, 1, MPI_INT, MPI_MAX, o.com); -+ CHECK_MPI_RET(ret) -+ } -+ -+ if(! o.read_only) { -+ *current_index_p += f; -+ } -+ s->repeats = pos + 1; -+ aligned_buffer_free(buf, o.gpu_memory_flags); -+} -+ -+void run_cleanup(phase_stat_t * s, int start_index){ -+ char dset[MAX_PATHLEN]; -+ char obj_name[MAX_PATHLEN]; -+ double op_timer; // timer for individual operations -+ size_t pos = -1; // position inside the individual measurement array -+ -+ for(int d=0; d < o.dset_count; d++){ -+ for(int f=0; f < o.precreate; f++){ -+ double op_time; -+ pos++; -+ def_obj_name(obj_name, o.rank, d, f + start_index); -+ -+ op_timer = GetTimeStamp(); -+ o.backend->delete(obj_name, o.backend_options); -+ add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); -+ -+ if (o.verbosity >= 2){ -+ oprintf("%d: delete %s\n", o.rank, obj_name); -+ } -+ s->obj_delete.suc++; -+ } -+ -+ def_dset_name(dset, o.rank, d); -+ if (o.backend->rmdir(dset, o.backend_options) == 0) { -+ s->dset_delete.suc++; -+ }else{ -+ oprintf("Unable to remove directory %s\n", dset); -+ } -+ if (o.verbosity >= 2){ -+ oprintf("%d: delete dset %s\n", o.rank, dset); -+ } -+ } -+} -+ -+ -+static option_help options [] = { -+ {'O', "offset", "Offset in o.ranks between writers and readers. Writers and readers should be located on different nodes.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.offset}, -+ {'a', "api", "The API (plugin) to use for the benchmark, use list to show all compiled plugins.", OPTION_OPTIONAL_ARGUMENT, 's', & o.interface}, -+ {'I', "obj-per-proc", "Number of I/O operations per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.num}, -+ {'L', "latency", "Measure the latency for individual operations, prefix the result files with the provided filename.", OPTION_OPTIONAL_ARGUMENT, 's', & o.latency_file_prefix}, -+ {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all}, -+ {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate}, -+ {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count}, -+ {'G', NULL, "Timestamp/Random seed for access pattern, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed}, -+ {'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, -+ {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output}, -+ //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory}, -+ // {'M', "lim-free-mem-phase", "Allocate memory until this limit (in MiB) is reached between the phases, but free it before starting the next phase; the time is NOT included for the phase.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory_between_phases}, -+ {'S', "object-size", "Size for the created objects.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.file_size}, -+ {'R', "iterations", "Number of times to rerun the main phase", OPTION_OPTIONAL_ARGUMENT, 'd', & o.iterations}, -+ {'t', "waiting-time", "Waiting time relative to runtime (1.0 is 100%%)", OPTION_OPTIONAL_ARGUMENT, 'f', & o.relative_waiting_factor}, -+ {'T', "adaptive-waiting", "Compute an adaptive waiting time", OPTION_FLAG, 'd', & o.adaptive_waiting_mode}, -+ {'1', "run-precreate", "Run precreate phase", OPTION_FLAG, 'd', & o.phase_precreate}, -+ {'2', "run-benchmark", "Run benchmark phase", OPTION_FLAG, 'd', & o.phase_benchmark}, -+ {'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup}, -+ {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer}, -+ {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out}, -+ {'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read}, -+ {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|random|o|i|t|r]", OPTION_OPTIONAL_ARGUMENT, 's', & o.packetTypeStr}, -+ {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, -+ {0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number}, -+ {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats}, -+ {0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only}, -+ {0, "ignore-precreate-errors", "Ignore errors occuring during the pre-creation phase", OPTION_FLAG, 'd', & o.ignore_precreate_errors}, -+ {0, "process-reports", "Independent report per process/rank", OPTION_FLAG, 'd', & o.process_report}, -+ {'v', "verbose", "Increase the verbosity level", OPTION_FLAG, 'd', & o.verbosity}, -+ {0, "run-info-file", "The log file for resuming a previous run", OPTION_OPTIONAL_ARGUMENT, 's', & o.run_info_file}, -+ LAST_OPTION -+ }; -+ -+static void printTime(){ -+ char buff[100]; -+ time_t now = time(0); -+ strftime (buff, 100, "%Y-%m-%d %H:%M:%S", localtime (&now)); -+ oprintf("%s\n", buff); -+} -+ -+static int return_position(){ -+ int position, ret; -+ if( o.rank == 0){ -+ FILE * f = fopen(o.run_info_file, "r"); -+ if(! f){ -+ ERRF("[ERROR] Could not open %s for restart", o.run_info_file); -+ exit(EXIT_FAILURE); -+ } -+ ret = fscanf(f, "pos: %d", & position); -+ if (ret != 1){ -+ ERRF("Could not read from %s for restart", o.run_info_file); -+ exit(EXIT_FAILURE); -+ } -+ fclose(f); -+ } -+ ret = MPI_Bcast( & position, 1, MPI_INT, 0, o.com ); -+ return position; -+} -+ -+static void store_position(int position){ -+ if (o.rank != 0){ -+ return; -+ } -+ FILE * f = fopen(o.run_info_file, "w"); -+ if(! f){ -+ ERRF("[ERROR] Could not open %s for saving data", o.run_info_file); -+ exit(EXIT_FAILURE); -+ } -+ fprintf(f, "pos: %d\n", position); -+ fclose(f); -+} -+ -+mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ -+ int ret; -+ int printhelp = 0; -+ char * limit_memory_P = NULL; -+ init_options(); -+ init_clock(world_com); -+ -+ o.com = world_com; -+ o.logfile = out_logfile; -+ -+ MPI_Comm_rank(o.com, & o.rank); -+ MPI_Comm_size(o.com, & o.size); -+ -+ if (o.rank == 0 && ! o.quiet_output){ -+ oprintf("Args: %s", argv[0]); -+ for(int i=1; i < argc; i++){ -+ oprintf(" \"%s\"", argv[i]); -+ } -+ oprintf("\n"); -+ } -+ -+ memset(& o.hints, 0, sizeof(o.hints)); -+ o.hints.filePerProc = 1; -+ -+ options_all_t * global_options = airoi_create_all_module_options(options); -+ int parsed = option_parse(argc, argv, global_options); -+ o.backend = aiori_select(o.interface); -+ if (o.backend == NULL){ -+ ERR("Unrecognized I/O API"); -+ } -+ if (! o.backend->enable_mdtest){ -+ ERR("Backend doesn't support MDWorbench"); -+ } -+ o.backend_options = airoi_update_module_options(o.backend, global_options); -+ -+ o.dataPacketType = parsePacketType(o.packetTypeStr[0]); -+ -+ if (!(o.phase_cleanup || o.phase_precreate || o.phase_benchmark)){ -+ // enable all phases -+ o.phase_cleanup = o.phase_precreate = o.phase_benchmark = 1; -+ } -+ if (! o.phase_precreate && o.phase_benchmark && o.stonewall_timer && ! o.stonewall_timer_wear_out){ -+ if(o.rank == 0){ -+ WARN("Dangerous option combination: and benchmark phase (-2) using with stonewall option (-w) without stonewall wear-out will lead to files that cannot be cleaned up using the cleanup phase(-3). Also multiple iterations are problematic."); -+ } -+ } -+ if( o.random_seed == -1 ){ -+ o.random_seed = time(NULL); -+ MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, o.com); -+ } -+ -+ if(o.backend->xfer_hints){ -+ o.backend->xfer_hints(& o.hints); -+ } -+ if(o.backend->check_params){ -+ o.backend->check_params(o.backend_options); -+ } -+ if (o.backend->initialize){ -+ o.backend->initialize(o.backend_options); -+ } -+ -+ int current_index = 0; -+ -+ if ( (o.phase_cleanup || o.phase_benchmark) && ! o.phase_precreate ){ -+ current_index = return_position(); -+ } -+ -+ if(o.start_item_number){ -+ oprintf("Using start position %lld\n", (long long) o.start_item_number); -+ current_index = o.start_item_number; -+ } -+ -+ size_t total_obj_count = o.dset_count * (size_t) (o.num * o.iterations + o.precreate) * o.size; -+ if (o.rank == 0 && ! o.quiet_output){ -+ oprintf("MD-Workbench total objects: %zu workingset size: %.3f MiB (version: %s) time: ", total_obj_count, ((double) o.size) * o.dset_count * o.precreate * o.file_size / 1024.0 / 1024.0, PACKAGE_VERSION); -+ printTime(); -+ if(o.num > o.precreate){ -+ oprintf("WARNING: num > precreate, this may cause the situation that no objects are available to read\n"); -+ } -+ } -+ -+ if ( o.rank == 0 && ! o.quiet_output ){ -+ // print the set output options -+ // option_print_current(options); -+ // oprintf("\n"); -+ } -+ -+ // preallocate memory if necessary -+ //ret = mem_preallocate(& limit_memory_P, o.limit_memory, o.verbosity >= 3); -+ //if(ret != 0){ -+ // printf("%d: Error allocating memory\n", o.rank); -+ // MPI_Abort(o.com, 1); -+ //} -+ -+ double t_bench_start; -+ t_bench_start = GetTimeStamp(); -+ phase_stat_t phase_stats; -+ size_t result_count = (2 + o.iterations) * (o.adaptive_waiting_mode ? 7 : 1); -+ o.results = malloc(sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count); -+ memset(o.results, 0, sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count); -+ o.results->count = 0; -+ -+ if(o.rank == 0 && o.print_detailed_stats && ! o.quiet_output){ -+ print_detailed_stat_header(); -+ } -+ -+ if (o.phase_precreate){ -+ if (o.rank == 0){ -+ if (o.backend->mkdir(o.prefix, DIRMODE, o.backend_options) != 0) { -+ WARNF("Unable to create test directory %s", o.prefix); -+ } -+ } -+ init_stats(& phase_stats, o.precreate * o.dset_count); -+ MPI_Barrier(o.com); -+ -+ // pre-creation phase -+ phase_stats.phase_start_timer = GetTimeStamp(); -+ run_precreate(& phase_stats, current_index); -+ phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; -+ end_phase("precreate", & phase_stats); -+ } -+ -+ if (o.phase_benchmark){ -+ // benchmark phase -+ for(o.global_iteration = 0; o.global_iteration < o.iterations; o.global_iteration++){ -+ if(o.adaptive_waiting_mode){ -+ o.relative_waiting_factor = 0; -+ } -+ init_stats(& phase_stats, o.num * o.dset_count); -+ MPI_Barrier(o.com); -+ phase_stats.phase_start_timer = GetTimeStamp(); -+ run_benchmark(& phase_stats, & current_index); -+ end_phase("benchmark", & phase_stats); -+ -+ if(o.adaptive_waiting_mode){ -+ o.relative_waiting_factor = 0.0625; -+ for(int r=0; r <= 6; r++){ -+ init_stats(& phase_stats, o.num * o.dset_count); -+ MPI_Barrier(o.com); -+ phase_stats.phase_start_timer = GetTimeStamp(); -+ run_benchmark(& phase_stats, & current_index); -+ end_phase("benchmark", & phase_stats); -+ o.relative_waiting_factor *= 2; -+ } -+ } -+ } -+ } -+ -+ // cleanup phase -+ if (o.phase_cleanup){ -+ init_stats(& phase_stats, o.precreate * o.dset_count); -+ phase_stats.phase_start_timer = GetTimeStamp(); -+ run_cleanup(& phase_stats, current_index); -+ phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; -+ end_phase("cleanup", & phase_stats); -+ -+ if (o.rank == 0){ -+ if (o.backend->rmdir(o.prefix, o.backend_options) != 0) { -+ oprintf("Unable to remove directory %s\n", o.prefix); -+ } -+ } -+ }else{ -+ store_position(current_index); -+ } -+ -+ double t_all = GetTimeStamp() - t_bench_start; -+ if(o.backend->finalize){ -+ o.backend->finalize(o.backend_options); -+ } -+ if (o.rank == 0 && ! o.quiet_output){ -+ oprintf("Total runtime: %.0fs time: ", t_all); -+ printTime(); -+ } -+ //mem_free_preallocated(& limit_memory_P); -+ return o.results; -+} -diff --git a/src/md-workbench.h b/src/md-workbench.h -new file mode 100644 -index 0000000..394a43c ---- /dev/null -+++ b/src/md-workbench.h -@@ -0,0 +1,42 @@ -+#ifndef IOR_MD_WORKBENCH_H -+#define IOR_MD_WORKBENCH_H -+ -+#include -+#include -+#include -+ -+typedef struct{ -+ float min; -+ float q1; -+ float median; -+ float q3; -+ float q90; -+ float q99; -+ float max; -+} time_statistics_t; -+ -+ -+// statistics for running a single phase -+typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! -+ time_statistics_t stats_create; -+ time_statistics_t stats_read; -+ time_statistics_t stats_stat; -+ time_statistics_t stats_delete; -+ -+ int errors; -+ double rate; -+ double max_op_time; -+ double runtime; -+ uint64_t iterations_done; -+} mdworkbench_result_t; -+ -+typedef struct{ -+ int count; // the number of results -+ int errors; -+ mdworkbench_result_t result[]; -+} mdworkbench_results_t; -+ -+// @Return The first statistics returned are precreate, then iteration many benchmark runs, the last is cleanup -+mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); -+ -+#endif -diff --git a/src/mdtest.c b/src/mdtest.c -index de0c52a..9d4cbec 100644 ---- a/src/mdtest.c -+++ b/src/mdtest.c -@@ -76,6 +76,8 @@ - - #include - -+#pragma GCC diagnostic ignored "-Wformat-overflow" -+ - #ifdef HAVE_LUSTRE_LUSTREAPI - #include - #endif /* HAVE_LUSTRE_LUSTREAPI */ -@@ -88,86 +90,99 @@ - - #define LLU "%lu" - --static int size; --static uint64_t *rand_array; --static char testdir[MAX_PATHLEN]; --static char testdirpath[MAX_PATHLEN]; --static char base_tree_name[MAX_PATHLEN]; --static char **filenames; --static char hostname[MAX_PATHLEN]; --static char mk_name[MAX_PATHLEN]; --static char stat_name[MAX_PATHLEN]; --static char read_name[MAX_PATHLEN]; --static char rm_name[MAX_PATHLEN]; --static char unique_mk_dir[MAX_PATHLEN]; --static char unique_chdir_dir[MAX_PATHLEN]; --static char unique_stat_dir[MAX_PATHLEN]; --static char unique_read_dir[MAX_PATHLEN]; --static char unique_rm_dir[MAX_PATHLEN]; --static char unique_rm_uni_dir[MAX_PATHLEN]; --static char *write_buffer; --static char *read_buffer; --static char *verify_read_buffer; --static char *stoneWallingStatusFile; -- -- --static int barriers; --static int create_only; --static int stat_only; --static int read_only; --static int verify_read; --static int verification_error; --static int remove_only; --static int leaf_only; --static unsigned branch_factor; --static int depth; -- --/* -- * This is likely a small value, but it's sometimes computed by -- * branch_factor^(depth+1), so we'll make it a larger variable, -- * just in case. -- */ --static uint64_t num_dirs_in_tree; --/* -- * As we start moving towards Exascale, we could have billions -- * of files in a directory. Make room for that possibility with -- * a larger variable. -- */ --static uint64_t items; --static uint64_t items_per_dir; --static uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ --static int directory_loops; --static int print_time; --static int print_rate_and_time; --static int random_seed; --static int shared_file; --static int files_only; --static int dirs_only; --static int pre_delay; --static int unique_dir_per_task; --static int time_unique_dir_overhead; --static int throttle; --static int collective_creates; --static size_t write_bytes; --static int stone_wall_timer_seconds; --static size_t read_bytes; --static int sync_file; --static int call_sync; --static int path_count; --static int nstride; /* neighbor stride */ --static int make_node = 0; --#ifdef HAVE_LUSTRE_LUSTREAPI --static int global_dir_layout; --#endif /* HAVE_LUSTRE_LUSTREAPI */ -- --static mdtest_results_t * summary_table; --static pid_t pid; --static uid_t uid; -- --/* Use the POSIX backend by default */ --static const ior_aiori_t *backend; -+typedef struct { -+ int size; -+ uint64_t *rand_array; -+ char testdir[MAX_PATHLEN]; -+ char testdirpath[MAX_PATHLEN]; -+ char base_tree_name[MAX_PATHLEN]; -+ char **filenames; -+ char hostname[MAX_PATHLEN]; -+ char mk_name[MAX_PATHLEN]; -+ char stat_name[MAX_PATHLEN]; -+ char read_name[MAX_PATHLEN]; -+ char rm_name[MAX_PATHLEN]; -+ char unique_mk_dir[MAX_PATHLEN]; -+ char unique_chdir_dir[MAX_PATHLEN]; -+ char unique_stat_dir[MAX_PATHLEN]; -+ char unique_read_dir[MAX_PATHLEN]; -+ char unique_rm_dir[MAX_PATHLEN]; -+ char unique_rm_uni_dir[MAX_PATHLEN]; -+ char *write_buffer; -+ char *stoneWallingStatusFile; -+ int gpu_memory_flags; -+ -+ -+ int barriers; -+ int create_only; -+ int stat_only; -+ int read_only; -+ int verify_read; -+ int verify_write; -+ int verification_error; -+ int remove_only; -+ int rename_dirs; -+ int leaf_only; -+ unsigned branch_factor; -+ int depth; -+ int random_buffer_offset; /* user settable value, otherwise random */ -+ -+ /* -+ * This is likely a small value, but it's sometimes computed by -+ * branch_factor^(depth+1), so we'll make it a larger variable, -+ * just in case. -+ */ -+ uint64_t num_dirs_in_tree; -+ /* -+ * As we start moving towards Exascale, we could have billions -+ * of files in a directory. Make room for that possibility with -+ * a larger variable. -+ */ -+ uint64_t items; -+ uint64_t items_per_dir; -+ uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ -+ int directory_loops; -+ int print_time; -+ int print_rate_and_time; -+ int print_all_proc; -+ int show_perrank_statistics; -+ ior_dataPacketType_e dataPacketType; -+ int random_seed; -+ int shared_file; -+ int files_only; -+ int dirs_only; -+ int pre_delay; -+ int unique_dir_per_task; -+ int time_unique_dir_overhead; -+ int collective_creates; -+ size_t write_bytes; -+ int stone_wall_timer_seconds; -+ size_t read_bytes; -+ int sync_file; -+ int call_sync; -+ int path_count; -+ int nstride; /* neighbor stride */ -+ int make_node; -+ #ifdef HAVE_LUSTRE_LUSTREAPI -+ int global_dir_layout; -+ #endif /* HAVE_LUSTRE_LUSTREAPI */ -+ char * saveRankDetailsCSV; /* save the details about the performance to a file */ -+ const char *prologue; -+ const char *epilogue; -+ -+ mdtest_results_t * summary_table; -+ pid_t pid; -+ uid_t uid; -+ -+ /* Use the POSIX backend by default */ -+ const ior_aiori_t *backend; -+ void * backend_options; -+ aiori_xfer_hint_t hints; -+ char * api; -+} mdtest_options_t; -+ -+static mdtest_options_t o; - --static IOR_param_t param; - - /* This structure describes the processing status for stonewalling */ - typedef struct{ -@@ -186,6 +201,8 @@ typedef struct{ - /* for making/removing unique directory && stating/deleting subdirectory */ - enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR}; - -+#define PRINT(...) fprintf(out_logfile, __VA_ARGS__); -+ - /* a helper function for passing debug and verbose messages. - use the MACRO as it will insert __LINE__ for you. - Pass the verbose level for root to print, then the verbose level for anyone to print. -@@ -211,25 +228,6 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... - } - } - --void generate_memory_pattern(char * buffer, size_t bytes){ -- for(int i=0; i < bytes; i++){ -- buffer[i] = i + 1; -- } --} -- --void offset_timers(double * t, int tcount) { -- double toffset; -- int i; -- -- -- VERBOSE(1,-1,"V-1: Entering offset_timers..." ); -- -- toffset = GetTimeStamp() - t[tcount]; -- for (i = 0; i < tcount+1; i++) { -- t[i] += toffset; -- } --} -- - void parse_dirpath(char *dirpath_arg) { - char * tmp, * token; - char delimiter_string[3] = { '@', '\n', '\0' }; -@@ -240,46 +238,57 @@ void parse_dirpath(char *dirpath_arg) { - - tmp = dirpath_arg; - -- if (* tmp != '\0') path_count++; -+ if (* tmp != '\0') o.path_count++; - while (* tmp != '\0') { - if (* tmp == '@') { -- path_count++; -+ o.path_count++; - } - tmp++; - } - // prevent changes to the original dirpath_arg - dirpath_arg = strdup(dirpath_arg); -- filenames = (char **)malloc(path_count * sizeof(char **)); -- if (filenames == NULL || dirpath_arg == NULL) { -- FAIL("out of memory"); -- } -+ o.filenames = (char **) safeMalloc(o.path_count * sizeof(char **)); - - token = strtok(dirpath_arg, delimiter_string); - while (token != NULL) { -- filenames[i] = token; -+ o.filenames[i] = token; - token = strtok(NULL, delimiter_string); - i++; - } - } - - static void prep_testdir(int j, int dir_iter){ -- int pos = sprintf(testdir, "%s", testdirpath); -- if ( testdir[strlen( testdir ) - 1] != '/' ) { -- pos += sprintf(& testdir[pos], "/"); -+ int pos = sprintf(o.testdir, "%s", o.testdirpath); -+ if ( o.testdir[strlen( o.testdir ) - 1] != '/' ) { -+ pos += sprintf(& o.testdir[pos], "/"); -+ } -+ pos += sprintf(& o.testdir[pos], "%s", TEST_DIR); -+ pos += sprintf(& o.testdir[pos], ".%d-%d", j, dir_iter); -+} -+ -+static void phase_prepare(){ -+ if (*o.prologue){ -+ VERBOSE(0,5,"calling prologue: \"%s\"", o.prologue); -+ system(o.prologue); -+ } -+ if (o.barriers) { -+ MPI_Barrier(testComm); - } -- pos += sprintf(& testdir[pos], "%s", TEST_DIR); -- pos += sprintf(& testdir[pos], ".%d-%d", j, dir_iter); - } - - static void phase_end(){ -- if (call_sync){ -- if(! backend->sync){ -+ if (o.call_sync){ -+ if(! o.backend->sync){ - FAIL("Error, backend does not provide the sync method, but you requested to use sync.\n"); - } -- backend->sync(& param); -+ o.backend->sync(o.backend_options); -+ } -+ if (*o.epilogue){ -+ VERBOSE(0,5,"calling epilogue: \"%s\"", o.epilogue); -+ system(o.epilogue); - } - -- if (barriers) { -+ if (o.barriers) { - MPI_Barrier(testComm); - } - } -@@ -292,15 +301,15 @@ static void phase_end(){ - void unique_dir_access(int opt, char *to) { - if (opt == MK_UNI_DIR) { - MPI_Barrier(testComm); -- sprintf( to, "%s/%s", testdir, unique_chdir_dir ); -+ sprintf( to, "%s/%s", o.testdir, o.unique_chdir_dir ); - } else if (opt == STAT_SUB_DIR) { -- sprintf( to, "%s/%s", testdir, unique_stat_dir ); -+ sprintf( to, "%s/%s", o.testdir, o.unique_stat_dir ); - } else if (opt == READ_SUB_DIR) { -- sprintf( to, "%s/%s", testdir, unique_read_dir ); -+ sprintf( to, "%s/%s", o.testdir, o.unique_read_dir ); - } else if (opt == RM_SUB_DIR) { -- sprintf( to, "%s/%s", testdir, unique_rm_dir ); -+ sprintf( to, "%s/%s", o.testdir, o.unique_rm_dir ); - } else if (opt == RM_UNI_DIR) { -- sprintf( to, "%s/%s", testdir, unique_rm_uni_dir ); -+ sprintf( to, "%s/%s", o.testdir, o.unique_rm_uni_dir ); - } - VERBOSE(1,-1,"Entering unique_dir_access, set it to %s", to ); - } -@@ -314,16 +323,16 @@ static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) - } - - //create dirs -- sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? mk_name : rm_name, itemNum); -+ sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? o.mk_name : o.rm_name, itemNum); - VERBOSE(3,5,"create_remove_items_helper (dirs %s): curr_item is '%s'", operation, curr_item); - - if (create) { -- if (backend->mkdir(curr_item, DIRMODE, ¶m) == -1) { -- FAIL("unable to create directory %s", curr_item); -+ if (o.backend->mkdir(curr_item, DIRMODE, o.backend_options) == -1) { -+ WARNF("unable to create directory %s", curr_item); - } - } else { -- if (backend->rmdir(curr_item, ¶m) == -1) { -- FAIL("unable to remove directory %s", curr_item); -+ if (o.backend->rmdir(curr_item, o.backend_options) == -1) { -+ WARNF("unable to remove directory %s", curr_item); - } - } - } -@@ -336,73 +345,83 @@ static void remove_file (const char *path, uint64_t itemNum) { - } - - //remove files -- sprintf(curr_item, "%s/file.%s"LLU"", path, rm_name, itemNum); -+ sprintf(curr_item, "%s/file.%s"LLU"", path, o.rm_name, itemNum); - VERBOSE(3,5,"create_remove_items_helper (non-dirs remove): curr_item is '%s'", curr_item); -- if (!(shared_file && rank != 0)) { -- backend->delete (curr_item, ¶m); -+ if (!(o.shared_file && rank != 0)) { -+ o.backend->delete (curr_item, o.backend_options); - } - } - -+ - static void create_file (const char *path, uint64_t itemNum) { - char curr_item[MAX_PATHLEN]; -- void *aiori_fh = NULL; -+ aiori_fd_t *aiori_fh = NULL; - - if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { - VERBOSE(3,5,"create file: "LLU"", itemNum); - } - - //create files -- sprintf(curr_item, "%s/file.%s"LLU"", path, mk_name, itemNum); -+ sprintf(curr_item, "%s/file.%s"LLU"", path, o.mk_name, itemNum); - VERBOSE(3,5,"create_remove_items_helper (non-dirs create): curr_item is '%s'", curr_item); - -- param.openFlags = IOR_WRONLY; -- -- if (make_node) { -+ if (o.make_node) { - int ret; - VERBOSE(3,5,"create_remove_items_helper : mknod..." ); - -- ret = backend->mknod (curr_item); -+ ret = o.backend->mknod (curr_item); - if (ret != 0) -- FAIL("unable to mknode file %s", curr_item); -+ WARNF("unable to mknode file %s", curr_item); - - return; -- } else if (collective_creates) { -+ } else if (o.collective_creates) { - VERBOSE(3,5,"create_remove_items_helper (collective): open..." ); - -- aiori_fh = backend->open (curr_item, ¶m); -- if (NULL == aiori_fh) -- FAIL("unable to open file %s", curr_item); -+ aiori_fh = o.backend->open (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); -+ if (NULL == aiori_fh){ -+ WARNF("unable to open file %s", curr_item); -+ return; -+ } - - /* - * !collective_creates - */ - } else { -- param.openFlags |= IOR_CREAT; -- param.filePerProc = !shared_file; -- param.mode = FILEMODE; -+ o.hints.filePerProc = ! o.shared_file; - VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." ); - -- aiori_fh = backend->create (curr_item, ¶m); -- if (NULL == aiori_fh) -- FAIL("unable to create file %s", curr_item); -+ aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); -+ if (NULL == aiori_fh){ -+ WARNF("unable to create file %s", curr_item); -+ return; -+ } - } - -- if (write_bytes > 0) { -+ if (o.write_bytes > 0) { - VERBOSE(3,5,"create_remove_items_helper: write..." ); - -- /* -- * According to Bill Loewe, writes are only done one time, so they are always at -- * offset 0 (zero). -- */ -- param.offset = 0; -- param.fsyncPerWrite = sync_file; -- if ( write_bytes != (size_t) backend->xfer (WRITE, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, ¶m)) { -- FAIL("unable to write file %s", curr_item); -+ o.hints.fsyncPerWrite = o.sync_file; -+ update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); -+ -+ if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { -+ WARNF("unable to write file %s", curr_item); -+ } -+ -+ if (o.verify_write) { -+ o.write_buffer[0] = 42; -+ if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { -+ WARNF("unable to verify write (read/back) file %s", curr_item); -+ } -+ int error = verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); -+ o.verification_error += error; -+ if(error){ -+ VERBOSE(1,1,"verification error in file: %s", curr_item); -+ } - } - } - - VERBOSE(3,5,"create_remove_items_helper: close..." ); -- backend->close (aiori_fh, ¶m); -+ o.backend->close (aiori_fh, o.backend_options); - } - - /* helper for creating/removing items */ -@@ -442,24 +461,22 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 - continue; - } - -- sprintf(curr_item, "%s/file.%s"LLU"", path, create ? mk_name : rm_name, itemNum+i); -+ sprintf(curr_item, "%s/file.%s"LLU"", path, create ? o.mk_name : o.rm_name, itemNum+i); - VERBOSE(3,5,"create file: %s", curr_item); - - if (create) { -- void *aiori_fh; -+ aiori_fd_t *aiori_fh; - - //create files -- param.openFlags = IOR_WRONLY | IOR_CREAT; -- param.mode = FILEMODE; -- aiori_fh = backend->create (curr_item, ¶m); -+ aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); - if (NULL == aiori_fh) { -- FAIL("unable to create file %s", curr_item); -+ WARNF("unable to create file %s", curr_item); -+ }else{ -+ o.backend->close (aiori_fh, o.backend_options); - } -- -- backend->close (aiori_fh, ¶m); -- } else if (!(shared_file && rank != 0)) { -+ } else if (!(o.shared_file && rank != 0)) { - //remove files -- backend->delete (curr_item, ¶m); -+ o.backend->delete (curr_item, o.backend_options); - } - if(CHECK_STONE_WALL(progress)){ - progress->items_done = i + 1; -@@ -469,7 +486,7 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 - progress->items_done = progress->items_per_dir; - } - --/* recusive function to create and remove files/directories from the -+/* recursive function to create and remove files/directories from the - directory tree */ - void create_remove_items(int currDepth, const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t * progress) { - unsigned i; -@@ -488,7 +505,7 @@ void create_remove_items(int currDepth, const int dirs, const int create, const - - if (currDepth == 0) { - /* create items at this depth */ -- if (!leaf_only || (depth == 0 && leaf_only)) { -+ if (! o.leaf_only || (o.depth == 0 && o.leaf_only)) { - if (collective) { - collective_helper(dirs, create, temp_path, 0, progress); - } else { -@@ -496,28 +513,28 @@ void create_remove_items(int currDepth, const int dirs, const int create, const - } - } - -- if (depth > 0) { -+ if (o.depth > 0) { - create_remove_items(++currDepth, dirs, create, - collective, temp_path, ++dirNum, progress); - } - -- } else if (currDepth <= depth) { -+ } else if (currDepth <= o.depth) { - /* iterate through the branches */ -- for (i=0; i 0) { //item is not in tree's root directory - - /* prepend parent directory to item's path */ -- sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); - strcpy(item, temp); - - //still not at the tree's root dir -- while (parent_dir > branch_factor) { -- parent_dir = (uint64_t) ((parent_dir-1) / branch_factor); -- sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); -+ while (parent_dir > o.branch_factor) { -+ parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor); -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); - strcpy(item, temp); - } - } -@@ -616,41 +633,31 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch - - /* below temp used to be hiername */ - VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item); -- if (-1 == backend->stat (item, &buf, ¶m)) { -- FAIL("unable to stat %s %s", dirs ? "directory" : "file", item); -+ if (-1 == o.backend->stat (item, &buf, o.backend_options)) { -+ WARNF("unable to stat %s %s", dirs ? "directory" : "file", item); - } - } - } - -- - /* reads all of the items created as specified by the input parameters */ - void mdtest_read(int random, int dirs, const long dir_iter, char *path) { - uint64_t parent_dir, item_num = 0; - char item[MAX_PATHLEN], temp[MAX_PATHLEN]; -- void *aiori_fh; -+ aiori_fd_t *aiori_fh; - - VERBOSE(1,-1,"Entering mdtest_read on %s", path ); -+ char *read_buffer; - - /* allocate read buffer */ -- if (read_bytes > 0) { -- int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), read_bytes); -- if (alloc_res) { -- FAIL("out of memory"); -- } -- -- if (verify_read > 0) { -- verify_read_buffer = (char *)malloc(read_bytes); -- if (verify_read_buffer == NULL) { -- FAIL("out of memory"); -- } -- generate_memory_pattern(verify_read_buffer, read_bytes); -- } -+ if (o.read_bytes > 0) { -+ read_buffer = aligned_buffer_alloc(o.read_bytes, o.gpu_memory_flags); -+ memset(read_buffer, -1, o.read_bytes); - } - -- uint64_t stop_items = items; -+ uint64_t stop_items = o.items; - -- if( directory_loops != 1 ){ -- stop_items = items_per_dir; -+ if( o.directory_loops != 1 ){ -+ stop_items = o.items_per_dir; - } - - /* iterate over all of the item IDs */ -@@ -669,15 +676,15 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { - - /* determine the item number to read */ - if (random) { -- item_num = rand_array[i]; -+ item_num = o.rand_array[i]; - } else { - item_num = i; - } - - /* make adjustments if in leaf only mode*/ -- if (leaf_only) { -- item_num += items_per_dir * -- (num_dirs_in_tree - (uint64_t) pow (branch_factor, depth)); -+ if (o.leaf_only) { -+ item_num += o.items_per_dir * -+ (o.num_dirs_in_tree - (uint64_t) pow (o.branch_factor, o.depth)); - } - - /* create name of file to read */ -@@ -685,22 +692,22 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { - if ((i%ITEM_COUNT == 0) && (i != 0)) { - VERBOSE(3,5,"read file: "LLU"", i); - } -- sprintf(item, "file.%s"LLU"", read_name, item_num); -+ sprintf(item, "file.%s"LLU"", o.read_name, item_num); - } - - /* determine the path to the file/dir to be read'ed */ -- parent_dir = item_num / items_per_dir; -+ parent_dir = item_num / o.items_per_dir; - - if (parent_dir > 0) { //item is not in tree's root directory - - /* prepend parent directory to item's path */ -- sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); - strcpy(item, temp); - - /* still not at the tree's root dir */ -- while (parent_dir > branch_factor) { -- parent_dir = (unsigned long long) ((parent_dir-1) / branch_factor); -- sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); -+ while (parent_dir > o.branch_factor) { -+ parent_dir = (unsigned long long) ((parent_dir-1) / o.branch_factor); -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); - strcpy(item, temp); - } - } -@@ -712,29 +719,40 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { - /* below temp used to be hiername */ - VERBOSE(3,5,"mdtest_read file: %s", item); - -+ o.hints.filePerProc = ! o.shared_file; -+ - /* open file for reading */ -- param.openFlags = O_RDONLY; -- aiori_fh = backend->open (item, ¶m); -+ aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options); - if (NULL == aiori_fh) { -- FAIL("unable to open file %s", item); -+ WARNF("unable to open file %s", item); -+ continue; - } - - /* read file */ -- if (read_bytes > 0) { -- read_buffer[0] = 42; /* use a random value to ensure that the read_buffer is now different from the expected buffer and read isn't sometimes NOOP */ -- if (read_bytes != (size_t) backend->xfer (READ, aiori_fh, (IOR_size_t *) read_buffer, read_bytes, ¶m)) { -- FAIL("unable to read file %s", item); -+ if (o.read_bytes > 0) { -+ read_buffer[0] = 42; -+ if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) { -+ WARNF("unable to read file %s", item); -+ continue; - } -- if(verify_read){ -- if (memcmp(read_buffer, verify_read_buffer, read_bytes) != 0){ -- VERBOSE(2, -1, "Error verifying %s", item); -- verification_error++; -+ int pretend_rank = (2 * o.nstride + rank) % o.size; -+ if(o.verify_read){ -+ if (o.shared_file) { -+ pretend_rank = rank; -+ } -+ int error = verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank, o.dataPacketType); -+ o.verification_error += error; -+ if(error){ -+ VERBOSE(1,1,"verification error in file: %s", item); - } - } - } - - /* close file */ -- backend->close (aiori_fh, ¶m); -+ o.backend->close (aiori_fh, o.backend_options); -+ } -+ if(o.read_bytes){ -+ aligned_buffer_free(read_buffer, o.gpu_memory_flags); - } - } - -@@ -749,40 +767,40 @@ void collective_create_remove(const int create, const int dirs, const int ntasks - for (int i = 0 ; i < ntasks ; ++i) { - memset(temp, 0, MAX_PATHLEN); - -- strcpy(temp, testdir); -+ strcpy(temp, o.testdir); - strcat(temp, "/"); - - /* set the base tree name appropriately */ -- if (unique_dir_per_task) { -- sprintf(base_tree_name, "mdtest_tree.%d", i); -+ if (o.unique_dir_per_task) { -+ sprintf(o.base_tree_name, "mdtest_tree.%d", i); - } else { -- sprintf(base_tree_name, "mdtest_tree"); -+ sprintf(o.base_tree_name, "mdtest_tree"); - } - - /* Setup to do I/O to the appropriate test dir */ -- strcat(temp, base_tree_name); -+ strcat(temp, o.base_tree_name); - strcat(temp, ".0"); - - /* set all item names appropriately */ -- if (!shared_file) { -- sprintf(mk_name, "mdtest.%d.", (i+(0*nstride))%ntasks); -- sprintf(stat_name, "mdtest.%d.", (i+(1*nstride))%ntasks); -- sprintf(read_name, "mdtest.%d.", (i+(2*nstride))%ntasks); -- sprintf(rm_name, "mdtest.%d.", (i+(3*nstride))%ntasks); -- } -- if (unique_dir_per_task) { -- VERBOSE(3,5,"i %d nstride %d ntasks %d", i, nstride, ntasks); -- sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, -- (i+(0*nstride))%ntasks); -- sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, -- (i+(1*nstride))%ntasks); -- sprintf(unique_stat_dir, "%s/mdtest_tree.%d.0", testdir, -- (i+(2*nstride))%ntasks); -- sprintf(unique_read_dir, "%s/mdtest_tree.%d.0", testdir, -- (i+(3*nstride))%ntasks); -- sprintf(unique_rm_dir, "%s/mdtest_tree.%d.0", testdir, -- (i+(4*nstride))%ntasks); -- sprintf(unique_rm_uni_dir, "%s", testdir); -+ if (! o.shared_file) { -+ sprintf(o.mk_name, "mdtest.%d.", (i+(0*o.nstride))%ntasks); -+ sprintf(o.stat_name, "mdtest.%d.", (i+(1*o.nstride))%ntasks); -+ sprintf(o.read_name, "mdtest.%d.", (i+(2*o.nstride))%ntasks); -+ sprintf(o.rm_name, "mdtest.%d.", (i+(3*o.nstride))%ntasks); -+ } -+ if (o.unique_dir_per_task) { -+ VERBOSE(3,5,"i %d nstride %d ntasks %d", i, o.nstride, ntasks); -+ sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (i+(0*o.nstride))%ntasks); -+ sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (i+(1*o.nstride))%ntasks); -+ sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (i+(2*o.nstride))%ntasks); -+ sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (i+(3*o.nstride))%ntasks); -+ sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (i+(4*o.nstride))%ntasks); -+ sprintf(o.unique_rm_uni_dir, "%s", o.testdir); - } - - /* Now that everything is set up as it should be, do the create or remove */ -@@ -792,61 +810,155 @@ void collective_create_remove(const int create, const int dirs, const int ntasks - } - - /* reset all of the item names */ -- if (unique_dir_per_task) { -- sprintf(base_tree_name, "mdtest_tree.0"); -+ if (o.unique_dir_per_task) { -+ sprintf(o.base_tree_name, "mdtest_tree.0"); - } else { -- sprintf(base_tree_name, "mdtest_tree"); -- } -- if (!shared_file) { -- sprintf(mk_name, "mdtest.%d.", (0+(0*nstride))%ntasks); -- sprintf(stat_name, "mdtest.%d.", (0+(1*nstride))%ntasks); -- sprintf(read_name, "mdtest.%d.", (0+(2*nstride))%ntasks); -- sprintf(rm_name, "mdtest.%d.", (0+(3*nstride))%ntasks); -- } -- if (unique_dir_per_task) { -- sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, -- (0+(0*nstride))%ntasks); -- sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, -- (0+(1*nstride))%ntasks); -- sprintf(unique_stat_dir, "%s/mdtest_tree.%d.0", testdir, -- (0+(2*nstride))%ntasks); -- sprintf(unique_read_dir, "%s/mdtest_tree.%d.0", testdir, -- (0+(3*nstride))%ntasks); -- sprintf(unique_rm_dir, "%s/mdtest_tree.%d.0", testdir, -- (0+(4*nstride))%ntasks); -- sprintf(unique_rm_uni_dir, "%s", testdir); -+ sprintf(o.base_tree_name, "mdtest_tree"); -+ } -+ if (! o.shared_file) { -+ sprintf(o.mk_name, "mdtest.%d.", (0+(0*o.nstride))%ntasks); -+ sprintf(o.stat_name, "mdtest.%d.", (0+(1*o.nstride))%ntasks); -+ sprintf(o.read_name, "mdtest.%d.", (0+(2*o.nstride))%ntasks); -+ sprintf(o.rm_name, "mdtest.%d.", (0+(3*o.nstride))%ntasks); -+ } -+ if (o.unique_dir_per_task) { -+ sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (0+(0*o.nstride))%ntasks); -+ sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (0+(1*o.nstride))%ntasks); -+ sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (0+(2*o.nstride))%ntasks); -+ sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (0+(3*o.nstride))%ntasks); -+ sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir, -+ (0+(4*o.nstride))%ntasks); -+ sprintf(o.unique_rm_uni_dir, "%s", o.testdir); -+ } -+} -+ -+void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) { -+ uint64_t parent_dir, item_num = 0; -+ char item[MAX_PATHLEN], temp[MAX_PATHLEN]; -+ char item_last[MAX_PATHLEN]; -+ -+ if(o.backend->rename == NULL){ -+ WARN("Backend doesn't support rename\n"); -+ return; -+ } -+ -+ VERBOSE(1,-1,"Entering mdtest_rename on %s", path ); -+ -+ uint64_t stop_items = o.items; -+ -+ if( o.directory_loops != 1 ){ -+ stop_items = o.items_per_dir; -+ } -+ -+ if(stop_items == 1) return; -+ -+ /* iterate over all of the item IDs */ -+ char first_item_name[MAX_PATHLEN]; -+ for (uint64_t i = 0 ; i < stop_items; ++i) { -+ item_num = i; -+ /* make adjustments if in leaf only mode*/ -+ if (o.leaf_only) { -+ item_num += o.items_per_dir * (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth )); -+ } -+ -+ /* create name of file/dir to stat */ -+ if (dirs) { -+ sprintf(item, "dir.%s"LLU"", o.stat_name, item_num); -+ } else { -+ sprintf(item, "file.%s"LLU"", o.stat_name, item_num); -+ } -+ -+ /* determine the path to the file/dir to be stat'ed */ -+ parent_dir = item_num / o.items_per_dir; -+ -+ if (parent_dir > 0) { //item is not in tree's root directory -+ /* prepend parent directory to item's path */ -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); -+ strcpy(item, temp); -+ -+ //still not at the tree's root dir -+ while (parent_dir > o.branch_factor) { -+ parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor); -+ sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); -+ strcpy(item, temp); -+ } -+ } -+ -+ /* Now get item to have the full path */ -+ sprintf( temp, "%s/%s", path, item ); -+ strcpy( item, temp ); -+ -+ VERBOSE(3,5,"mdtest_rename %4s: %s", (dirs ? "dir" : "file"), item); -+ if(i == 0){ -+ sprintf(first_item_name, "%s-XX", item); -+ strcpy(item_last, first_item_name); -+ }else if(i == stop_items - 1){ -+ strcpy(item, first_item_name); -+ } -+ if (-1 == o.backend->rename(item, item_last, o.backend_options)) { -+ WARNF("unable to rename %s %s", dirs ? "directory" : "file", item); -+ } -+ -+ strcpy(item_last, item); - } - } - -+static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, double t_start, double t_end, double t_end_before_barrier){ -+ res->time[test] = t_end - t_start; -+ if(isfinite(t_end_before_barrier)){ -+ res->time_before_barrier[test] = t_end_before_barrier - t_start; -+ }else{ -+ res->time_before_barrier[test] = res->time[test]; -+ } -+ if(item_count == 0){ -+ res->rate[test] = 0.0; -+ res->rate_before_barrier[test] = 0.0; -+ }else{ -+ res->rate[test] = item_count/res->time[test]; -+ res->rate_before_barrier[test] = item_count/res->time_before_barrier[test]; -+ } -+ res->items[test] = item_count; -+ res->stonewall_last_item[test] = o.items; -+} -+ - void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { - int size; -- double t[5] = {0}; -+ double t_start, t_end, t_end_before_barrier; - char temp_path[MAX_PATHLEN]; -+ mdtest_results_t * res = & o.summary_table[iteration]; - - MPI_Comm_size(testComm, &size); - - VERBOSE(1,-1,"Entering directory_test on %s", path ); - - MPI_Barrier(testComm); -- t[0] = GetTimeStamp(); - - /* create phase */ -- if(create_only) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ if(o.create_only) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; -+ progress->items_done = 0; -+ progress->start_time = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(MK_UNI_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 0); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,-1,"directory_test: create path is '%s'", temp_path ); - - /* "touch" the files */ -- if (collective_creates) { -+ if (o.collective_creates) { - if (rank == 0) { - collective_create_remove(1, 1, ntasks, temp_path, progress); - } -@@ -855,80 +967,117 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran - create_remove_items(0, 1, 1, 0, temp_path, 0, progress); - } - } -+ progress->stone_wall_timer_seconds = 0; -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[1] = GetTimeStamp(); -- - /* stat phase */ -- if (stat_only) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ if (o.stat_only) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(STAT_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 1); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"stat path is '%s'", temp_path ); - - /* stat directories */ -- if (random_seed > 0) { -+ if (o.random_seed > 0) { - mdtest_stat(1, 1, dir_iter, temp_path, progress); - } else { - mdtest_stat(0, 1, dir_iter, temp_path, progress); - } - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_DIR_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier); - } -- phase_end(); -- t[2] = GetTimeStamp(); - - /* read phase */ -- if (read_only) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ if (o.read_only) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(READ_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 2); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"directory_test: read path is '%s'", temp_path ); - - /* read directories */ -- if (random_seed > 0) { -+ if (o.random_seed > 0) { - ; /* N/A */ - } else { - ; /* N/A */ - } - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_DIR_READ_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[3] = GetTimeStamp(); -+ /* rename phase */ -+ if(o.rename_dirs && o.items > 1){ -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ -+ prep_testdir(iteration, dir_iter); -+ if (o.unique_dir_per_task) { -+ unique_dir_access(STAT_SUB_DIR, temp_path); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); -+ } -+ } else { -+ sprintf( temp_path, "%s/%s", o.testdir, path ); -+ } -+ -+ VERBOSE(3,5,"rename path is '%s'", temp_path ); -+ -+ rename_dir_test(1, dir_iter, temp_path, progress); -+ } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, t_start, t_end, t_end_before_barrier); -+ } - -- if (remove_only) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ /* remove phase */ -+ if (o.remove_only) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(RM_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 3); -+ if (!o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"directory_test: remove directories path is '%s'", temp_path ); - - /* remove directories */ -- if (collective_creates) { -+ if (o.collective_creates) { - if (rank == 0) { - collective_create_remove(0, 1, ntasks, temp_path, progress); - } -@@ -936,234 +1085,231 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran - create_remove_items(0, 1, 0, 0, temp_path, 0, progress); - } - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[4] = GetTimeStamp(); -- -- if (remove_only) { -- if (unique_dir_per_task) { -+ if (o.remove_only) { -+ if (o.unique_dir_per_task) { - unique_dir_access(RM_UNI_DIR, temp_path); - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path ); - } - -- if (unique_dir_per_task && !time_unique_dir_overhead) { -- offset_timers(t, 4); -- } -- -- /* calculate times */ -- if (create_only) { -- summary_table[iteration].rate[0] = items*size/(t[1] - t[0]); -- summary_table[iteration].time[0] = t[1] - t[0]; -- summary_table[iteration].items[0] = items*size; -- summary_table[iteration].stonewall_last_item[0] = items; -- } -- if (stat_only) { -- summary_table[iteration].rate[1] = items*size/(t[2] - t[1]); -- summary_table[iteration].time[1] = t[2] - t[1]; -- summary_table[iteration].items[1] = items*size; -- summary_table[iteration].stonewall_last_item[1] = items; -- } -- if (read_only) { -- summary_table[iteration].rate[2] = items*size/(t[3] - t[2]); -- summary_table[iteration].time[2] = t[3] - t[2]; -- summary_table[iteration].items[2] = items*size; -- summary_table[iteration].stonewall_last_item[2] = items; -- } -- if (remove_only) { -- summary_table[iteration].rate[3] = items*size/(t[4] - t[3]); -- summary_table[iteration].time[3] = t[4] - t[3]; -- summary_table[iteration].items[3] = items*size; -- summary_table[iteration].stonewall_last_item[3] = items; -- } -- -- VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[0]); -- VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[1]); -- /* N/A -- VERBOSE(1,-1," Directory read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[2]); -- */ -- VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[3]); -+ VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_CREATE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_CREATE_NUM]); -+ VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_STAT_NUM], o.summary_table[iteration].rate[MDTEST_DIR_STAT_NUM]); -+ VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_RENAME_NUM], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]); -+ VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_REMOVE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_REMOVE_NUM]); - } - - /* Returns if the stonewall was hit */ --int updateStoneWallIterations(int iteration, rank_progress_t * progress, double tstart){ -+int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t * out_max_iter){ - int hit = 0; -- uint64_t done = progress->items_done; - long long unsigned max_iter = 0; - -- VERBOSE(1,1,"stonewall hit with %lld items", (long long) progress->items_done ); -- MPI_Allreduce(& progress->items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); -- summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; -+ VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done ); -+ MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); -+ o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; -+ o.summary_table[iteration].stonewall_last_item[MDTEST_FILE_CREATE_NUM] = items_done; -+ *out_max_iter = max_iter; - - // continue to the maximum... - long long min_accessed = 0; -- MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); -+ MPI_Reduce(& items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); - long long sum_accessed = 0; -- MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); -- summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; -- summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; -+ MPI_Reduce(& items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); -+ o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; -+ o.summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * o.size; - -- if(items != (sum_accessed / size)){ -- VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); -+ if(o.items != (sum_accessed / o.size)){ -+ VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / o.size); - hit = 1; - } -- progress->items_start = done; -- progress->items_per_dir = max_iter; - - return hit; - } - -+void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t_start){ -+ char temp_path[MAX_PATHLEN]; -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ -+ prep_testdir(iteration, dir_iter); -+ -+ if (o.unique_dir_per_task) { -+ unique_dir_access(MK_UNI_DIR, temp_path); -+ VERBOSE(5,5,"operating on %s", temp_path); -+ if (! o.time_unique_dir_overhead) { -+ *t_start = GetTimeStamp(); -+ } -+ } else { -+ sprintf( temp_path, "%s/%s", o.testdir, path ); -+ } -+ -+ VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); -+ /* "touch" the files */ -+ if (o.collective_creates) { -+ if (rank == 0) { -+ collective_create_remove(1, 0, ntasks, temp_path, progress); -+ } -+ MPI_Barrier(testComm); -+ } -+ -+ /* create files */ -+ create_remove_items(0, 0, 1, 0, temp_path, 0, progress); -+ if(o.stone_wall_timer_seconds){ -+ // hit the stonewall -+ uint64_t max_iter = 0; -+ uint64_t items_done = progress->items_done + dir_iter * o.items_per_dir; -+ int hit = updateStoneWallIterations(iteration, items_done, *t_start, & max_iter); -+ progress->items_start = items_done; -+ progress->items_per_dir = max_iter; -+ if (hit){ -+ progress->stone_wall_timer_seconds = 0; -+ VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir); -+ create_remove_items(0, 0, 1, 0, temp_path, 0, progress); -+ // now reset the values -+ progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; -+ o.items = progress->items_done; -+ } -+ if (o.stoneWallingStatusFile){ -+ StoreStoneWallingIterations(o.stoneWallingStatusFile, max_iter); -+ } -+ // reset stone wall timer to allow proper cleanup -+ progress->stone_wall_timer_seconds = 0; -+ // at the moment, stonewall can be done only with one directory_loop, so we can return here safely -+ break; -+ } -+ } -+} -+ - void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { - int size; -- double t[5] = {0}; -+ double t_start, t_end, t_end_before_barrier; - char temp_path[MAX_PATHLEN]; -+ mdtest_results_t * res = & o.summary_table[iteration]; -+ - MPI_Comm_size(testComm, &size); - - VERBOSE(3,5,"Entering file_test on %s", path); - - MPI_Barrier(testComm); -- t[0] = GetTimeStamp(); - - /* create phase */ -- if (create_only ) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -- prep_testdir(iteration, dir_iter); -- -- if (unique_dir_per_task) { -- unique_dir_access(MK_UNI_DIR, temp_path); -- VERBOSE(5,5,"operating on %s", temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 0); -- } -- } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -- } -- -- -- -- VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); -- -- /* "touch" the files */ -- if (collective_creates) { -- if (rank == 0) { -- collective_create_remove(1, 0, ntasks, temp_path, progress); -- } -- MPI_Barrier(testComm); -- } -- -- /* create files */ -- create_remove_items(0, 0, 1, 0, temp_path, 0, progress); -- if(stone_wall_timer_seconds){ -- int hit = updateStoneWallIterations(iteration, progress, t[0]); -- -- if (hit){ -- progress->stone_wall_timer_seconds = 0; -- VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir); -- create_remove_items(0, 0, 1, 0, temp_path, 0, progress); -- // now reset the values -- progress->stone_wall_timer_seconds = stone_wall_timer_seconds; -- items = progress->items_done; -- } -- if (stoneWallingStatusFile){ -- StoreStoneWallingIterations(stoneWallingStatusFile, progress->items_done); -- } -- // reset stone wall timer to allow proper cleanup -- progress->stone_wall_timer_seconds = 0; -- } -- } -+ if (o.create_only ) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; -+ progress->items_done = 0; -+ progress->start_time = GetTimeStamp(); -+ file_test_create(iteration, ntasks, path, progress, &t_start); -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier); - }else{ -- if (stoneWallingStatusFile){ -+ if (o.stoneWallingStatusFile){ - int64_t expected_items; - /* The number of items depends on the stonewalling file */ -- expected_items = ReadStoneWallingIterations(stoneWallingStatusFile); -+ expected_items = ReadStoneWallingIterations(o.stoneWallingStatusFile, testComm); - if(expected_items >= 0){ -- items = expected_items; -- progress->items_per_dir = items; -+ if(o.directory_loops > 1){ -+ o.directory_loops = expected_items / o.items_per_dir; -+ o.items = o.items_per_dir; -+ }else{ -+ o.items = expected_items; -+ progress->items_per_dir = o.items; -+ } - } - if (rank == 0) { - if(expected_items == -1){ -- fprintf(out_logfile, "WARNING: could not read stonewall status file\n"); -+ WARN("Could not read stonewall status file"); - }else { -- VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", items); -+ VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", o.items); - } - } - } - } - -- phase_end(); -- t[1] = GetTimeStamp(); -- - /* stat phase */ -- if (stat_only ) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ if (o.stat_only ) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(STAT_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 1); -+ if (!o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"file_test: stat path is '%s'", temp_path ); - - /* stat files */ -- mdtest_stat((random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress); -+ mdtest_stat((o.random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress); - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_FILE_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[2] = GetTimeStamp(); -- - /* read phase */ -- if (read_only ) { -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ if (o.read_only ) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(READ_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 2); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"file_test: read path is '%s'", temp_path ); - - /* read files */ -- if (random_seed > 0) { -+ if (o.random_seed > 0) { - mdtest_read(1,0, dir_iter, temp_path); - } else { - mdtest_read(0,0, dir_iter, temp_path); - } - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_FILE_READ_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[3] = GetTimeStamp(); -- -- if (remove_only) { -+ /* remove phase */ -+ if (o.remove_only) { -+ phase_prepare(); -+ t_start = GetTimeStamp(); - progress->items_start = 0; - -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); -- if (unique_dir_per_task) { -+ if (o.unique_dir_per_task) { - unique_dir_access(RM_SUB_DIR, temp_path); -- if (!time_unique_dir_overhead) { -- offset_timers(t, 3); -+ if (! o.time_unique_dir_overhead) { -+ t_start = GetTimeStamp(); - } - } else { -- sprintf( temp_path, "%s/%s", testdir, path ); -+ sprintf( temp_path, "%s/%s", o.testdir, path ); - } - - VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path ); - -- if (collective_creates) { -+ if (o.collective_creates) { - if (rank == 0) { - collective_create_remove(0, 0, ntasks, temp_path, progress); - } -@@ -1172,12 +1318,14 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro - create_remove_items(0, 0, 0, 0, temp_path, 0, progress); - } - } -+ t_end_before_barrier = GetTimeStamp(); -+ phase_end(); -+ t_end = GetTimeStamp(); -+ updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier); - } - -- phase_end(); -- t[4] = GetTimeStamp(); -- if (remove_only) { -- if (unique_dir_per_task) { -+ if (o.remove_only) { -+ if (o.unique_dir_per_task) { - unique_dir_access(RM_UNI_DIR, temp_path); - } else { - strcpy( temp_path, path ); -@@ -1186,228 +1334,391 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro - VERBOSE(3,5,"file_test: rm unique directories path is '%s'", temp_path ); - } - -- if (unique_dir_per_task && !time_unique_dir_overhead) { -- offset_timers(t, 4); -+ if(o.num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */ -+ o.items *= o.num_dirs_in_tree_calc; - } - -- if(num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */ -- items *= num_dirs_in_tree_calc; -+ VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].rate[MDTEST_FILE_CREATE_NUM]); -+ if(o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ -+ VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); - } -+ VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_STAT_NUM], o.summary_table[iteration].rate[MDTEST_FILE_STAT_NUM]); -+ VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_READ_NUM], o.summary_table[iteration].rate[MDTEST_FILE_READ_NUM]); -+ VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_REMOVE_NUM], o.summary_table[iteration].rate[MDTEST_FILE_REMOVE_NUM]); -+} - -- /* calculate times */ -- if (create_only) { -- summary_table[iteration].rate[4] = items*size/(t[1] - t[0]); -- summary_table[iteration].time[4] = t[1] - t[0]; -- summary_table[iteration].items[4] = items*size; -- summary_table[iteration].stonewall_last_item[4] = items; -- } -- if (stat_only) { -- summary_table[iteration].rate[5] = items*size/(t[2] - t[1]); -- summary_table[iteration].time[5] = t[2] - t[1]; -- summary_table[iteration].items[5] = items*size; -- summary_table[iteration].stonewall_last_item[5] = items; -- } -- if (read_only) { -- summary_table[iteration].rate[6] = items*size/(t[3] - t[2]); -- summary_table[iteration].time[6] = t[3] - t[2]; -- summary_table[iteration].items[6] = items*size; -- summary_table[iteration].stonewall_last_item[6] = items; -- } -- if (remove_only) { -- summary_table[iteration].rate[7] = items*size/(t[4] - t[3]); -- summary_table[iteration].time[7] = t[4] - t[3]; -- summary_table[iteration].items[7] = items*size; -- summary_table[iteration].stonewall_last_item[7] = items; -- } -+char const * mdtest_test_name(int i){ -+ switch (i) { -+ case MDTEST_DIR_CREATE_NUM: return "Directory creation"; -+ case MDTEST_DIR_STAT_NUM: return "Directory stat"; -+ case MDTEST_DIR_READ_NUM: return "Directory read"; -+ case MDTEST_DIR_REMOVE_NUM: return "Directory removal"; -+ case MDTEST_DIR_RENAME_NUM: return "Directory rename"; -+ case MDTEST_FILE_CREATE_NUM: return "File creation"; -+ case MDTEST_FILE_STAT_NUM: return "File stat"; -+ case MDTEST_FILE_READ_NUM: return "File read"; -+ case MDTEST_FILE_REMOVE_NUM: return "File removal"; -+ case MDTEST_TREE_CREATE_NUM: return "Tree creation"; -+ case MDTEST_TREE_REMOVE_NUM: return "Tree removal"; -+ default: return "ERR INVALID TESTNAME :"; -+ } -+ return NULL; -+} - -- VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[4]); -- if(summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ -- VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); -+/* -+ * Store the results of each process in a file -+ */ -+static void StoreRankInformation(int iterations, mdtest_results_t * agg){ -+ const size_t size = sizeof(mdtest_results_t) * iterations; -+ if(rank == 0){ -+ FILE* fd = fopen(o.saveRankDetailsCSV, "a"); -+ if (fd == NULL){ -+ FAIL("Cannot open saveRankPerformanceDetails file for writes!"); -+ } -+ -+ mdtest_results_t * results = safeMalloc(size * o.size); -+ MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, results, size / sizeof(double), MPI_DOUBLE, 0, testComm); -+ -+ char buff[4096]; -+ char * cpos = buff; -+ cpos += sprintf(cpos, "all,%llu", (long long unsigned) o.items); -+ for(int e = 0; e < MDTEST_LAST_NUM; e++){ -+ if(agg->items[e] == 0){ -+ cpos += sprintf(cpos, ",,"); -+ }else{ -+ cpos += sprintf(cpos, ",%.10e,%.10e", agg->items[e] / agg->time[e], agg->time[e]); -+ } - } -- VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[5]); -- VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[6]); -- VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); -+ cpos += sprintf(cpos, "\n"); -+ int ret = fwrite(buff, cpos - buff, 1, fd); -+ -+ for(int iter = 0; iter < iterations; iter++){ -+ for(int i=0; i < o.size; i++){ -+ mdtest_results_t * cur = & results[i * iterations + iter]; -+ cpos = buff; -+ cpos += sprintf(cpos, "%d,", i); -+ for(int e = 0; e < MDTEST_TREE_CREATE_NUM; e++){ -+ if(cur->items[e] == 0){ -+ cpos += sprintf(cpos, ",,"); -+ }else{ -+ cpos += sprintf(cpos, ",%.10e,%.10e", cur->items[e] / cur->time_before_barrier[e], cur->time_before_barrier[e]); -+ } -+ } -+ cpos += sprintf(cpos, "\n"); -+ ret = fwrite(buff, cpos - buff, 1, fd); -+ if(ret != 1){ -+ WARN("Couln't append to saveRankPerformanceDetailsCSV file\n"); -+ break; -+ } -+ } -+ } -+ fclose(fd); -+ free(results); -+ }else{ -+ /* this is a hack for now assuming all datatypes in the structure are double */ -+ MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm); -+ } - } - --int calc_allreduce_index(int iter, int rank, int op){ -- int tableSize = MDTEST_LAST_NUM; -- return iter * tableSize * size + rank * tableSize + op; -+static mdtest_results_t* get_result_index(mdtest_results_t* all_results, int proc, int iter, int interation_count){ -+ return & all_results[proc * interation_count + iter]; - } - --void summarize_results(int iterations, int print_time) { -- char access[MAX_PATHLEN]; -- int i, j, k; -- int start, stop, tableSize = MDTEST_LAST_NUM; -- double min, max, mean, sd, sum = 0, var = 0, curr = 0; -- -- double all[iterations * size * tableSize]; -+static void summarize_results_rank0(int iterations, mdtest_results_t * all_results, int print_time) { -+ int start, stop; -+ double min, max, mean, sd, sum, var, curr = 0; -+ double imin, imax, imean, isum, icur; // calculation per iteration -+ char const * access; -+ /* if files only access, skip entries 0-3 (the dir tests) */ -+ if (o.files_only && ! o.dirs_only) { -+ start = MDTEST_FILE_CREATE_NUM; -+ } else { -+ start = 0; -+ } - -+ /* if directories only access, skip entries 4-7 (the file tests) */ -+ if (o.dirs_only && !o.files_only) { -+ stop = MDTEST_FILE_CREATE_NUM; -+ } else { -+ stop = MDTEST_TREE_CREATE_NUM; -+ } - -- VERBOSE(1,-1,"Entering summarize_results..." ); -+ /* special case: if no directory or file tests, skip all */ -+ if (!o.dirs_only && !o.files_only) { -+ start = stop = 0; -+ } - -- MPI_Barrier(testComm); -- for(int i=0; i < iterations; i++){ -- if(print_time){ -- MPI_Gather(& summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); -- }else{ -- MPI_Gather(& summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); -+ if(o.print_all_proc){ -+ fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate"); -+ for (int j = 0; j < iterations; j++) { -+ fprintf(out_logfile, "iteration: %d\n", j); -+ for (int i = start; i < MDTEST_LAST_NUM; i++) { -+ access = mdtest_test_name(i); -+ if(access == NULL){ -+ continue; -+ } -+ fprintf(out_logfile, "Test %s", access); -+ for (int k=0; k < o.size; k++) { -+ mdtest_results_t * cur = get_result_index(all_results, k, j, iterations); -+ if(print_time){ -+ curr = cur->time_before_barrier[i]; -+ }else{ -+ curr = cur->rate_before_barrier[i]; -+ } -+ fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr); -+ } -+ fprintf(out_logfile, "\n"); - } - } -+ } - -- if (rank != 0) { -- return; -- } -+ VERBOSE(0, -1, "\nSUMMARY %s: (of %d iterations)", print_time ? "time" : "rate", iterations); -+ PRINT(" Operation "); -+ if(o.show_perrank_statistics){ -+ PRINT("per Rank: Max Min Mean per Iteration:"); -+ }else{ -+ PRINT(" "); -+ } -+ PRINT(" Max Min Mean Std Dev\n"); -+ PRINT(" --------- "); -+ -+ if(o.show_perrank_statistics){ -+ PRINT(" --- --- ---- "); -+ } -+ PRINT(" --- --- ---- -------\n"); -+ for (int i = start; i < stop; i++) { -+ min = 1e308; -+ max = 0; -+ sum = var = 0; -+ imin = 1e308; -+ isum = imax = 0; -+ double iter_result[iterations]; -+ for (int j = 0; j < iterations; j++) { -+ icur = print_time ? 0 : 1e308; -+ for (int k = 0; k < o.size; k++) { -+ mdtest_results_t * cur = get_result_index(all_results, k, j, iterations); -+ if(print_time){ -+ curr = cur->time_before_barrier[i]; -+ }else{ -+ curr = cur->rate_before_barrier[i]; -+ } -+ if (min > curr) { -+ min = curr; -+ } -+ if (max < curr) { -+ max = curr; -+ } -+ sum += curr; - -- VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); -- VERBOSE(0,-1," Operation Max Min Mean Std Dev"); -- VERBOSE(0,-1," --------- --- --- ---- -------"); -+ if (print_time) { -+ curr = cur->time[i]; -+ if (icur < curr) { -+ icur = curr; -+ } -+ } else { -+ curr = cur->rate[i]; -+ if (icur > curr) { -+ icur = curr; -+ } -+ } -+ } - -- /* if files only access, skip entries 0-3 (the dir tests) */ -- if (files_only && !dirs_only) { -- start = 4; -- } else { -- start = 0; -+ if (icur > imax) { -+ imax = icur; -+ } -+ if (icur < imin) { -+ imin = icur; -+ } -+ isum += icur; -+ if(print_time){ -+ iter_result[j] = icur; -+ }else{ -+ iter_result[j] = icur * o.size; -+ } - } -- -- /* if directories only access, skip entries 4-7 (the file tests) */ -- if (dirs_only && !files_only) { -- stop = 4; -- } else { -- stop = 8; -+ mean = sum / iterations / o.size; -+ imean = isum / iterations; -+ if(! print_time){ -+ imax *= o.size; -+ imin *= o.size; -+ isum *= o.size; -+ imean *= o.size; -+ } -+ for (int j = 0; j < iterations; j++) { -+ var += (imean - iter_result[j]) * (imean - iter_result[j]); -+ } -+ var = var / (iterations - 1); -+ sd = sqrt(var); -+ access = mdtest_test_name(i); -+ if (i != 2) { -+ fprintf(out_logfile, " %-18s ", access); -+ -+ if(o.show_perrank_statistics){ -+ fprintf(out_logfile, "%14.3f ", max); -+ fprintf(out_logfile, "%14.3f ", min); -+ fprintf(out_logfile, "%14.3f ", mean); -+ fprintf(out_logfile, " "); -+ } -+ fprintf(out_logfile, " "); -+ fprintf(out_logfile, "%14.3f ", imax); -+ fprintf(out_logfile, "%14.3f ", imin); -+ fprintf(out_logfile, "%14.3f ", imean); -+ fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd); -+ fflush(out_logfile); - } -+ } - -- /* special case: if no directory or file tests, skip all */ -- if (!dirs_only && !files_only) { -- start = stop = 0; -- } -+ /* calculate tree create/remove rates, applies only to Rank 0 */ -+ for (int i = MDTEST_TREE_CREATE_NUM; i < MDTEST_LAST_NUM; i++) { -+ min = imin = 1e308; -+ max = imax = 0; -+ sum = var = 0; -+ for (int j = 0; j < iterations; j++) { -+ if(print_time){ -+ curr = o.summary_table[j].time[i]; -+ }else{ -+ curr = o.summary_table[j].rate[i]; -+ } -+ if (min > curr) { -+ min = curr; -+ } -+ if (max < curr) { -+ max = curr; -+ } -+ sum += curr; -+ if(curr > imax){ -+ imax = curr; -+ } -+ if(curr < imin){ -+ imin = curr; -+ } -+ } - -- for (i = start; i < stop; i++) { -- min = max = all[i]; -- for (k=0; k < size; k++) { -- for (j = 0; j < iterations; j++) { -- curr = all[calc_allreduce_index(j, k, i)]; -- if (min > curr) { -- min = curr; -- } -- if (max < curr) { -- max = curr; -- } -- sum += curr; -- } -- } -- mean = sum / (iterations * size); -- for (k=0; krate[i]; -+ double t = cur->time[i]; -+ max_time = max_time < t ? t : max_time; -+ -+ sum_items += cur->items[i]; -+ -+ t = cur->stonewall_time[i]; -+ max_stonewall_time = max_stonewall_time < t ? t : max_stonewall_time; -+ } -+ -+ results[j].items[i] = sum_items; -+ results[j].time[i] = max_time; -+ results[j].stonewall_time[i] = max_stonewall_time; -+ if(sum_items == 0){ -+ results[j].rate[i] = 0.0; -+ }else{ -+ results[j].rate[i] = sum_items / max_time; -+ } -+ -+ /* These results have already been reduced to Rank 0 */ -+ results[j].stonewall_item_sum[i] = o.summary_table[j].stonewall_item_sum[i]; -+ results[j].stonewall_item_min[i] = o.summary_table[j].stonewall_item_min[i]; -+ results[j].stonewall_time[i] = o.summary_table[j].stonewall_time[i]; - } - } -- if(stonewall_items != 0){ -- fprintf(out_logfile, " File create (stonewall) : "); -- fprintf(out_logfile, "%14s %14s %14.3f %14s\n", "NA", "NA", print_time ? stonewall_time : stonewall_items / stonewall_time, "NA"); -+ }else{ -+ MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm); -+ } -+ -+ /* share global results across processes as these are returned by the API */ -+ MPI_Bcast(results, size / sizeof(double), MPI_DOUBLE, 0, testComm); -+ -+ /* update relevant result values with local values as these are returned by the API */ -+ for(int j=0; j < iterations; j++){ -+ for(int i=0; i < MDTEST_LAST_NUM; i++){ -+ results[j].time_before_barrier[i] = o.summary_table[j].time_before_barrier[i]; -+ results[j].stonewall_last_item[i] = o.summary_table[j].stonewall_last_item[i]; - } -+ } - -- /* calculate tree create/remove rates */ -- for (i = 8; i < tableSize; i++) { -- min = max = all[i]; -- for (j = 0; j < iterations; j++) { -- if(print_time){ -- curr = summary_table[j].time[i]; -- }else{ -- curr = summary_table[j].rate[i]; -- } -+ if(rank != 0){ -+ return; -+ } - -- if (min > curr) { -- min = curr; -- } -- if (max < curr) { -- max = curr; -- } -- sum += curr; -- } -- mean = sum / (iterations); -- for (j = 0; j < iterations; j++) { -- if(print_time){ -- curr = summary_table[j].time[i]; -- }else{ -- curr = summary_table[j].rate[i]; -- } -+ if (o.print_rate_and_time){ -+ summarize_results_rank0(iterations, all_results, 0); -+ summarize_results_rank0(iterations, all_results, 1); -+ }else{ -+ summarize_results_rank0(iterations, all_results, o.print_time); -+ } - -- var += pow((mean - curr), 2); -- } -- var = var / (iterations); -- sd = sqrt(var); -- switch (i) { -- case 8: strcpy(access, "Tree creation :"); break; -- case 9: strcpy(access, "Tree removal :"); break; -- default: strcpy(access, "ERR"); break; -- } -- fprintf(out_logfile, " %s ", access); -- fprintf(out_logfile, "%14.3f ", max); -- fprintf(out_logfile, "%14.3f ", min); -- fprintf(out_logfile, "%14.3f ", mean); -- fprintf(out_logfile, "%14.3f\n", sd); -- fflush(out_logfile); -- sum = var = 0; -- } -+ free(all_results); - } - - /* Checks to see if the test setup is valid. If it isn't, fail. */ --void valid_tests() { -+void md_validate_tests() { - -- if (((stone_wall_timer_seconds > 0) && (branch_factor > 1)) || ! barriers) { -- FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", branch_factor); -+ if (((o.stone_wall_timer_seconds > 0) && (o.branch_factor > 1)) || ! o.barriers) { -+ FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", o.branch_factor); - } - -- if (!create_only && !stat_only && !read_only && !remove_only) { -- create_only = stat_only = read_only = remove_only = 1; -+ if (!o.create_only && ! o.stat_only && ! o.read_only && !o.remove_only && !o.rename_dirs) { -+ o.create_only = o.stat_only = o.read_only = o.remove_only = o.rename_dirs = 1; - VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" ); - } - -- VERBOSE(1,-1,"Entering valid_tests..." ); -+ VERBOSE(1,-1,"Entering md_validate_tests..." ); - - /* if dirs_only and files_only were both left unset, set both now */ -- if (!dirs_only && !files_only) { -- dirs_only = files_only = 1; -+ if (!o.dirs_only && !o.files_only) { -+ o.dirs_only = o.files_only = 1; - } - - /* if shared file 'S' access, no directory tests */ -- if (shared_file) { -- dirs_only = 0; -+ if (o.shared_file) { -+ o.dirs_only = 0; - } - - /* check for no barriers with shifting processes for different phases. -@@ -1415,63 +1726,95 @@ void valid_tests() { - race conditions that may cause errors stat'ing or deleting after - creates. - */ -- if (( barriers == 0 ) && ( nstride != 0 ) && ( rank == 0 )) { -+ if (( o.barriers == 0 ) && ( o.nstride != 0 ) && ( rank == 0 )) { - FAIL( "Possible race conditions will occur: -B not compatible with -N"); - } - - /* check for collective_creates incompatibilities */ -- if (shared_file && collective_creates && rank == 0) { -+ if (o.shared_file && o.collective_creates && rank == 0) { - FAIL("-c not compatible with -S"); - } -- if (path_count > 1 && collective_creates && rank == 0) { -+ if (o.path_count > 1 && o.collective_creates && rank == 0) { - FAIL("-c not compatible with multiple test directories"); - } -- if (collective_creates && !barriers) { -+ if (o.collective_creates && !o.barriers) { - FAIL("-c not compatible with -B"); - } - - /* check for shared file incompatibilities */ -- if (unique_dir_per_task && shared_file && rank == 0) { -+ if (o.unique_dir_per_task && o.shared_file && rank == 0) { - FAIL("-u not compatible with -S"); - } - - /* check multiple directory paths and strided option */ -- if (path_count > 1 && nstride > 0) { -+ if (o.path_count > 1 && o.nstride > 0) { - FAIL("cannot have multiple directory paths with -N strides between neighbor tasks"); - } - - /* check for shared directory and multiple directories incompatibility */ -- if (path_count > 1 && unique_dir_per_task != 1) { -+ if (o.path_count > 1 && o.unique_dir_per_task != 1) { - FAIL("shared directory mode is not compatible with multiple directory paths"); - } - - /* check if more directory paths than ranks */ -- if (path_count > size) { -+ if (o.path_count > o.size) { - FAIL("cannot have more directory paths than MPI tasks"); - } - - /* check depth */ -- if (depth < 0) { -+ if (o.depth < 0) { - FAIL("depth must be greater than or equal to zero"); - } - /* check branch_factor */ -- if (branch_factor < 1 && depth > 0) { -+ if (o.branch_factor < 1 && o.depth > 0) { - FAIL("branch factor must be greater than or equal to zero"); - } - /* check for valid number of items */ -- if ((items > 0) && (items_per_dir > 0)) { -- if(unique_dir_per_task){ -+ if ((o.items > 0) && (o.items_per_dir > 0)) { -+ if(o.unique_dir_per_task){ - FAIL("only specify the number of items or the number of items per directory"); -- }else if( items % items_per_dir != 0){ -+ }else if( o.items % o.items_per_dir != 0){ - FAIL("items must be a multiple of items per directory"); -- }else if( stone_wall_timer_seconds != 0){ -- FAIL("items + items_per_dir can only be set without stonewalling"); - } - } - /* check for using mknod */ -- if (write_bytes > 0 && make_node) { -+ if (o.write_bytes > 0 && o.make_node) { - FAIL("-k not compatible with -w"); - } -+ -+ if(o.verify_read && ! o.read_only) -+ FAIL("Verify read requires that the read test is used"); -+ -+ if(o.verify_read && o.read_bytes <= 0) -+ FAIL("Verify read requires that read bytes is > 0"); -+ -+ if(o.read_only && o.read_bytes <= 0) -+ WARN("Read bytes is 0, thus, a read test will actually just open/close"); -+ -+ if(o.create_only && o.read_only && o.read_bytes > o.write_bytes) -+ FAIL("When writing and reading files, read bytes must be smaller than write bytes"); -+ -+ if (rank == 0 && o.saveRankDetailsCSV){ -+ // check that the file is writeable, truncate it and add header -+ FILE* fd = fopen(o.saveRankDetailsCSV, "w"); -+ if (fd == NULL){ -+ FAIL("Cannot open saveRankPerformanceDetails file for write!"); -+ } -+ char * head = "rank,items"; -+ int ret = fwrite(head, strlen(head), 1, fd); -+ for(int e = 0; e < MDTEST_LAST_NUM; e++){ -+ char buf[1024]; -+ const char * str = mdtest_test_name(e); -+ -+ sprintf(buf, ",rate-%s,time-%s", str, str); -+ ret = fwrite(buf, strlen(buf), 1, fd); -+ if(ret != 1){ -+ FAIL("Cannot write header to saveRankPerformanceDetails file"); -+ } -+ } -+ fwrite("\n", 1, 1, fd); -+ fclose(fd); -+ } - } - - void show_file_system_size(char *file_system) { -@@ -1492,7 +1835,7 @@ void show_file_system_size(char *file_system) { - - VERBOSE(1,-1,"Entering show_file_system_size on %s", file_system ); - -- ret = backend->statfs (file_system, &stat_buf, ¶m); -+ ret = o.backend->statfs (file_system, &stat_buf, o.backend_options); - if (0 != ret) { - FAIL("unable to stat file system %s", file_system); - } -@@ -1530,42 +1873,6 @@ void show_file_system_size(char *file_system) { - return; - } - --void display_freespace(char *testdirpath) --{ -- char dirpath[MAX_PATHLEN] = {0}; -- int i; -- int directoryFound = 0; -- -- -- VERBOSE(3,5,"Entering display_freespace on %s...", testdirpath ); -- -- strcpy(dirpath, testdirpath); -- -- /* get directory for outfile */ -- i = strlen(dirpath); -- while (i-- > 0) { -- if (dirpath[i] == '/') { -- dirpath[i] = '\0'; -- directoryFound = 1; -- break; -- } -- } -- -- /* if no directory/, use '.' */ -- if (directoryFound == 0) { -- strcpy(dirpath, "."); -- } -- -- if (param.api && strcasecmp(param.api, "DFS") == 0) -- return; -- -- VERBOSE(3,5,"Before show_file_system_size, dirpath is '%s'", dirpath ); -- show_file_system_size(dirpath); -- VERBOSE(3,5, "After show_file_system_size, dirpath is '%s'\n", dirpath ); -- -- return; --} -- - void create_remove_directory_tree(int create, - int currDepth, char* path, int dirNum, rank_progress_t * progress) { - -@@ -1576,16 +1883,16 @@ void create_remove_directory_tree(int create, - VERBOSE(1,5,"Entering create_remove_directory_tree on %s, currDepth = %d...", path, currDepth ); - - if (currDepth == 0) { -- sprintf(dir, "%s/%s.%d/", path, base_tree_name, dirNum); -+ sprintf(dir, "%s/%s.%d/", path, o.base_tree_name, dirNum); - - if (create) { - VERBOSE(2,5,"Making directory '%s'", dir); -- if (-1 == backend->mkdir (dir, DIRMODE, ¶m)) { -- fprintf(out_logfile, "error could not create directory '%s'\n", dir); -+ if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) { -+ WARNF("unable to create tree directory '%s'", dir); - } - #ifdef HAVE_LUSTRE_LUSTREAPI - /* internal node for branching, can be non-striped for children */ -- if (global_dir_layout && \ -+ if (o.global_dir_layout && \ - llapi_dir_set_default_lmv_stripe(dir, -1, 0, - LMV_HASH_TYPE_FNV_1A_64, - NULL) == -1) { -@@ -1598,35 +1905,35 @@ void create_remove_directory_tree(int create, - - if (!create) { - VERBOSE(2,5,"Remove directory '%s'", dir); -- if (-1 == backend->rmdir(dir, ¶m)) { -- FAIL("Unable to remove directory %s", dir); -+ if (-1 == o.backend->rmdir(dir, o.backend_options)) { -+ WARNF("Unable to remove directory %s", dir); - } - } -- } else if (currDepth <= depth) { -+ } else if (currDepth <= o.depth) { - - char temp_path[MAX_PATHLEN]; - strcpy(temp_path, path); - int currDir = dirNum; - -- for (i=0; imkdir(temp_path, DIRMODE, ¶m)) { -- FAIL("Unable to create directory %s", temp_path); -+ if (-1 == o.backend->mkdir(temp_path, DIRMODE, o.backend_options)) { -+ WARNF("Unable to create directory %s", temp_path); - } - } - - create_remove_directory_tree(create, ++currDepth, -- temp_path, (branch_factor*currDir)+1, progress); -+ temp_path, (o.branch_factor*currDir)+1, progress); - currDepth--; - - if (!create) { - VERBOSE(2,5,"Remove directory '%s'", temp_path); -- if (-1 == backend->rmdir(temp_path, ¶m)) { -- FAIL("Unable to remove directory %s", temp_path); -+ if (-1 == o.backend->rmdir(temp_path, o.backend_options)) { -+ WARNF("Unable to remove directory %s", temp_path); - } - } - -@@ -1636,12 +1943,11 @@ void create_remove_directory_tree(int create, - } - } - --static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table){ -+static void mdtest_iteration(int i, int j, mdtest_results_t * summary_table){ - rank_progress_t progress_o; - memset(& progress_o, 0 , sizeof(progress_o)); -- progress_o.start_time = GetTimeStamp(); -- progress_o.stone_wall_timer_seconds = stone_wall_timer_seconds; -- progress_o.items_per_dir = items_per_dir; -+ progress_o.stone_wall_timer_seconds = 0; -+ progress_o.items_per_dir = o.items_per_dir; - rank_progress_t * progress = & progress_o; - - /* start and end times of directory tree create/remove */ -@@ -1650,255 +1956,229 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t - - VERBOSE(1,-1,"main: * iteration %d *", j+1); - -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -- prep_testdir(j, dir_iter); -+ if(o.create_only){ -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ -+ if (rank >= o.path_count) { -+ continue; -+ } -+ prep_testdir(j, dir_iter); - -- VERBOSE(2,5,"main (for j loop): making testdir, '%s'", testdir ); -- if ((rank < path_count) && backend->access(testdir, F_OK, ¶m) != 0) { -- if (backend->mkdir(testdir, DIRMODE, ¶m) != 0) { -- FAIL("Unable to create test directory %s", testdir); -- } -+ VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir ); -+ if (o.backend->access(o.testdir, F_OK, o.backend_options) != 0) { -+ if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) { -+ WARNF("Unable to create test directory %s", o.testdir); -+ } - #ifdef HAVE_LUSTRE_LUSTREAPI -- /* internal node for branching, can be non-striped for children */ -- if (global_dir_layout && unique_dir_per_task && llapi_dir_set_default_lmv_stripe(testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { -- FAIL("Unable to reset to global default directory layout"); -- } -+ /* internal node for branching, can be non-striped for children */ -+ if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { -+ WARN("Unable to reset to global default directory layout"); -+ } - #endif /* HAVE_LUSTRE_LUSTREAPI */ -+ } - } -- } - -- if (create_only) { -- /* create hierarchical directory structure */ -- MPI_Barrier(testComm); -+ /* create hierarchical directory structure */ -+ MPI_Barrier(testComm); - -- startCreate = GetTimeStamp(); -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -- prep_testdir(j, dir_iter); -+ startCreate = GetTimeStamp(); -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ -+ prep_testdir(j, dir_iter); - -- if (unique_dir_per_task) { -- if (collective_creates && (rank == 0)) { -- /* -- * This is inside two loops, one of which already uses "i" and the other uses "j". -- * I don't know how this ever worked. I'm changing this loop to use "k". -- */ -- for (k=0; krate[8] = -- num_dirs_in_tree / (endCreate - startCreate); -- summary_table->time[8] = (endCreate - startCreate); -- summary_table->items[8] = num_dirs_in_tree; -- summary_table->stonewall_last_item[8] = num_dirs_in_tree; -- VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[8]); -+ } -+ MPI_Barrier(testComm); -+ endCreate = GetTimeStamp(); -+ summary_table->rate[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate); -+ summary_table->time[MDTEST_TREE_CREATE_NUM] = (endCreate - startCreate); -+ summary_table->items[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree; -+ summary_table->stonewall_last_item[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree; -+ VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_CREATE_NUM]); - } -- sprintf(unique_mk_dir, "%s.0", base_tree_name); -- sprintf(unique_chdir_dir, "%s.0", base_tree_name); -- sprintf(unique_stat_dir, "%s.0", base_tree_name); -- sprintf(unique_read_dir, "%s.0", base_tree_name); -- sprintf(unique_rm_dir, "%s.0", base_tree_name); -- unique_rm_uni_dir[0] = 0; -- -- if (!unique_dir_per_task) { -- VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", unique_mk_dir ); -+ -+ sprintf(o.unique_mk_dir, "%s.0", o.base_tree_name); -+ sprintf(o.unique_chdir_dir, "%s.0", o.base_tree_name); -+ sprintf(o.unique_stat_dir, "%s.0", o.base_tree_name); -+ sprintf(o.unique_read_dir, "%s.0", o.base_tree_name); -+ sprintf(o.unique_rm_dir, "%s.0", o.base_tree_name); -+ o.unique_rm_uni_dir[0] = 0; -+ -+ if (! o.unique_dir_per_task) { -+ VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", o.unique_mk_dir ); - } - - if (rank < i) { -- if (!shared_file) { -- sprintf(mk_name, "mdtest.%d.", (rank+(0*nstride))%i); -- sprintf(stat_name, "mdtest.%d.", (rank+(1*nstride))%i); -- sprintf(read_name, "mdtest.%d.", (rank+(2*nstride))%i); -- sprintf(rm_name, "mdtest.%d.", (rank+(3*nstride))%i); -+ if (! o.shared_file) { -+ sprintf(o.mk_name, "mdtest.%d.", (rank+(0*o.nstride))%i); -+ sprintf(o.stat_name, "mdtest.%d.", (rank+(1*o.nstride))%i); -+ sprintf(o.read_name, "mdtest.%d.", (rank+(2*o.nstride))%i); -+ sprintf(o.rm_name, "mdtest.%d.", (rank+(3*o.nstride))%i); - } -- if (unique_dir_per_task) { -- VERBOSE(3,5,"i %d nstride %d", i, nstride); -- sprintf(unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*nstride))%i); -- sprintf(unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*nstride))%i); -- sprintf(unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*nstride))%i); -- sprintf(unique_read_dir, "mdtest_tree.%d.0", (rank+(3*nstride))%i); -- sprintf(unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*nstride))%i); -- unique_rm_uni_dir[0] = 0; -- VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", unique_mk_dir,unique_chdir_dir,unique_stat_dir,unique_read_dir,unique_rm_dir); -+ if (o.unique_dir_per_task) { -+ VERBOSE(3,5,"i %d nstride %d", i, o.nstride); -+ sprintf(o.unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*o.nstride))%i); -+ sprintf(o.unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*o.nstride))%i); -+ sprintf(o.unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*o.nstride))%i); -+ sprintf(o.unique_read_dir, "mdtest_tree.%d.0", (rank+(3*o.nstride))%i); -+ sprintf(o.unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*o.nstride))%i); -+ o.unique_rm_uni_dir[0] = 0; -+ VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", o.unique_mk_dir, o.unique_chdir_dir, o.unique_stat_dir, o.unique_read_dir, o.unique_rm_dir); - } - -- VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", unique_mk_dir ); -+ VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", o.unique_mk_dir ); - -- if (dirs_only && !shared_file) { -- if (pre_delay) { -- DelaySecs(pre_delay); -+ if (o.dirs_only && ! o.shared_file) { -+ if (o.pre_delay) { -+ DelaySecs(o.pre_delay); - } -- directory_test(j, i, unique_mk_dir, progress); -+ directory_test(j, i, o.unique_mk_dir, progress); - } -- if (files_only) { -- if (pre_delay) { -- DelaySecs(pre_delay); -+ if (o.files_only) { -+ if (o.pre_delay) { -+ DelaySecs(o.pre_delay); - } -- VERBOSE(3,5,"will file_test on %s", unique_mk_dir); -- file_test(j, i, unique_mk_dir, progress); -+ VERBOSE(3,5,"will file_test on %s", o.unique_mk_dir); -+ -+ file_test(j, i, o.unique_mk_dir, progress); - } - } - - /* remove directory structure */ -- if (!unique_dir_per_task) { -- VERBOSE(3,-1,"main: Using testdir, '%s'", testdir ); -+ if (! o.unique_dir_per_task) { -+ VERBOSE(3,-1,"main: Using o.testdir, '%s'", o.testdir ); - } - - MPI_Barrier(testComm); -- if (remove_only) { -+ if (o.remove_only) { - progress->items_start = 0; - startCreate = GetTimeStamp(); -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(j, dir_iter); -- if (unique_dir_per_task) { -- if (collective_creates && (rank == 0)) { -+ if (o.unique_dir_per_task) { -+ if (o.collective_creates && (rank == 0)) { - /* - * This is inside two loops, one of which already uses "i" and the other uses "j". - * I don't know how this ever worked. I'm changing this loop to use "k". - */ -- for (k=0; krate[9] = num_dirs_in_tree / (endCreate - startCreate); -- summary_table->time[9] = endCreate - startCreate; -- summary_table->items[9] = num_dirs_in_tree; -- summary_table->stonewall_last_item[8] = num_dirs_in_tree; -- VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[9]); -- VERBOSE(2,-1,"main (at end of for j loop): Removing testdir of '%s'\n", testdir ); -- -- for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ -+ summary_table->rate[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate); -+ summary_table->time[MDTEST_TREE_REMOVE_NUM] = endCreate - startCreate; -+ summary_table->items[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree; -+ summary_table->stonewall_last_item[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree; -+ VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_REMOVE_NUM]); -+ VERBOSE(2,-1,"main (at end of for j loop): Removing o.testdir of '%s'\n", o.testdir ); -+ -+ for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(j, dir_iter); -- if ((rank < path_count) && backend->access(testdir, F_OK, ¶m) == 0) { -- //if (( rank == 0 ) && access(testdir, F_OK) == 0) { -- if (backend->rmdir(testdir, ¶m) == -1) { -- FAIL("unable to remove directory %s", testdir); -+ if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) == 0) { -+ //if (( rank == 0 ) && access(o.testdir, F_OK) == 0) { -+ if (o.backend->rmdir(o.testdir, o.backend_options) == -1) { -+ WARNF("unable to remove directory %s", o.testdir); - } - } - } - } else { -- summary_table->rate[9] = 0; -+ summary_table->rate[MDTEST_TREE_REMOVE_NUM] = 0; - } - } - - void mdtest_init_args(){ -- barriers = 1; -- branch_factor = 1; -- throttle = 1; -- stoneWallingStatusFile = NULL; -- create_only = 0; -- stat_only = 0; -- read_only = 0; -- verify_read = 0; -- verification_error = 0; -- remove_only = 0; -- leaf_only = 0; -- depth = 0; -- num_dirs_in_tree = 0; -- items_per_dir = 0; -- random_seed = 0; -- print_time = 0; -- print_rate_and_time = 0; -- shared_file = 0; -- files_only = 0; -- dirs_only = 0; -- pre_delay = 0; -- unique_dir_per_task = 0; -- time_unique_dir_overhead = 0; -- items = 0; -- num_dirs_in_tree_calc = 0; -- collective_creates = 0; -- write_bytes = 0; -- stone_wall_timer_seconds = 0; -- read_bytes = 0; -- sync_file = 0; -- call_sync = 0; -- path_count = 0; -- nstride = 0; -- make_node = 0; --#ifdef HAVE_LUSTRE_LUSTREAPI -- global_dir_layout = 0; --#endif /* HAVE_LUSTRE_LUSTREAPI */ -+ o = (mdtest_options_t) { -+ .barriers = 1, -+ .branch_factor = 1, -+ .random_buffer_offset = -1, -+ .prologue = "", -+ .epilogue = "", -+ }; - } - - mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { - testComm = world_com; - out_logfile = world_out; -- mpi_comm_world = world_com; -+ out_resultfile = world_out; - -- init_clock(); -+ init_clock(world_com); - - mdtest_init_args(); - int i, j; - int numNodes; - int numTasksOnNode0 = 0; -- MPI_Group worldgroup, testgroup; -+ MPI_Group worldgroup; - struct { - int first; - int last; -@@ -1908,6 +2188,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * - int last = 0; - int stride = 1; - int iterations = 1; -+ int created_root_dir = 0; // was the root directory existing or newly created - - verbose = 0; - int no_barriers = 0; -@@ -1918,67 +2199,94 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * - aiori_supported_apis(APIs, APIs_legacy, MDTEST); - char apiStr[1024]; - sprintf(apiStr, "API for I/O [%s]", APIs); -+ memset(& o.hints, 0, sizeof(o.hints)); -+ -+ char * packetType = "t"; - - option_help options [] = { -- {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & param.api}, -- {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & branch_factor}, -- {'d', NULL, "the directory in which the tests will run", OPTION_OPTIONAL_ARGUMENT, 's', & path}, -+ {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & o.api}, -+ {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.branch_factor}, -+ {'d', NULL, "directory or multiple directories where the test will run [dir|dir1@dir2@dir3...]", OPTION_OPTIONAL_ARGUMENT, 's', & path}, - {'B', NULL, "no barriers between phases", OPTION_OPTIONAL_ARGUMENT, 'd', & no_barriers}, -- {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & create_only}, -- {'T', NULL, "only stat files/dirs", OPTION_FLAG, 'd', & stat_only}, -- {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & read_only}, -- {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & remove_only}, -- {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & dirs_only}, -- {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & read_bytes}, -+ {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & o.create_only}, -+ {'T', NULL, "only stat files/dirs", OPTION_FLAG, 'd', & o.stat_only}, -+ {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & o.read_only}, -+ {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & o.remove_only}, -+ {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & o.dirs_only}, -+ {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & o.read_bytes}, - {'f', NULL, "first number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & first}, -- {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & files_only}, -+ {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & o.files_only}, - #ifdef HAVE_LUSTRE_LUSTREAPI -- {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & global_dir_layout}, -+ {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & o.global_dir_layout}, - #endif /* HAVE_LUSTRE_LUSTREAPI */ -+ {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset}, - {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations}, -- {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & items_per_dir}, -- {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & make_node}, -+ {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir}, -+ {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node}, - {'l', NULL, "last number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & last}, -- {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & leaf_only}, -- {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & items}, -- {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, -- {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & pre_delay}, -- {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & print_rate_and_time}, -+ {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & o.leaf_only}, -+ {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items}, -+ {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.nstride}, -+ {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.pre_delay}, -+ {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & o.print_rate_and_time}, -+ {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & o.print_all_proc}, - {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize}, -- {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & random_seed}, -+ {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed}, - {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride}, -- {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & shared_file}, -- {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & collective_creates}, -- {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & time_unique_dir_overhead}, -- {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & unique_dir_per_task}, -+ {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & o.shared_file}, -+ {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & o.collective_creates}, -+ {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & o.time_unique_dir_overhead}, -+ {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & o.unique_dir_per_task}, - {'v', NULL, "verbosity (each instance of option increments by one)", OPTION_FLAG, 'd', & verbose}, - {'V', NULL, "verbosity value", OPTION_OPTIONAL_ARGUMENT, 'd', & verbose}, -- {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & write_bytes}, -- {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, -- {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, -- {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, -- {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, -- {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, -- {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, -- {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, -+ {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & o.write_bytes}, -+ {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stone_wall_timer_seconds}, -+ {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & o.stoneWallingStatusFile}, -+ {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & o.verify_read}, -+ {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & o.verify_write}, -+ {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & o.sync_file}, -+ {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, -+ {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, -+ {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, -+ {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|random|o|i|t|r]", OPTION_OPTIONAL_ARGUMENT, 's', & packetType}, -+ {0, "run-cmd-before-phase", "call this external command before each phase (excluded from the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.prologue}, -+ {0, "run-cmd-after-phase", "call this external command after each phase (included in the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.epilogue}, -+ {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, -+ {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, -+ {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV}, -+ {0, "showRankStatistics", "Include statistics per rank", OPTION_FLAG, 'd', & o.show_perrank_statistics}, -+ - LAST_OPTION - }; - options_all_t * global_options = airoi_create_all_module_options(options); - option_parse(argc, argv, global_options); -- updateParsedOptions(& param, global_options); -+ o.backend = aiori_select(o.api); -+ if (o.backend == NULL) -+ ERR("Unrecognized I/O API"); -+ if (! o.backend->enable_mdtest) -+ ERR("Backend doesn't support MDTest"); -+ o.backend_options = airoi_update_module_options(o.backend, global_options); - - free(global_options->modules); - free(global_options); -- backend = param.backend; -+ -+ o.dataPacketType = parsePacketType(packetType[0]); - - MPI_Comm_rank(testComm, &rank); -- MPI_Comm_size(testComm, &size); -+ MPI_Comm_size(testComm, &o.size); - -- if (backend->initialize) -- backend->initialize(); -+ if(o.backend->xfer_hints){ -+ o.backend->xfer_hints(& o.hints); -+ } -+ if(o.backend->check_params){ -+ o.backend->check_params(o.backend_options); -+ } -+ if (o.backend->initialize){ -+ o.backend->initialize(o.backend_options); -+ } - -- pid = getpid(); -- uid = getuid(); -+ o.pid = getpid(); -+ o.uid = getuid(); - - numNodes = GetNumNodes(testComm); - numTasksOnNode0 = GetNumTasksOnNode0(testComm); -@@ -1990,118 +2298,122 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * - } - - VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp()); -- VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes); -+ VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes); - VERBOSE(0,-1,"Command line used: %s", cmd_buffer); - - /* adjust special variables */ -- barriers = ! no_barriers; -+ o.barriers = ! no_barriers; - if (path != NULL){ - parse_dirpath(path); - } - if( randomize > 0 ){ -- if (random_seed == 0) { -+ if (o.random_seed == 0) { - /* Ensure all procs have the same random number */ -- random_seed = time(NULL); -+ o.random_seed = time(NULL); - MPI_Barrier(testComm); -- MPI_Bcast(&random_seed, 1, MPI_INT, 0, testComm); -+ MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, testComm); - } -- random_seed += rank; -+ o.random_seed += rank; -+ } -+ if( o.random_buffer_offset == -1 ){ -+ o.random_buffer_offset = time(NULL); -+ MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, testComm); - } -- if ((items > 0) && (items_per_dir > 0) && (! unique_dir_per_task)) { -- directory_loops = items / items_per_dir; -+ if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) { -+ o.directory_loops = o.items / o.items_per_dir; - }else{ -- directory_loops = 1; -+ o.directory_loops = 1; - } -- valid_tests(); -+ md_validate_tests(); - - // option_print_current(options); -- VERBOSE(1,-1, "api : %s", param.api); -- VERBOSE(1,-1, "barriers : %s", ( barriers ? "True" : "False" )); -- VERBOSE(1,-1, "collective_creates : %s", ( collective_creates ? "True" : "False" )); -- VERBOSE(1,-1, "create_only : %s", ( create_only ? "True" : "False" )); -+ VERBOSE(1,-1, "api : %s", o.api); -+ VERBOSE(1,-1, "barriers : %s", ( o.barriers ? "True" : "False" )); -+ VERBOSE(1,-1, "collective_creates : %s", ( o.collective_creates ? "True" : "False" )); -+ VERBOSE(1,-1, "create_only : %s", ( o.create_only ? "True" : "False" )); - VERBOSE(1,-1, "dirpath(s):" ); -- for ( i = 0; i < path_count; i++ ) { -- VERBOSE(1,-1, "\t%s", filenames[i] ); -+ for ( i = 0; i < o.path_count; i++ ) { -+ VERBOSE(1,-1, "\t%s", o.filenames[i] ); - } -- VERBOSE(1,-1, "dirs_only : %s", ( dirs_only ? "True" : "False" )); -- VERBOSE(1,-1, "read_bytes : "LLU"", read_bytes ); -- VERBOSE(1,-1, "read_only : %s", ( read_only ? "True" : "False" )); -+ VERBOSE(1,-1, "dirs_only : %s", ( o.dirs_only ? "True" : "False" )); -+ VERBOSE(1,-1, "read_bytes : "LLU"", o.read_bytes ); -+ VERBOSE(1,-1, "read_only : %s", ( o.read_only ? "True" : "False" )); - VERBOSE(1,-1, "first : %d", first ); -- VERBOSE(1,-1, "files_only : %s", ( files_only ? "True" : "False" )); -+ VERBOSE(1,-1, "files_only : %s", ( o.files_only ? "True" : "False" )); - #ifdef HAVE_LUSTRE_LUSTREAPI -- VERBOSE(1,-1, "global_dir_layout : %s", ( global_dir_layout ? "True" : "False" )); -+ VERBOSE(1,-1, "global_dir_layout : %s", ( o.global_dir_layout ? "True" : "False" )); - #endif /* HAVE_LUSTRE_LUSTREAPI */ - VERBOSE(1,-1, "iterations : %d", iterations ); -- VERBOSE(1,-1, "items_per_dir : "LLU"", items_per_dir ); -+ VERBOSE(1,-1, "items_per_dir : "LLU"", o.items_per_dir ); - VERBOSE(1,-1, "last : %d", last ); -- VERBOSE(1,-1, "leaf_only : %s", ( leaf_only ? "True" : "False" )); -- VERBOSE(1,-1, "items : "LLU"", items ); -- VERBOSE(1,-1, "nstride : %d", nstride ); -- VERBOSE(1,-1, "pre_delay : %d", pre_delay ); -- VERBOSE(1,-1, "remove_only : %s", ( leaf_only ? "True" : "False" )); -- VERBOSE(1,-1, "random_seed : %d", random_seed ); -+ VERBOSE(1,-1, "leaf_only : %s", ( o.leaf_only ? "True" : "False" )); -+ VERBOSE(1,-1, "items : "LLU"", o.items ); -+ VERBOSE(1,-1, "nstride : %d", o.nstride ); -+ VERBOSE(1,-1, "pre_delay : %d", o.pre_delay ); -+ VERBOSE(1,-1, "remove_only : %s", ( o.leaf_only ? "True" : "False" )); -+ VERBOSE(1,-1, "random_seed : %d", o.random_seed ); - VERBOSE(1,-1, "stride : %d", stride ); -- VERBOSE(1,-1, "shared_file : %s", ( shared_file ? "True" : "False" )); -- VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( time_unique_dir_overhead ? "True" : "False" )); -- VERBOSE(1,-1, "stone_wall_timer_seconds: %d", stone_wall_timer_seconds); -- VERBOSE(1,-1, "stat_only : %s", ( stat_only ? "True" : "False" )); -- VERBOSE(1,-1, "unique_dir_per_task : %s", ( unique_dir_per_task ? "True" : "False" )); -- VERBOSE(1,-1, "write_bytes : "LLU"", write_bytes ); -- VERBOSE(1,-1, "sync_file : %s", ( sync_file ? "True" : "False" )); -- VERBOSE(1,-1, "call_sync : %s", ( call_sync ? "True" : "False" )); -- VERBOSE(1,-1, "depth : %d", depth ); -- VERBOSE(1,-1, "make_node : %d", make_node ); -+ VERBOSE(1,-1, "shared_file : %s", ( o.shared_file ? "True" : "False" )); -+ VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( o.time_unique_dir_overhead ? "True" : "False" )); -+ VERBOSE(1,-1, "stone_wall_timer_seconds: %d", o.stone_wall_timer_seconds); -+ VERBOSE(1,-1, "stat_only : %s", ( o.stat_only ? "True" : "False" )); -+ VERBOSE(1,-1, "unique_dir_per_task : %s", ( o.unique_dir_per_task ? "True" : "False" )); -+ VERBOSE(1,-1, "write_bytes : "LLU"", o.write_bytes ); -+ VERBOSE(1,-1, "sync_file : %s", ( o.sync_file ? "True" : "False" )); -+ VERBOSE(1,-1, "call_sync : %s", ( o.call_sync ? "True" : "False" )); -+ VERBOSE(1,-1, "depth : %d", o.depth ); -+ VERBOSE(1,-1, "make_node : %d", o.make_node ); - - /* setup total number of items and number of items per dir */ -- if (depth <= 0) { -- num_dirs_in_tree = 1; -+ if (o.depth <= 0) { -+ o.num_dirs_in_tree = 1; - } else { -- if (branch_factor < 1) { -- num_dirs_in_tree = 1; -- } else if (branch_factor == 1) { -- num_dirs_in_tree = depth + 1; -+ if (o.branch_factor < 1) { -+ o.num_dirs_in_tree = 1; -+ } else if (o.branch_factor == 1) { -+ o.num_dirs_in_tree = o.depth + 1; - } else { -- num_dirs_in_tree = (pow(branch_factor, depth+1) - 1) / (branch_factor - 1); -+ o.num_dirs_in_tree = (pow(o.branch_factor, o.depth+1) - 1) / (o.branch_factor - 1); - } - } -- if (items_per_dir > 0) { -- if(items == 0){ -- if (leaf_only) { -- items = items_per_dir * (uint64_t) pow(branch_factor, depth); -+ if (o.items_per_dir > 0) { -+ if(o.items == 0){ -+ if (o.leaf_only) { -+ o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth); - } else { -- items = items_per_dir * num_dirs_in_tree; -+ o.items = o.items_per_dir * o.num_dirs_in_tree; - } - }else{ -- num_dirs_in_tree_calc = num_dirs_in_tree; -+ o.num_dirs_in_tree_calc = o.num_dirs_in_tree; - } - } else { -- if (leaf_only) { -- if (branch_factor <= 1) { -- items_per_dir = items; -+ if (o.leaf_only) { -+ if (o.branch_factor <= 1) { -+ o.items_per_dir = o.items; - } else { -- items_per_dir = (uint64_t) (items / pow(branch_factor, depth)); -- items = items_per_dir * (uint64_t) pow(branch_factor, depth); -+ o.items_per_dir = (uint64_t) (o.items / pow(o.branch_factor, o.depth)); -+ o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth); - } - } else { -- items_per_dir = items / num_dirs_in_tree; -- items = items_per_dir * num_dirs_in_tree; -+ o.items_per_dir = o.items / o.num_dirs_in_tree; -+ o.items = o.items_per_dir * o.num_dirs_in_tree; - } - } - - /* initialize rand_array */ -- if (random_seed > 0) { -- srand(random_seed); -+ if (o.random_seed > 0) { -+ srand(o.random_seed); - - uint64_t s; - -- rand_array = (uint64_t *) malloc( items * sizeof(*rand_array)); -+ o.rand_array = (uint64_t *) safeMalloc( o.items * sizeof(*o.rand_array)); - -- for (s=0; s < items; s++) { -- rand_array[s] = s; -+ for (s=0; s < o.items; s++) { -+ o.rand_array[s] = s; - } - - /* shuffle list randomly */ -- uint64_t n = items; -+ uint64_t n = o.items; - while (n>1) { - n--; - -@@ -2120,121 +2432,132 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * - * element, and the kth element to the nth element. - */ - -- uint64_t tmp = rand_array[k]; -- rand_array[k] = rand_array[n]; -- rand_array[n] = tmp; -+ uint64_t tmp = o.rand_array[k]; -+ o.rand_array[k] = o.rand_array[n]; -+ o.rand_array[n] = tmp; - } - } - - /* allocate and initialize write buffer with # */ -- if (write_bytes > 0) { -- int alloc_res = posix_memalign((void**)&write_buffer, sysconf(_SC_PAGESIZE), write_bytes); -- if (alloc_res) { -- FAIL("out of memory"); -- } -- generate_memory_pattern(write_buffer, write_bytes); -+ if (o.write_bytes > 0) { -+ o.write_buffer = aligned_buffer_alloc(o.write_bytes, o.gpu_memory_flags); -+ generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); - } - - /* setup directory path to work in */ -- if (path_count == 0) { /* special case where no directory path provided with '-d' option */ -- char *ret = getcwd(testdirpath, MAX_PATHLEN); -+ if (o.path_count == 0) { /* special case where no directory path provided with '-d' option */ -+ char *ret = getcwd(o.testdirpath, MAX_PATHLEN); - if (ret == NULL) { -- FAIL("Unable to get current working directory on %s", testdirpath); -+ FAIL("Unable to get current working directory on %s", o.testdirpath); - } -- path_count = 1; -+ o.path_count = 1; - } else { -- strcpy(testdirpath, filenames[rank%path_count]); -+ strcpy(o.testdirpath, o.filenames[rank % o.path_count]); - } - - /* if directory does not exist, create it */ -- if ((rank < path_count) && backend->access(testdirpath, F_OK, ¶m) != 0) { -- if (backend->mkdir(testdirpath, DIRMODE, ¶m) != 0) { -- FAIL("Unable to create test directory path %s", testdirpath); -+ if ((rank < o.path_count) && o.backend->access(o.testdirpath, F_OK, o.backend_options) != 0) { -+ if (o.backend->mkdir(o.testdirpath, DIRMODE, o.backend_options) != 0) { -+ WARNF("Unable to create test directory path %s", o.testdirpath); - } -+ created_root_dir = 1; - } - - /* display disk usage */ -- VERBOSE(3,-1,"main (before display_freespace): testdirpath is '%s'", testdirpath ); -+ VERBOSE(3,-1,"main (before display_freespace): o.testdirpath is '%s'", o.testdirpath ); - -- if (rank == 0) display_freespace(testdirpath); -+ if (rank == 0) ShowFileSystemSize(o.testdirpath, o.backend, o.backend_options); - int tasksBlockMapping = QueryNodeMapping(testComm, true); - - /* set the shift to mimic IOR and shift by procs per node */ -- if (nstride > 0) { -+ if (o.nstride > 0) { - if ( numNodes > 1 && tasksBlockMapping ) { - /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks - however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */ -- nstride *= numTasksOnNode0; -+ o.nstride *= numTasksOnNode0; - } -- VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride); -+ VERBOSE(0,5,"Shifting ranks by %d for each phase.", o.nstride); - } - -- VERBOSE(3,-1,"main (after display_freespace): testdirpath is '%s'", testdirpath ); -+ VERBOSE(3,-1,"main (after display_freespace): o.testdirpath is '%s'", o.testdirpath ); - - if (rank == 0) { -- if (random_seed > 0) { -- VERBOSE(0,-1,"random seed: %d", random_seed); -+ if (o.random_seed > 0) { -+ VERBOSE(0,-1,"random seed: %d", o.random_seed); - } - } - -- if (gethostname(hostname, MAX_PATHLEN) == -1) { -+ if (gethostname(o.hostname, MAX_PATHLEN) == -1) { - perror("gethostname"); - MPI_Abort(testComm, 2); - } - - if (last == 0) { -- first = size; -- last = size; -+ first = o.size; -+ last = o.size; - } -- -- /* setup summary table for recording results */ -- summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); -- memset(summary_table, 0, iterations * sizeof(mdtest_results_t)); -- for(int i=0; i < iterations; i++){ -- for(int j=0; j < MDTEST_LAST_NUM; j++){ -- summary_table[i].rate[j] = 0.0; -- summary_table[i].time[j] = 0.0; -- } -+ if(first > last){ -+ FAIL("process number: first > last doesn't make sense"); - } -- -- if (summary_table == NULL) { -- FAIL("out of memory"); -+ if(last > o.size){ -+ FAIL("process number: last > number of processes doesn't make sense"); - } - -- if (unique_dir_per_task) { -- sprintf(base_tree_name, "mdtest_tree.%d", rank); -+ /* setup summary table for recording results */ -+ o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t)); -+ memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t)); -+ -+ if (o.unique_dir_per_task) { -+ sprintf(o.base_tree_name, "mdtest_tree.%d", rank); - } else { -- sprintf(base_tree_name, "mdtest_tree"); -+ sprintf(o.base_tree_name, "mdtest_tree"); - } - -+ mdtest_results_t * aggregated_results = safeMalloc(iterations * sizeof(mdtest_results_t)); -+ - /* default use shared directory */ -- strcpy(mk_name, "mdtest.shared."); -- strcpy(stat_name, "mdtest.shared."); -- strcpy(read_name, "mdtest.shared."); -- strcpy(rm_name, "mdtest.shared."); -+ strcpy(o.mk_name, "mdtest.shared."); -+ strcpy(o.stat_name, "mdtest.shared."); -+ strcpy(o.read_name, "mdtest.shared."); -+ strcpy(o.rm_name, "mdtest.shared."); - - MPI_Comm_group(testComm, &worldgroup); -+ -+ last = o.size < last ? o.size : last; -+ -+ /* Run the tests */ -+ for (i = first; i <= last; i += stride) { -+ sleep(1); -+ -+ if(i < last){ -+ MPI_Group testgroup; -+ range.last = i - 1; -+ MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); -+ MPI_Comm_create(world_com, testgroup, &testComm); -+ MPI_Group_free(&testgroup); -+ if(testComm == MPI_COMM_NULL){ -+ continue; -+ } -+ }else{ -+ MPI_Comm_dup(world_com, & testComm); -+ } -+ MPI_Comm_size(testComm, &o.size); - -- /* Run the tests */ -- for (i = first; i <= last && i <= size; i += stride) { -- range.last = i - 1; -- MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); -- MPI_Comm_create(testComm, testgroup, &testComm); - if (rank == 0) { -- uint64_t items_all = i * items; -- if(num_dirs_in_tree_calc){ -- items_all *= num_dirs_in_tree_calc; -+ uint64_t items_all = i * o.items; -+ if(o.num_dirs_in_tree_calc){ -+ items_all *= o.num_dirs_in_tree_calc; - } -- if (files_only && dirs_only) { -+ if (o.files_only && o.dirs_only) { - VERBOSE(0,-1,"%d tasks, "LLU" files/directories", i, items_all); -- } else if (files_only) { -- if (!shared_file) { -+ } else if (o.files_only) { -+ if (! o.shared_file) { - VERBOSE(0,-1,"%d tasks, "LLU" files", i, items_all); - } - else { - VERBOSE(0,-1,"%d tasks, 1 file", i); - } -- } else if (dirs_only) { -+ } else if (o.dirs_only) { - VERBOSE(0,-1,"%d tasks, "LLU" directories", i, items_all); - } - } -@@ -2244,30 +2567,42 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * - - for (j = 0; j < iterations; j++) { - // keep track of the current status for stonewalling -- mdtest_iteration(i, j, testgroup, & summary_table[j]); -+ mdtest_iteration(i, j, & o.summary_table[j]); - } -- if (print_rate_and_time){ -- summarize_results(iterations, 0); -- summarize_results(iterations, 1); -- }else{ -- summarize_results(iterations, print_time); -+ summarize_results(iterations, aggregated_results); -+ if(o.saveRankDetailsCSV){ -+ StoreRankInformation(iterations, aggregated_results); - } -- if (i == 1 && stride > 1) { -- i = 0; -+ int total_errors = 0; -+ MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); -+ if(rank == 0 && total_errors){ -+ VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors); - } -+ -+ MPI_Comm_free(&testComm); - } -+ -+ MPI_Group_free(&worldgroup); -+ testComm = world_com; - -- if(verification_error){ -- VERBOSE(0, -1, "\nERROR: verifying the data read! Take the performance values with care!\n"); -+ if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) { -+ FAIL("Unable to remove test directory path %s", o.testdirpath); - } -+ - VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); - -- if (random_seed > 0) { -- free(rand_array); -+ if (o.random_seed > 0) { -+ free(o.rand_array); - } - -- if (backend->finalize) -- backend->finalize(); -+ if (o.backend->finalize){ -+ o.backend->finalize(o.backend_options); -+ } -+ -+ if (o.write_bytes > 0) { -+ aligned_buffer_free(o.write_buffer, o.gpu_memory_flags); -+ } -+ free(o.summary_table); - -- return summary_table; -+ return aggregated_results; - } -diff --git a/src/mdtest.h b/src/mdtest.h -index 6267282..09f14be 100644 ---- a/src/mdtest.h -+++ b/src/mdtest.h -@@ -8,28 +8,31 @@ - typedef enum { - MDTEST_DIR_CREATE_NUM = 0, - MDTEST_DIR_STAT_NUM = 1, -- MDTEST_DIR_READ_NUM = 1, -- MDTEST_DIR_REMOVE_NUM = 3, -- MDTEST_FILE_CREATE_NUM = 4, -- MDTEST_FILE_STAT_NUM = 5, -- MDTEST_FILE_READ_NUM = 6, -- MDTEST_FILE_REMOVE_NUM = 7, -- MDTEST_TREE_CREATE_NUM = 8, -- MDTEST_TREE_REMOVE_NUM = 9, -+ MDTEST_DIR_READ_NUM = 2, -+ MDTEST_DIR_RENAME_NUM = 3, -+ MDTEST_DIR_REMOVE_NUM = 4, -+ MDTEST_FILE_CREATE_NUM = 5, -+ MDTEST_FILE_STAT_NUM = 6, -+ MDTEST_FILE_READ_NUM = 7, -+ MDTEST_FILE_REMOVE_NUM = 8, -+ MDTEST_TREE_CREATE_NUM = 9, -+ MDTEST_TREE_REMOVE_NUM = 10, - MDTEST_LAST_NUM - } mdtest_test_num_t; - - typedef struct - { -- double rate[MDTEST_LAST_NUM]; /* Calculated throughput */ -+ double rate[MDTEST_LAST_NUM]; /* Calculated throughput after the barrier */ -+ double rate_before_barrier[MDTEST_LAST_NUM]; /* Calculated throughput before the barrier */ - double time[MDTEST_LAST_NUM]; /* Time */ -- uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */ -+ double time_before_barrier[MDTEST_TREE_CREATE_NUM]; /* individual time before executing the barrier */ -+ uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done in this process*/ - - /* Statistics when hitting the stonewall */ -- double stonewall_time[MDTEST_LAST_NUM]; /* runtime until completion / hit of the stonewall */ -- uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* Max number of items a process has accessed */ -- uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items a process has accessed */ -- uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed until stonewall */ -+ double stonewall_time[MDTEST_LAST_NUM]; /* Max runtime of any process until completion / hit of the stonewall */ -+ uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* The number of items a process has accessed */ -+ uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items any process has accessed */ -+ uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed by all processes until stonewall */ - } mdtest_results_t; - - mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile); -diff --git a/src/option.c b/src/option.c -index 2c3e8ef..da19c78 100644 ---- a/src/option.c -+++ b/src/option.c -@@ -7,6 +7,23 @@ - - #include - -+ -+/* merge two option lists and return the total size */ -+option_help * option_merge(option_help * a, option_help * b){ -+ int count_a = 0; -+ for(option_help * i = a; i->type != 0; i++){ -+ count_a++; -+ } -+ int count = count_a + 1; // LAST_OPTION is one -+ for(option_help * i = b; i->type != 0; i++){ -+ count++; -+ } -+ option_help * h = malloc(sizeof(option_help) * count); -+ memcpy(h, a, sizeof(option_help) * count_a); -+ memcpy(h + count_a, b, sizeof(option_help) * (count - count_a)); -+ return h; -+} -+ - /* - * Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes. - */ -@@ -236,115 +253,136 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi - int i = 0; - if(arg != NULL){ - arg[0] = 0; -- arg++; - replaced_equal = 1; -+ -+ // Check empty value -+ arg = (arg[1] == 0) ? NULL : arg + 1; - } - *flag_parsed_next = 0; - -- for(int m = 0; m < opt_all->module_count; m++ ){ -- option_help * args = opt_all->modules[m].options; -- if(args == NULL) continue; -- // try to find matching option help -- for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){ -- if( o->shortVar == 0 && o->longVar == 0 ){ -- // section -- continue; -- } -- if ( (txt[0] == '-' && o->shortVar == txt[1]) || (strlen(txt) > 2 && txt[0] == '-' && txt[1] == '-' && o->longVar != NULL && strcmp(txt + 2, o->longVar) == 0)){ -- // now process the option. -- switch(o->arg){ -- case (OPTION_FLAG):{ -- assert(o->type == 'd'); -- if(arg != NULL){ -- int val = atoi(arg); -- (*(int*) o->variable) = (val < 0) ? 0 : val; -- }else{ -- (*(int*) o->variable)++; -- } -- break; -- } -- case (OPTION_OPTIONAL_ARGUMENT): -- case (OPTION_REQUIRED_ARGUMENT):{ -- // check if next is an argument -- if(arg == NULL){ -- if(o->shortVar == txt[1] && txt[2] != 0){ -- arg = & txt[2]; -+ // just skip over the first dash so we don't have to handle it everywhere below -+ if(txt[0] != '-'){ -+ *error = 1; -+ return; -+ } -+ txt++; -+ int parsed = 0; -+ -+ // printf("Parsing: %s : %s\n", txt, arg); -+ // support groups of multiple flags like -vvv or -vq -+ for(int flag_index = 0; flag_index < strlen(txt); ++flag_index){ -+ // don't loop looking for multiple flags if we already processed a long option -+ if(txt[flag_index] == '=' || (txt[0] == '-' && flag_index > 0)) -+ break; -+ -+ for(int m = 0; m < opt_all->module_count; m++ ){ -+ option_help * args = opt_all->modules[m].options; -+ if(args == NULL) continue; -+ // try to find matching option help -+ for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){ -+ if( o->shortVar == 0 && o->longVar == 0 ){ -+ // section -+ continue; -+ } -+ if ( (o->shortVar == txt[flag_index]) || (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0)){ -+ // printf("Found %s %c=%c? %d %d\n", o->help, o->shortVar, txt[flag_index], (o->shortVar == txt[flag_index]), (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0)); -+ // now process the option. -+ switch(o->arg){ -+ case (OPTION_FLAG):{ -+ assert(o->type == 'd'); -+ if(arg != NULL){ -+ int val = atoi(arg); -+ (*(int*) o->variable) = (val < 0) ? 0 : val; - }else{ -- // simply take the next value as argument -- i++; -- arg = argv[1]; -- *flag_parsed_next = 1; -+ (*(int*) o->variable)++; - } -+ break; - } -- -- if(arg == NULL){ -- const char str[] = {o->shortVar, 0}; -- printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? o->longVar : str); -- exit(1); -- } -- -- switch(o->type){ -- case('p'):{ -- // call the function in the variable -- void(*fp)() = o->variable; -- fp(arg); -- break; -- } -- case('F'):{ -- *(double*) o->variable = atof(arg); -- break; -- } -- case('f'):{ -- *(float*) o->variable = atof(arg); -- break; -- } -- case('d'):{ -- int64_t val = string_to_bytes(arg); -- if (val > INT_MAX || val < INT_MIN){ -- printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg); -+ case (OPTION_OPTIONAL_ARGUMENT): -+ case (OPTION_REQUIRED_ARGUMENT):{ -+ // check if next is an argument -+ if(arg == NULL && replaced_equal != 1){ -+ if(o->shortVar == txt[0] && txt[1] != 0){ -+ arg = & txt[1]; -+ }else{ -+ // simply take the next value as argument -+ i++; -+ arg = argv[1]; -+ *flag_parsed_next = 1; - } -- *(int*) o->variable = val; -- break; - } -- case('H'): -- case('s'):{ -- (*(char **) o->variable) = strdup(arg); -- break; -+ -+ if(arg == NULL){ -+ const char str[] = {o->shortVar, 0}; -+ printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? o->longVar : str); -+ exit(EXIT_FAILURE); - } -- case('c'):{ -- (*(char *)o->variable) = arg[0]; -- if(strlen(arg) > 1){ -- printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); -+ -+ switch(o->type){ -+ case('p'):{ -+ // call the function in the variable -+ void(*fp)() = o->variable; -+ fp(arg); -+ break; -+ } -+ case('F'):{ -+ *(double*) o->variable = atof(arg); -+ break; -+ } -+ case('f'):{ -+ *(float*) o->variable = atof(arg); -+ break; -+ } -+ case('d'):{ -+ int64_t val = string_to_bytes(arg); -+ if (val > INT_MAX || val < INT_MIN){ -+ printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg); -+ } -+ *(int*) o->variable = val; -+ break; -+ } -+ case('H'): -+ case('s'):{ -+ (*(char **) o->variable) = strdup(arg); -+ break; - } -- break; -- } -- case('l'):{ -- *(long long*) o->variable = string_to_bytes(arg); -- break; -- } -- case('u'):{ -- *(uint64_t*) o->variable = string_to_bytes(arg); -- break; -+ case('c'):{ -+ (*(char *)o->variable) = arg[0]; -+ if(strlen(arg) > 1){ -+ printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); -+ } -+ break; -+ } -+ case('l'):{ -+ *(long long*) o->variable = string_to_bytes(arg); -+ break; -+ } -+ case('u'):{ -+ *(uint64_t*) o->variable = string_to_bytes(arg); -+ break; -+ } -+ default: -+ printf("ERROR: Unknown option type %c\n", o->type); -+ break; - } -- default: -- printf("ERROR: Unknown option type %c\n", o->type); - } - } -- } -- if(replaced_equal){ -- arg[-1] = '='; -- } -+ if(replaced_equal){ -+ arg[-1] = '='; -+ } - -- if(o->arg == OPTION_REQUIRED_ARGUMENT){ -- (*requiredArgsSeen)++; -- } -+ if(o->arg == OPTION_REQUIRED_ARGUMENT){ -+ (*requiredArgsSeen)++; -+ } - -- return; -+ parsed = 1; -+ } - } - } - } -- -- if(strcmp(txt, "-h") == 0 || strcmp(txt, "--help") == 0){ -+ if(parsed) return; -+ -+ if(strcmp(txt, "h") == 0 || strcmp(txt, "-help") == 0){ - *print_help = 1; - }else{ - *error = 1; -@@ -422,7 +460,7 @@ int option_parse(int argc, char ** argv, options_all_t * opt_all){ - } - option_print_help(args); - } -- exit(0); -+ exit(EXIT_FAILURE); - } - - return i; -diff --git a/src/option.h b/src/option.h -index 624da51..0afa519 100644 ---- a/src/option.h -+++ b/src/option.h -@@ -23,10 +23,12 @@ typedef struct{ - void * variable; - } option_help; - -+typedef struct aiori_mod_opt_t aiori_mod_opt_t; -+ - typedef struct{ - char * prefix; // may be NULL to include it in the standard name - option_help * options; -- void * defaults; // these default values are taken from the command line -+ aiori_mod_opt_t * defaults; // these default values are taken from the command line - } option_module; - - typedef struct{ -@@ -41,6 +43,7 @@ void option_print_current(option_help * args); - //@return the number of parsed arguments - int option_parse(int argc, char ** argv, options_all_t * args); - int option_parse_str(char*val, options_all_t * opt_all); -+option_help * option_merge(option_help * a, option_help * b); - - /* Parse a single line */ - int option_parse_key_value(char * key, char * value, options_all_t * opt_all); -diff --git a/src/parse_options.c b/src/parse_options.c -index 607d014..69c27c4 100755 ---- a/src/parse_options.c -+++ b/src/parse_options.c -@@ -32,9 +32,7 @@ - #include "option.h" - #include "aiori.h" - --#define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1))) -- --IOR_param_t initialTestParams; -+static IOR_param_t initialTestParams; - - option_help * createGlobalOptions(IOR_param_t * params); - -@@ -63,22 +61,18 @@ static void CheckRunSettings(IOR_test_t *tests) - params->writeFile = TRUE; - } - -- /* If only read or write is requested, then fix the default -- * openFlags to not be open RDWR. It matters in the case -- * of HDFS, which doesn't support opening RDWR. -- * (We assume int-valued params are exclusively 0 or 1.) -- */ -- if ((params->openFlags & IOR_RDWR) -- && ((params->readFile | params->checkRead | params->checkWrite) -- ^ params->writeFile)) { -- -- params->openFlags &= ~(IOR_RDWR); -- if (params->readFile | params->checkRead) { -- params->openFlags |= IOR_RDONLY; -- params->openFlags &= ~(IOR_CREAT|IOR_EXCL); -- } -- else -- params->openFlags |= IOR_WRONLY; -+ if(params->dualMount && !params->filePerProc) { -+ ERR("Dual Mount can only be used with File Per Process"); -+ } -+ -+ if(params->gpuDirect){ -+ if(params->gpuMemoryFlags == IOR_MEMORY_TYPE_GPU_MANAGED){ -+ ERR("GPUDirect cannot be used with managed memory"); -+ } -+ params->gpuMemoryFlags = IOR_MEMORY_TYPE_GPU_DEVICE_ONLY; -+ if(params->checkRead || params->checkWrite){ -+ ERR("GPUDirect data cannot yet be checked"); -+ } - } - } - } -@@ -101,7 +95,7 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - if (initialized) - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - else -- exit(-1); -+ exit(EXIT_FAILURE); - } - if (strcasecmp(option, "api") == 0) { - params->api = strdup(value); -@@ -109,7 +103,7 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - params->backend = aiori_select(params->api); - if (params->backend == NULL){ - fprintf(out_logfile, "Could not load backend API %s\n", params->api); -- exit(-1); -+ exit(EXIT_FAILURE); - } - } else if (strcasecmp(option, "summaryFile") == 0) { - if (rank == 0){ -@@ -119,6 +113,21 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - } - printf("Writing output to %s\n", value); - } -+ } else if (strcasecmp(option, "saveRankPerformanceDetailsCSV") == 0){ -+ if (rank == 0){ -+ // check that the file is writeable, truncate it and add header -+ FILE* fd = fopen(value, "w"); -+ if (fd == NULL){ -+ FAIL("Cannot open saveRankPerformanceDetailsCSV file for write!"); -+ } -+ char buff[] = "access,rank,runtime-with-openclose,runtime,throughput-withopenclose,throughput\n"; -+ int ret = fwrite(buff, strlen(buff), 1, fd); -+ if(ret != 1){ -+ FAIL("Cannot write header to saveRankPerformanceDetailsCSV file"); -+ } -+ fclose(fd); -+ } -+ params->saveRankDetailsCSV = strdup(value); - } else if (strcasecmp(option, "summaryFormat") == 0) { - if(strcasecmp(value, "default") == 0){ - outputFormat = OUTPUT_DEFAULT; -@@ -137,8 +146,14 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - params->platform = strdup(value); - } else if (strcasecmp(option, "testfile") == 0) { - params->testFileName = strdup(value); -- } else if (strcasecmp(option, "hintsfilename") == 0) { -- params->hintsFileName = strdup(value); -+ } else if (strcasecmp(option, "dualmount") == 0){ -+ params->dualMount = atoi(value); -+ } else if (strcasecmp(option, "allocateBufferOnGPU") == 0) { -+ params->gpuMemoryFlags = atoi(value); -+ } else if (strcasecmp(option, "GPUid") == 0) { -+ params->gpuID = atoi(value); -+ } else if (strcasecmp(option, "GPUDirect") == 0) { -+ params->gpuDirect = atoi(value); - } else if (strcasecmp(option, "deadlineforstonewalling") == 0) { - params->deadlineForStonewalling = atoi(value); - } else if (strcasecmp(option, "stoneWallingWearOut") == 0) { -@@ -191,42 +206,26 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - params->keepFileWithError = atoi(value); - } else if (strcasecmp(option, "multiFile") == 0) { - params->multiFile = atoi(value); -- } else if (strcasecmp(option, "quitonerror") == 0) { -- params->quitOnError = atoi(value); -+ } else if (strcasecmp(option, "warningAsErrors") == 0) { -+ params->warningAsErrors = atoi(value); - } else if (strcasecmp(option, "segmentcount") == 0) { - params->segmentCount = string_to_bytes(value); - } else if (strcasecmp(option, "blocksize") == 0) { - params->blockSize = string_to_bytes(value); - } else if (strcasecmp(option, "transfersize") == 0) { - params->transferSize = string_to_bytes(value); -- } else if (strcasecmp(option, "setalignment") == 0) { -- params->setAlignment = string_to_bytes(value); - } else if (strcasecmp(option, "singlexferattempt") == 0) { - params->singleXferAttempt = atoi(value); -- } else if (strcasecmp(option, "individualdatasets") == 0) { -- params->individualDataSets = atoi(value); - } else if (strcasecmp(option, "intraTestBarriers") == 0) { - params->intraTestBarriers = atoi(value); -- } else if (strcasecmp(option, "nofill") == 0) { -- params->noFill = atoi(value); - } else if (strcasecmp(option, "verbose") == 0) { - params->verbose = atoi(value); -- } else if (strcasecmp(option, "settimestampsignature") == 0) { -- params->setTimeStampSignature = atoi(value); - } else if (strcasecmp(option, "collective") == 0) { - params->collective = atoi(value); -- } else if (strcasecmp(option, "preallocate") == 0) { -- params->preallocate = atoi(value); -- } else if (strcasecmp(option, "storefileoffset") == 0) { -- params->storeFileOffset = atoi(value); -- } else if (strcasecmp(option, "usefileview") == 0) { -- params->useFileView = atoi(value); -- } else if (strcasecmp(option, "usesharedfilepointer") == 0) { -- params->useSharedFilePointer = atoi(value); -- } else if (strcasecmp(option, "usestrideddatatype") == 0) { -- params->useStridedDatatype = atoi(value); -- } else if (strcasecmp(option, "showhints") == 0) { -- params->showHints = atoi(value); -+ } else if (strcasecmp(option, "settimestampsignature") == 0) { -+ params->setTimeStampSignature = atoi(value); -+ } else if (strcasecmp(option, "dataPacketType") == 0) { -+ params->dataPacketType = parsePacketType(value[0]); - } else if (strcasecmp(option, "uniqueDir") == 0) { - params->uniqueDir = atoi(value); - } else if (strcasecmp(option, "useexistingtestfile") == 0) { -@@ -243,53 +242,6 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - } else if (strcasecmp(option, "memoryPerNode") == 0) { - params->memoryPerNode = NodeMemoryStringToBytes(value); - params->memoryPerTask = 0; -- } else if (strcasecmp(option, "lustrestripecount") == 0) { --#ifndef HAVE_LUSTRE_LUSTRE_USER_H -- ERR("ior was not compiled with Lustre support"); --#endif -- params->lustre_stripe_count = atoi(value); -- params->lustre_set_striping = 1; -- } else if (strcasecmp(option, "lustrestripesize") == 0) { --#ifndef HAVE_LUSTRE_LUSTRE_USER_H -- ERR("ior was not compiled with Lustre support"); --#endif -- params->lustre_stripe_size = string_to_bytes(value); -- params->lustre_set_striping = 1; -- } else if (strcasecmp(option, "lustrestartost") == 0) { --#ifndef HAVE_LUSTRE_LUSTRE_USER_H -- ERR("ior was not compiled with Lustre support"); --#endif -- params->lustre_start_ost = atoi(value); -- params->lustre_set_striping = 1; -- } else if (strcasecmp(option, "lustreignorelocks") == 0) { --#ifndef HAVE_LUSTRE_LUSTRE_USER_H -- ERR("ior was not compiled with Lustre support"); --#endif -- params->lustre_ignore_locks = atoi(value); -- } else if (strcasecmp(option, "gpfshintaccess") == 0) { --#ifndef HAVE_GPFS_FCNTL_H -- ERR("ior was not compiled with GPFS hint support"); --#endif -- params->gpfs_hint_access = atoi(value); -- } else if (strcasecmp(option, "gpfsreleasetoken") == 0) { --#ifndef HAVE_GPFS_FCNTL_H -- ERR("ior was not compiled with GPFS hint support"); --#endif -- params->gpfs_release_token = atoi(value); -- } else if (strcasecmp(option, "beegfsNumTargets") == 0) { --#ifndef HAVE_BEEGFS_BEEGFS_H -- ERR("ior was not compiled with BeeGFS support"); --#endif -- params->beegfs_numTargets = atoi(value); -- if (params->beegfs_numTargets < 1) -- ERR("beegfsNumTargets must be >= 1"); -- } else if (strcasecmp(option, "beegfsChunkSize") == 0) { -- #ifndef HAVE_BEEGFS_BEEGFS_H -- ERR("ior was not compiled with BeeGFS support"); -- #endif -- params->beegfs_chunkSize = string_to_bytes(value); -- if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16)) -- ERR("beegfsChunkSize must be a power of two and >64k"); - } else if (strcasecmp(option, "summaryalways") == 0) { - params->summary_every_test = atoi(value); - } else { -@@ -306,7 +258,7 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt - if (initialized) - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - else -- exit(-1); -+ exit(EXIT_FAILURE); - } - } - } -@@ -333,7 +285,7 @@ void ParseLine(char *line, IOR_param_t * test, options_all_t * module_options) - } - if(strlen(start) < 3){ - fprintf(out_logfile, "Invalid option substring string: \"%s\" in \"%s\"\n", start, line); -- exit(1); -+ exit(EXIT_FAILURE); - } - DecodeDirective(start, test, module_options); - start = end + 1; -@@ -363,7 +315,7 @@ int contains_only(char *haystack, char *needle) - /* check for "needle" */ - if (strncasecmp(ptr, needle, strlen(needle)) != 0) - return 0; -- /* make sure the rest of the line is only whitspace as well */ -+ /* make sure the rest of the line is only whitespace as well */ - for (ptr += strlen(needle); ptr < end; ptr++) { - if (!isspace(*ptr)) - return 0; -@@ -418,8 +370,8 @@ IOR_test_t *ReadConfigScript(char *scriptName) - continue; - - /* skip lines containing only comments */ -- if (sscanf(ptr, " #%s", empty) == 1) -- continue; -+ if (sscanf(ptr, " #%c", empty) == 1) -+ continue; - - if (contains_only(ptr, "ior stop")) { - break; -@@ -465,20 +417,27 @@ option_help * createGlobalOptions(IOR_param_t * params){ - char APIs[1024]; - char APIs_legacy[1024]; - aiori_supported_apis(APIs, APIs_legacy, IOR); -- char apiStr[1024]; -+ char * apiStr = safeMalloc(1024); - sprintf(apiStr, "API for I/O [%s]", APIs); - - option_help o [] = { - {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & params->api}, - {'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & params->referenceNumber}, - {'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->blockSize}, -- {'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & params->collective}, -+ {'c', "collective", "Use collective I/O", OPTION_FLAG, 'd', & params->collective}, - {'C', NULL, "reorderTasks -- changes task ordering for readback (useful to avoid client cache)", OPTION_FLAG, 'd', & params->reorderTasks}, - {'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay}, - {'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling}, -- {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, -+ {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timeout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, -+#ifdef HAVE_CUDA -+ {.help=" -O allocateBufferOnGPU=X -- allocate I/O buffers on the GPU: X=1 uses managed memory, X=2 device memory.", .arg = OPTION_OPTIONAL_ARGUMENT}, -+ {.help=" -O GPUid=X -- select the GPU to use.", .arg = OPTION_OPTIONAL_ARGUMENT}, -+#ifdef HAVE_GPU_DIRECT -+ {0, "gpuDirect", "allocate I/O buffers on the GPU and use gpuDirect to store data; this option is incompatible with any option requiring CPU access to data.", OPTION_FLAG, 'd', & params->gpuDirect}, -+#endif -+#endif - {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, - {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile}, - {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts}, -@@ -489,43 +448,38 @@ option_help * createGlobalOptions(IOR_param_t * params){ - * after all the arguments are in and we know which it keep. - */ - {'G', NULL, "setTimeStampSignature -- set value for time stamp signature/random seed", OPTION_OPTIONAL_ARGUMENT, 'd', & params->setTimeStampSignature}, -- {'H', NULL, "showHints -- show hints", OPTION_FLAG, 'd', & params->showHints}, - {'i', NULL, "repetitions -- number of repetitions of test", OPTION_OPTIONAL_ARGUMENT, 'd', & params->repetitions}, -- {'I', NULL, "individualDataSets -- datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & params->individualDataSets}, - {'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & params->outlierThreshold}, -- {'J', NULL, "setAlignment -- HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->setAlignment}, - {'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & params->keepFile}, - {'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & params->keepFileWithError}, -- {'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & params->buffer_type}, -+ {'l', "dataPacketType", "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|random|o|i|t|r]", OPTION_OPTIONAL_ARGUMENT, 's', & params->buffer_type}, - {'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile}, - {'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr}, -- {'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill}, - {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, - {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName}, -- {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, -- {'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate}, -- {'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & params->useSharedFilePointer}, -- {'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & params->quitOnError}, -+ {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,GPUid=2)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, - {'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->taskPerNodeOffset}, - {'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & params->readFile}, - {'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & params->checkRead}, - {'s', NULL, "segmentCount -- number of segments", OPTION_OPTIONAL_ARGUMENT, 'd', & params->segmentCount}, -- {'S', NULL, "useStridedDatatype -- put strided access into datatype [not working]", OPTION_FLAG, 'd', & params->useStridedDatatype}, - {'t', NULL, "transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->transferSize}, - {'T', NULL, "maxTimeDuration -- max time in minutes executing repeated test; it aborts only between iterations and not within a test!", OPTION_OPTIONAL_ARGUMENT, 'd', & params->maxTimeDuration}, - {'u', NULL, "uniqueDir -- use unique directory name for each file-per-process", OPTION_FLAG, 'd', & params->uniqueDir}, -- {'U', NULL, "hintsFileName -- full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & params->hintsFileName}, - {'v', NULL, "verbose -- output information (repeating flag increases level)", OPTION_FLAG, 'd', & params->verbose}, -- {'V', NULL, "useFileView -- use MPI_File_set_view", OPTION_FLAG, 'd', & params->useFileView}, - {'w', NULL, "writeFile -- write file", OPTION_FLAG, 'd', & params->writeFile}, - {'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & params->checkWrite}, - {'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & params->singleXferAttempt}, - {'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & params->reorderTasksRandomSeed}, -+ {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, - {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, - {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, -+ {0, "randomPrefill", "For random -z access only: Prefill the file with this blocksize, e.g., 2m", OPTION_OPTIONAL_ARGUMENT, 'l', & params->randomPrefillBlocksize}, -+ {0, "random-offset-seed", "The seed for -z", OPTION_OPTIONAL_ARGUMENT, 'd', & params->randomSeed}, - {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, -+ {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, - {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, -- {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, -+ {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputting the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, -+ {.help=" -O saveRankPerformanceDetailsCSV= -- store the performance of each rank into the named CSV file.", .arg = OPTION_OPTIONAL_ARGUMENT}, - {0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun}, - LAST_OPTION, - }; -@@ -538,9 +492,9 @@ option_help * createGlobalOptions(IOR_param_t * params){ - /* - * Parse Commandline. - */ --IOR_test_t *ParseCommandLine(int argc, char **argv) -+IOR_test_t *ParseCommandLine(int argc, char **argv, MPI_Comm com) - { -- init_IOR_Param_t(& initialTestParams); -+ init_IOR_Param_t(& initialTestParams, com); - - IOR_test_t *tests = NULL; - -diff --git a/src/parse_options.h b/src/parse_options.h -index 45b93ca..b12dd78 100755 ---- a/src/parse_options.h -+++ b/src/parse_options.h -@@ -13,8 +13,6 @@ - - #include "ior.h" - --extern IOR_param_t initialTestParams; -- --IOR_test_t *ParseCommandLine(int argc, char **argv); -+IOR_test_t *ParseCommandLine(int argc, char **argv, MPI_Comm com); - - #endif /* !_PARSE_OPTIONS_H */ -diff --git a/src/test/Makefile.am b/src/test/Makefile.am -index 1f2b141..758d526 100755 ---- a/src/test/Makefile.am -+++ b/src/test/Makefile.am -@@ -1,4 +1,4 @@ --LDFLAGS = $(extraLDFLAGS) -+AM_LDFLAGS = $(extraLDFLAGS) - LDADD = ../libaiori.a $(extraLDADD) - - # Add test here -diff --git a/src/test/example.c b/src/test/example.c -index 5bb4b2b..c9bb152 100644 ---- a/src/test/example.c -+++ b/src/test/example.c -@@ -1,13 +1,16 @@ - #include - --#include --#include -+#include "../ior.h" -+#include "../ior-internal.h" - -+// Run all tests via: -+// make distcheck - // build a single test via, e.g., mpicc example.c -I ../src/ ../src/libaiori.a -lm - --int main(){ -+int main(int argc, char** argv) { -+ MPI_Init(&argc, &argv); - IOR_param_t test; -- init_IOR_Param_t(& test); -+ init_IOR_Param_t(& test, MPI_COMM_WORLD); - test.blockSize = 10; - test.transferSize = 10; - test.segmentCount = 5; -@@ -16,16 +19,7 @@ int main(){ - // having an individual file - test.filePerProc = 1; - -- IOR_offset_t * offsets; -- offsets = GetOffsetArraySequential(& test, 0); -- assert(offsets[0] == 0); -- assert(offsets[1] == 10); -- assert(offsets[2] == 20); -- assert(offsets[3] == 30); -- assert(offsets[4] == 40); -- // for(int i = 0; i < test.segmentCount; i++){ -- // printf("%lld\n", (long long int) offsets[i]); -- // } - printf("OK\n"); -+ MPI_Finalize(); - return 0; - } -diff --git a/src/utilities.c b/src/utilities.c -index bcb1e03..65eaec1 100755 ---- a/src/utilities.c -+++ b/src/utilities.c -@@ -16,6 +16,12 @@ - # include "config.h" - #endif - -+#ifdef HAVE_GETCPU_SYSCALL -+# define _GNU_SOURCE -+# include -+# include -+#endif -+ - #ifdef __linux__ - # define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */ - #endif /* __linux__ */ -@@ -31,6 +37,10 @@ - #include - #include - -+#ifdef HAVE_CUDA -+#include -+#endif -+ - #ifndef _WIN32 - # include - # ifdef __sun /* SunOS does not support statfs(), instead uses statvfs() */ -@@ -47,6 +57,9 @@ - #include "utilities.h" - #include "aiori.h" - #include "ior.h" -+#include "ior-internal.h" -+ -+#define RANDALGO_GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL - - /************************** D E C L A R A T I O N S ***************************/ - -@@ -57,14 +70,147 @@ extern int numTasks; - int rank = 0; - int rankOffset = 0; - int verbose = VERBOSE_0; /* verbose output */ --MPI_Comm testComm; --MPI_Comm mpi_comm_world; --FILE * out_logfile; --FILE * out_resultfile; -+MPI_Comm testComm = MPI_COMM_NULL; -+FILE * out_logfile = NULL; -+FILE * out_resultfile = NULL; - enum OutputFormat_t outputFormat; - -+/* local */ -+int rand_state_init = 0; -+uint64_t rand_state = 0; -+ - /***************************** F U N C T I O N S ******************************/ - -+/** -+ * Modifies a buffer for a write. Performance sensitive because it is called -+ * before each write. -+ * -+ * @param buf pointer to byte buffer to fill -+ * @param bytes number of bytes to produce to fill buffer -+ * @param rand_seed seed to use for PRNG -+ * @param pretendRank unique identifier for this process -+ * @param dataPacketType identifier to designate pattern to fill buffer -+ */ -+void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ -+ if (dataPacketType == DATA_TIMESTAMP || bytes < 8) -+ return; -+ -+ size_t size = bytes / sizeof(uint64_t); -+ uint64_t * buffi = (uint64_t*) buf; -+ -+ if (dataPacketType == DATA_RANDOM) { -+ uint64_t rand_state_local = rand_state; -+ if (!rand_state_init) { -+ unsigned seed = rand_seed + pretendRank; -+ rand_state_init = 1; -+ rand_state_local = rand_r(&seed); -+ } -+ for (size_t i = 0; i < size; i++) { -+ rand_state_local *= RANDALGO_GOLDEN_RATIO_PRIME; -+ rand_state_local >>= 3; -+ buffi[i] = rand_state_local; -+ } -+ rand_state = rand_state_local; -+ return; -+ } -+ -+ /* DATA_INCOMPRESSIBLE and DATA_OFFSET */ -+ int k = 1; -+ for(size_t i=0; i < size; i+=512, k++){ -+ buffi[i] = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32; -+ } -+} -+ -+/** -+ * Fills a buffer with bytes of a given pattern. Not performance-sensitive -+ * because it is called once per test. -+ * -+ * @param buf pointer to byte buffer to fill -+ * @param bytes number of bytes to produce to fill buffer -+ * @param rand_seed seed to use for PRNG -+ * @param pretendRank unique identifier for this process -+ * @param dataPacketType identifier to designate pattern to fill buffer -+ */ -+void generate_memory_pattern(char * buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ -+ uint64_t * buffi = (uint64_t*) buf; -+ // first half of 64 bits use the rank -+ const size_t size = bytes / 8; -+ // the first 8 bytes of each 4k block are updated at runtime -+ unsigned seed = rand_seed + pretendRank; -+ if (dataPacketType == DATA_RANDOM && !rand_state_init) { -+ rand_state_init = 1; -+ rand_state = rand_r(&seed); -+ } -+ for(size_t i=0; i < size; i++){ -+ switch(dataPacketType){ -+ case(DATA_RANDOM): -+ rand_state *= RANDALGO_GOLDEN_RATIO_PRIME; -+ rand_state >>= 3; -+ buffi[i] = rand_state; -+ break; -+ case(DATA_INCOMPRESSIBLE):{ -+ uint64_t hi = ((uint64_t) rand_r(& seed) << 32); -+ uint64_t lo = (uint64_t) rand_r(& seed); -+ buffi[i] = hi | lo; -+ break; -+ }case(DATA_OFFSET):{ -+ }case(DATA_TIMESTAMP):{ -+ buffi[i] = ((uint64_t) pretendRank) << 32 | rand_seed + i; -+ break; -+ } -+ } -+ } -+ -+ for(size_t i=size*8; i < bytes; i++){ -+ buf[i] = (char) i; -+ } -+} -+ -+int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ -+ int error = 0; -+ // always read all data to ensure that performance numbers stay the same -+ uint64_t * buffi = (uint64_t*) buffer; -+ -+ // the first 8 bytes are set to item number -+ int k=1; -+ unsigned seed = rand_seed + pretendRank; -+ const size_t size = bytes / 8; -+ for(size_t i=0; i < size; i++){ -+ uint64_t exp; -+ -+ switch(dataPacketType){ -+ case(DATA_RANDOM): -+ rand_state *= RANDALGO_GOLDEN_RATIO_PRIME; -+ rand_state >>= 3; -+ buffi[i] = rand_state; -+ break; -+ case(DATA_INCOMPRESSIBLE):{ -+ uint64_t hi = ((uint64_t) rand_r(& seed) << 32); -+ uint64_t lo = (uint64_t) rand_r(& seed); -+ exp = hi | lo; -+ break; -+ }case(DATA_OFFSET):{ -+ }case(DATA_TIMESTAMP):{ -+ exp = ((uint64_t) pretendRank) << 32 | rand_seed + i; -+ break; -+ } -+ } -+ if(i % 512 == 0 && dataPacketType != DATA_TIMESTAMP){ -+ exp = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32; -+ k++; -+ } -+ if(buffi[i] != exp){ -+ error = 1; -+ } -+ } -+ for(size_t i=size*8; i < bytes; i++){ -+ if(buffer[i] != (char) i){ -+ error = 1; -+ } -+ } -+ return error; -+} -+ - void* safeMalloc(uint64_t size){ - void * d = malloc(size); - if (d == NULL){ -@@ -80,8 +226,8 @@ void FailMessage(int rank, const char *location, char *format, ...) { - va_start(args, format); - vsnprintf(msg, 4096, format, args); - va_end(args); -- fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", -- PrintTimestamp(), rank, location, msg, strerror(errno)); -+ fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s\n", -+ PrintTimestamp(), rank, location, msg); - fflush(out_logfile); - MPI_Abort(testComm, 1); - } -@@ -118,35 +264,37 @@ size_t NodeMemoryStringToBytes(char *size_str) - return mem / 100 * percent; - } - -+ior_dataPacketType_e parsePacketType(char t){ -+ switch(t) { -+ case '\0': return DATA_TIMESTAMP; -+ case 'i': /* Incompressible */ -+ return DATA_INCOMPRESSIBLE; -+ case 't': /* timestamp */ -+ return DATA_TIMESTAMP; -+ case 'o': /* offset packet */ -+ return DATA_OFFSET; -+ case 'r': /* randomized blocks */ -+ return DATA_RANDOM; -+ default: -+ ERRF("Unknown packet type \"%c\"; generic assumed\n", t); -+ return DATA_OFFSET; -+ } -+} -+ - void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){ - if (options->setTimeStampSignature){ - options->incompressibleSeed = options->setTimeStampSignature; - } - - if (options->buffer_type && options->buffer_type[0] != 0){ -- switch(options->buffer_type[0]) { -- case 'i': /* Incompressible */ -- options->dataPacketType = incompressible; -- break; -- case 't': /* timestamp */ -- options->dataPacketType = timestamp; -- break; -- case 'o': /* offset packet */ -- options->storeFileOffset = TRUE; -- options->dataPacketType = offset; -- break; -- default: -- fprintf(out_logfile, -- "Unknown argument for -l %s; generic assumed\n", options->buffer_type); -- break; -- } -+ options->dataPacketType = parsePacketType(options->buffer_type[0]); - } - if (options->memoryPerNodeStr){ - options->memoryPerNode = NodeMemoryStringToBytes(options->memoryPerNodeStr); - } - const ior_aiori_t * backend = aiori_select(options->api); - if (backend == NULL) -- ERR_SIMPLE("unrecognized I/O API"); -+ ERR("Unrecognized I/O API"); - - options->backend = backend; - /* copy the actual module options into the test */ -@@ -157,7 +305,7 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){ - /* Used in aiori-POSIX.c and aiori-PLFS.c - */ - --void set_o_direct_flag(int *fd) -+void set_o_direct_flag(int *flag) - { - /* note that TRU64 needs O_DIRECTIO, SunOS uses directio(), - and everyone else needs O_DIRECT */ -@@ -170,7 +318,7 @@ void set_o_direct_flag(int *fd) - # endif /* not O_DIRECTIO */ - #endif /* not O_DIRECT */ - -- *fd |= O_DIRECT; -+ *flag |= O_DIRECT; - } - - -@@ -565,91 +713,64 @@ IOR_offset_t StringToBytes(char *size_str) - /* - * Displays size of file system and percent of data blocks and inodes used. - */ --void ShowFileSystemSize(char *fileSystem) // this might be converted to an AIORI call -+void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options) // this might be converted to an AIORI call - { --#ifndef _WIN32 /* FIXME */ -- char realPath[PATH_MAX]; -- char *fileSystemUnitStr; -- long long int totalFileSystemSize; -- long long int freeFileSystemSize; -- long long int totalInodes; -- long long int freeInodes; -- double totalFileSystemSizeHR; -- double usedFileSystemPercentage; -- double usedInodePercentage; --#ifdef __sun /* SunOS does not support statfs(), instead uses statvfs() */ -- struct statvfs statusBuffer; --#else /* !__sun */ -- struct statfs statusBuffer; --#endif /* __sun */ -- --#ifdef __sun -- if (statvfs(fileSystem, &statusBuffer) != 0) { -- WARN("unable to statvfs() file system"); -- return; -- } --#else /* !__sun */ -- if (statfs(fileSystem, &statusBuffer) != 0) { -- WARN("unable to statfs() file system"); -- return; -- } --#endif /* __sun */ -- -- /* data blocks */ --#ifdef __sun -- totalFileSystemSize = statusBuffer.f_blocks * statusBuffer.f_frsize; -- freeFileSystemSize = statusBuffer.f_bfree * statusBuffer.f_frsize; --#else /* !__sun */ -- totalFileSystemSize = statusBuffer.f_blocks * statusBuffer.f_bsize; -- freeFileSystemSize = statusBuffer.f_bfree * statusBuffer.f_bsize; --#endif /* __sun */ -- -- usedFileSystemPercentage = (1 - ((double)freeFileSystemSize -- / (double)totalFileSystemSize)) * 100; -- totalFileSystemSizeHR = -- (double)totalFileSystemSize / (double)(1<<30); -- fileSystemUnitStr = "GiB"; -- if (totalFileSystemSizeHR > 1024) { -- totalFileSystemSizeHR = (double)totalFileSystemSize / (double)((long long)1<<40); -- fileSystemUnitStr = "TiB"; -- } -- -- /* inodes */ -- totalInodes = statusBuffer.f_files; -- freeInodes = statusBuffer.f_ffree; -- usedInodePercentage = -- (1 - ((double)freeInodes / (double)totalInodes)) * 100; -- -- /* show results */ -- if (realpath(fileSystem, realPath) == NULL) { -- WARN("unable to use realpath()"); -- return; -- } -- -- if(outputFormat == OUTPUT_DEFAULT){ -- fprintf(out_resultfile, "%-20s: %s\n", "Path", realPath); -- fprintf(out_resultfile, "%-20s: %.1f %s Used FS: %2.1f%% ", -- "FS", totalFileSystemSizeHR, fileSystemUnitStr, -- usedFileSystemPercentage); -- fprintf(out_resultfile, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n", -- (double)totalInodes / (double)(1<<20), -- usedInodePercentage); -- fflush(out_logfile); -- }else if(outputFormat == OUTPUT_JSON){ -- fprintf(out_resultfile, " , \"Path\": \"%s\",", realPath); -- fprintf(out_resultfile, "\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",", -- totalFileSystemSizeHR, fileSystemUnitStr, -- usedFileSystemPercentage); -- fprintf(out_resultfile, "\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n", -- (double)totalInodes / (double)(1<<20), -- usedInodePercentage); -- }else if(outputFormat == OUTPUT_CSV){ -- -- } -+ ior_aiori_statfs_t stat; -+ if(! backend->statfs){ -+ WARN("Backend doesn't implement statfs"); -+ return; -+ } -+ int ret = backend->statfs(filename, & stat, backend_options); -+ if( ret != 0 ){ -+ WARN("Backend returned error during statfs"); -+ return; -+ } -+ long long int totalFileSystemSize; -+ long long int freeFileSystemSize; -+ long long int totalInodes; -+ long long int freeInodes; -+ double totalFileSystemSizeHR; -+ double usedFileSystemPercentage; -+ double usedInodePercentage; -+ char *fileSystemUnitStr; -+ -+ totalFileSystemSize = stat.f_blocks * stat.f_bsize; -+ freeFileSystemSize = stat.f_bfree * stat.f_bsize; -+ usedFileSystemPercentage = (1 - ((double)freeFileSystemSize / (double)totalFileSystemSize)) * 100; -+ totalFileSystemSizeHR = (double)totalFileSystemSize / (double)(1<<30); -+ -+ /* inodes */ -+ totalInodes = stat.f_files; -+ freeInodes = stat.f_ffree; -+ usedInodePercentage = (1 - ((double)freeInodes / (double)totalInodes)) * 100; -+ -+ fileSystemUnitStr = "GiB"; -+ if (totalFileSystemSizeHR > 1024) { -+ totalFileSystemSizeHR = (double)totalFileSystemSize / (double)((long long)1<<40); -+ fileSystemUnitStr = "TiB"; -+ } -+ if(outputFormat == OUTPUT_DEFAULT){ -+ fprintf(out_resultfile, "%-20s: %s\n", "Path", filename); -+ fprintf(out_resultfile, "%-20s: %.1f %s Used FS: %2.1f%% ", -+ "FS", totalFileSystemSizeHR, fileSystemUnitStr, -+ usedFileSystemPercentage); -+ fprintf(out_resultfile, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n", -+ (double)totalInodes / (double)(1<<20), -+ usedInodePercentage); -+ fflush(out_logfile); -+ }else if(outputFormat == OUTPUT_JSON){ -+ fprintf(out_resultfile, " , \"Path\": \"%s\",", filename); -+ fprintf(out_resultfile, "\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",", -+ totalFileSystemSizeHR, fileSystemUnitStr, -+ usedFileSystemPercentage); -+ fprintf(out_resultfile, "\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n", -+ (double)totalInodes / (double)(1<<20), -+ usedInodePercentage); -+ }else if(outputFormat == OUTPUT_CSV){ - --#endif /* !_WIN32 */ -+ } - -- return; -+ return; - } - - /* -@@ -672,27 +793,6 @@ int Regex(char *string, char *pattern) - return (retValue); - } - --/* -- * Seed random generator. -- */ --void SeedRandGen(MPI_Comm testComm) --{ -- unsigned int randomSeed; -- -- if (rank == 0) { --#ifdef _WIN32 -- rand_s(&randomSeed); --#else -- struct timeval randGenTimer; -- gettimeofday(&randGenTimer, (struct timezone *)NULL); -- randomSeed = randGenTimer.tv_usec; --#endif -- } -- MPI_CHECK(MPI_Bcast(&randomSeed, 1, MPI_INT, 0, -- testComm), "cannot broadcast random seed value"); -- srandom(randomSeed); --} -- - /* - * System info for Windows. - */ -@@ -715,10 +815,6 @@ int uname(struct utsname *name) - } - #endif /* _WIN32 */ - -- --double wall_clock_deviation; --double wall_clock_delta = 0; -- - /* - * Get time stamp. Use MPI_Timer() unless _NO_MPI_TIMER is defined, - * in which case use gettimeofday(). -@@ -726,55 +822,46 @@ double wall_clock_delta = 0; - double GetTimeStamp(void) - { - double timeVal; --#ifdef _NO_MPI_TIMER - struct timeval timer; - - if (gettimeofday(&timer, (struct timezone *)NULL) != 0) - ERR("cannot use gettimeofday()"); - timeVal = (double)timer.tv_sec + ((double)timer.tv_usec / 1000000); --#else /* not _NO_MPI_TIMER */ -- timeVal = MPI_Wtime(); /* no MPI_CHECK(), just check return value */ -- if (timeVal < 0) -- ERR("cannot use MPI_Wtime()"); --#endif /* _NO_MPI_TIMER */ -- -- /* wall_clock_delta is difference from root node's time */ -- timeVal -= wall_clock_delta; - - return (timeVal); - } - - /* - * Determine any spread (range) between node times. -+ * Obsolete - */ --static double TimeDeviation(void) -+static double TimeDeviation(MPI_Comm com) - { - double timestamp; - double min = 0; - double max = 0; - double roottimestamp; - -- MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); -+ MPI_CHECK(MPI_Barrier(com), "barrier error"); - timestamp = GetTimeStamp(); - MPI_CHECK(MPI_Reduce(×tamp, &min, 1, MPI_DOUBLE, -- MPI_MIN, 0, mpi_comm_world), -+ MPI_MIN, 0, com), - "cannot reduce tasks' times"); - MPI_CHECK(MPI_Reduce(×tamp, &max, 1, MPI_DOUBLE, -- MPI_MAX, 0, mpi_comm_world), -+ MPI_MAX, 0, com), - "cannot reduce tasks' times"); - - /* delta between individual nodes' time and root node's time */ - roottimestamp = timestamp; -- MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, mpi_comm_world), -+ MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, com), - "cannot broadcast root's time"); -- wall_clock_delta = timestamp - roottimestamp; -+ // wall_clock_delta = timestamp - roottimestamp; - - return max - min; - } - --void init_clock(){ -- /* check for skew between tasks' start times */ -- wall_clock_deviation = TimeDeviation(); -+void init_clock(MPI_Comm com){ -+ - } - - char * PrintTimestamp() { -@@ -792,24 +879,25 @@ char * PrintTimestamp() { - return datestring; - } - --int64_t ReadStoneWallingIterations(char * const filename){ -+int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com){ - long long data; - if(rank != 0){ -- MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); -+ MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); - return data; - }else{ - FILE * out = fopen(filename, "r"); - if (out == NULL){ - data = -1; -- MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); -+ MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); - return data; - } - int ret = fscanf(out, "%lld", & data); - if (ret != 1){ -+ fclose(out); - return -1; - } - fclose(out); -- MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); -+ MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); - return data; - } - } -@@ -892,3 +980,99 @@ char *HumanReadable(IOR_offset_t value, int base) - } - return valueStr; - } -+ -+#if defined(HAVE_GETCPU_SYSCALL) -+// Assume we aren't worried about thread/process migration. -+// Test on Intel systems and see if we can get rid of the architecture specificity -+// of the code. -+unsigned long GetProcessorAndCore(int *chip, int *core){ -+ return syscall(SYS_getcpu, core, chip, NULL); -+} -+#elif defined(HAVE_RDTSCP_ASM) -+// We're on an intel processor and use the -+// rdtscp instruction. -+unsigned long GetProcessorAndCore(int *chip, int *core){ -+ unsigned long a,d,c; -+ __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); -+ *chip = (c & 0xFFF000)>>12; -+ *core = c & 0xFFF; -+ return ((unsigned long)a) | (((unsigned long)d) << 32);; -+} -+#else -+// TODO: Add in AMD function -+unsigned long GetProcessorAndCore(int *chip, int *core){ -+#warning GetProcessorAndCore is implemented as a dummy -+ *chip = 0; -+ *core = 0; -+ return 1; -+} -+#endif -+ -+ -+ -+/* -+ * Allocate a page-aligned (required by O_DIRECT) buffer. -+ */ -+void *aligned_buffer_alloc(size_t size, ior_memory_flags type) -+{ -+ size_t pageMask; -+ char *buf, *tmp; -+ char *aligned; -+ -+ if(type == IOR_MEMORY_TYPE_GPU_MANAGED){ -+#ifdef HAVE_CUDA -+ // use unified memory here to allow drop-in-replacement -+ if (cudaMallocManaged((void**) & buf, size, cudaMemAttachGlobal) != cudaSuccess){ -+ ERR("Cannot allocate buffer on GPU"); -+ } -+ return buf; -+#else -+ ERR("No CUDA supported, cannot allocate on the GPU"); -+#endif -+ }else if(type == IOR_MEMORY_TYPE_GPU_DEVICE_ONLY){ -+#ifdef HAVE_GPU_DIRECT -+ if (cudaMalloc((void**) & buf, size) != cudaSuccess){ -+ ERR("Cannot allocate buffer on GPU"); -+ } -+ return buf; -+#else -+ ERR("No GPUDirect supported, cannot allocate on the GPU"); -+#endif -+ } -+ -+#ifdef HAVE_SYSCONF -+ long pageSize = sysconf(_SC_PAGESIZE); -+#else -+ size_t pageSize = getpagesize(); -+#endif -+ -+ pageMask = pageSize - 1; -+ buf = safeMalloc(size + pageSize + sizeof(void *)); -+ /* find the alinged buffer */ -+ tmp = buf + sizeof(char *); -+ aligned = tmp + pageSize - ((size_t) tmp & pageMask); -+ /* write a pointer to the original malloc()ed buffer into the bytes -+ preceding "aligned", so that the aligned buffer can later be free()ed */ -+ tmp = aligned - sizeof(void *); -+ *(void **)tmp = buf; -+ -+ return (void *)aligned; -+} -+ -+/* -+ * Free a buffer allocated by aligned_buffer_alloc(). -+ */ -+void aligned_buffer_free(void *buf, ior_memory_flags gpu) -+{ -+ if(gpu){ -+#ifdef HAVE_CUDA -+ if (cudaFree(buf) != cudaSuccess){ -+ WARN("Cannot free buffer on GPU"); -+ } -+ return; -+#else -+ ERR("No CUDA supported, cannot free on the GPU"); -+#endif -+ } -+ free(*(void **)((char *)buf - sizeof(char *))); -+} -diff --git a/src/utilities.h b/src/utilities.h -index 2a9abe3..7e9f704 100755 ---- a/src/utilities.h -+++ b/src/utilities.h -@@ -22,8 +22,6 @@ extern int rank; - extern int rankOffset; - extern int verbose; - extern MPI_Comm testComm; --extern MPI_Comm mpi_comm_world; --extern FILE * out_logfile; - extern FILE * out_resultfile; - extern enum OutputFormat_t outputFormat; /* format of the output */ - -@@ -31,25 +29,22 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ - * Try using the system's PATH_MAX, which is what realpath and such use. - */ - #define MAX_PATHLEN PATH_MAX -- -- --#ifdef __linux__ - #define ERROR_LOCATION __func__ --#else --#define ERROR_LOCATION __LINE__ --#endif - --#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__) --void FailMessage(int rank, const char *location, char *format, ...); - - void* safeMalloc(uint64_t size); - void set_o_direct_flag(int *fd); - -+ior_dataPacketType_e parsePacketType(char t); -+void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType); -+void generate_memory_pattern(char * buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType); -+/* check a data buffer, @return 0 if all is correct, otherwise 1 */ -+int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType); -+ - char *CurrentTimeString(void); - int Regex(char *, char *); --void ShowFileSystemSize(char *); -+void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options); - void DumpBuffer(void *, size_t); --void SeedRandGen(MPI_Comm); - void SetHints (MPI_Info *, char *); - void ShowHints (MPI_Info *); - char *HumanReadable(IOR_offset_t value, int base); -@@ -62,13 +57,13 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); - size_t NodeMemoryStringToBytes(char *size_str); - - /* Returns -1, if cannot be read */ --int64_t ReadStoneWallingIterations(char * const filename); -+int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com); - void StoreStoneWallingIterations(char * const filename, int64_t count); - --void init_clock(void); -+void init_clock(MPI_Comm com); - double GetTimeStamp(void); - char * PrintTimestamp(); // TODO remove this function -- --extern double wall_clock_deviation; --extern double wall_clock_delta; -+unsigned long GetProcessorAndCore(int *chip, int *core); -+void *aligned_buffer_alloc(size_t size, ior_memory_flags type); -+void aligned_buffer_free(void *buf, ior_memory_flags type); - #endif /* !_UTILITIES_H */ -diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh -index 91dba4b..2f82ced 100755 ---- a/testing/basic-tests.sh -+++ b/testing/basic-tests.sh -@@ -15,18 +15,39 @@ MDTEST 1 -a POSIX - MDTEST 2 -a POSIX -W 2 - MDTEST 1 -C -T -r -F -I 1 -z 1 -b 1 -L -u - MDTEST 1 -C -T -I 1 -z 1 -b 1 -u -+MDTEST 2 -n 1 -f 1 -l 2 - --IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k --IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k --IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 100k -+IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 2000k -+IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 200k -+IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 200k - --IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k -+IOR 2 -a POSIX -w -C -k -e -i1 -m -t 100k -b 200k - -+IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 200k -+IOR 3 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 200k - - IOR 2 -f "$ROOT/test_comments.ior" - -+# Test for JSON output -+IOR 2 -a DUMMY -e -F -t 1m -b 1m -A 328883 -O summaryFormat=JSON -O summaryFile=OUT.json -+python -mjson.tool OUT.json >/dev/null && echo "JSON OK" -+ -+# MDWB -+MDWB 3 -a POSIX -O=1 -D=1 -G=10 -P=1 -I=1 -R=2 -X -+MDWB 3 -a POSIX -O=1 -D=4 -G=10 -P=4 -I=1 -R=2 -X -t=0.001 -L=latency.txt -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -R=2 -X -W -w 1 -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -1 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --read-only --run-info-file=mdw.tst --print-detailed-stats -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --read-only --run-info-file=mdw.tst --print-detailed-stats -+MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -3 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats -+ -+MDWB 2 -a POSIX -O=1 -D=1 -G=3 -P=2 -I=2 -R=2 -X -S 772 --dataPacketType=t -+DELETE=0 -+MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -1 -+MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -2 -+MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -3 - END -diff --git a/testing/build-hdfs.sh b/testing/build-hdfs.sh -new file mode 100755 -index 0000000..0165dfb ---- /dev/null -+++ b/testing/build-hdfs.sh -@@ -0,0 +1,18 @@ -+#!/bin/bash -+mkdir build-hdfs -+cd build-hdfs -+ -+VER=hadoop-3.2.1 -+if [[ ! -e $VER.tar.gz ]] ; then -+ wget https://www.apache.org/dyn/closer.cgi/hadoop/common/$VER/$VER.tar.gz -+ tar -xf $VER.tar.gz -+fi -+ -+../configure --with-hdfs CFLAGS="-I$PWD/$VER/include/ -O0 -g3" LDFLAGS="-L$PWD/$VER/lib/native -Wl,-rpath=$PWD/$VER/lib/native" -+make -j -+ -+ -+echo "To run execute:" -+echo export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ -+echo export CLASSPATH=$(find $VER/ -name "*.jar" -printf "%p:") -+echo ./src/ior -a HDFS -diff --git a/testing/build-pnetcdf.sh b/testing/build-pnetcdf.sh -new file mode 100755 -index 0000000..ff14571 ---- /dev/null -+++ b/testing/build-pnetcdf.sh -@@ -0,0 +1,16 @@ -+#!/bin/bash -e -+ -+mkdir build-pnetcdf -+cd build-pnetcdf -+VER=pnetcdf-1.12.2.tar.gz -+if [[ ! -e $VER ]] ; then -+ wget https://parallel-netcdf.github.io/Release/$VER -+ tar -xf $VER -+ CUR_DIR=$(pwd) -+ pushd pnetcdf*/ -+ ./configure --prefix=$CUR_DIR/install -+ make -j install -+ popd -+fi -+ -+../configure --with-ncmpi LDFLAGS=-L$(pwd)/install/lib CFLAGS=-I$(pwd)/install/include -diff --git a/testing/complex-tests.sh b/testing/complex-tests.sh -index c314cf9..e4692dd 100755 ---- a/testing/complex-tests.sh -+++ b/testing/complex-tests.sh -@@ -10,22 +10,22 @@ TYPE="advanced" - source $ROOT/test-lib.sh - - #stonewalling tests --IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15 --IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still! --IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30 -+IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15 -k -+IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 -k # max 15 still! -+IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30 -k - - MDTEST 2 -I 20 -a DUMMY -W 1 -x stonewall-md.log -C - MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v - MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -D -v - - #shared tests --IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -r -z-k -e -i1 -m -t 100k -b 100k -+IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -r -z-k -e -i1 -m -t 100k -b 200k - - #test mutually exclusive options --IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k --IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k -+IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 200k -+IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 200k - IOR 2 -a POSIX -w -Z -i1 -m -t 100k -b 100k -d 0.1 - - # Now set the num tasks per node to 1: -diff --git a/testing/docker/ceph/NOTES b/testing/docker/ceph/NOTES -index 2023922..398e4c4 100644 ---- a/testing/docker/ceph/NOTES -+++ b/testing/docker/ceph/NOTES -@@ -7,7 +7,7 @@ Following are basic notes on how to deploy the 'ceph/demo' docker container. The - Run `docker pull ceph/demo` to download the image to your system. - - ################################ --# Deploy 'ceph/demo' conatiner # -+# Deploy 'ceph/demo' container # - ################################ - - To deploy the Ceph cluster, execute the following command: -diff --git a/testing/docker/run-all-tests.sh b/testing/docker/run-all-tests.sh -index 172576f..15d576d 100755 ---- a/testing/docker/run-all-tests.sh -+++ b/testing/docker/run-all-tests.sh -@@ -46,7 +46,7 @@ for IMAGE in $(find -type d | cut -b 3- |grep -v "^$") ; do - done - - if [[ $ERROR != 0 ]] ; then -- echo "Errors occured!" -+ echo "Errors occurred!" - else - echo "OK: all tests passed!" - fi -diff --git a/testing/mdtest-patterns/advanced/3.txt b/testing/mdtest-patterns/advanced/3.txt -index 4c45941..cff653e 100644 ---- a/testing/mdtest-patterns/advanced/3.txt -+++ b/testing/mdtest-patterns/advanced/3.txt -@@ -1,95 +1,92 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir --V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 --V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -+V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -+V-3: Rank 0 will file_test on mdtest_tree.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0 -+V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/advanced/4.txt b/testing/mdtest-patterns/advanced/4.txt -index 5d3b7da..62548ae 100644 ---- a/testing/mdtest-patterns/advanced/4.txt -+++ b/testing/mdtest-patterns/advanced/4.txt -@@ -1,52 +1,49 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir --V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 --V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 --V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -+V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -+V-3: Rank 0 will file_test on mdtest_tree.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0 -+V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 -+V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/advanced/5.txt b/testing/mdtest-patterns/advanced/5.txt -index e87ae0a..7192c35 100644 ---- a/testing/mdtest-patterns/advanced/5.txt -+++ b/testing/mdtest-patterns/advanced/5.txt -@@ -1,77 +1,95 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir --V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' --V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 --V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' --V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -+V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -+V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -+V-3: Rank 0 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -+V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -+V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -+V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -+V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/basic/0.txt b/testing/mdtest-patterns/basic/0.txt -index ebe0f14..4c816c5 100644 ---- a/testing/mdtest-patterns/basic/0.txt -+++ b/testing/mdtest-patterns/basic/0.txt -@@ -1,27 +1,25 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir --V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 --V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' --V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -+V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 will file_test on mdtest_tree.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0 -+V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: rm unique directories path is 'mdtest_tree.0' -+V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/basic/1.txt b/testing/mdtest-patterns/basic/1.txt -index ebe0f14..4c816c5 100644 ---- a/testing/mdtest-patterns/basic/1.txt -+++ b/testing/mdtest-patterns/basic/1.txt -@@ -1,27 +1,25 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir --V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 --V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' --V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' --V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -+V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 will file_test on mdtest_tree.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0 -+V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -+V-3: Rank 0 file_test: rm unique directories path is 'mdtest_tree.0' -+V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/basic/2.txt b/testing/mdtest-patterns/basic/2.txt -index 77f5c78..099b265 100644 ---- a/testing/mdtest-patterns/basic/2.txt -+++ b/testing/mdtest-patterns/basic/2.txt -@@ -1,29 +1,26 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1694 i 1 nstride 0 --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 --V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 --V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 310 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 1158 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' --V-3: Rank 0 Line 1754 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 i 1 nstride 0 -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir -+V-3: Rank 0 will file_test on mdtest_tree.0.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0.0 -+V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 -+V-3: Rank 0 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' -+V-3: Rank 0 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -diff --git a/testing/mdtest-patterns/basic/3.txt b/testing/mdtest-patterns/basic/3.txt -index eafadc1..cf925f8 100644 ---- a/testing/mdtest-patterns/basic/3.txt -+++ b/testing/mdtest-patterns/basic/3.txt -@@ -1,34 +1,31 @@ --V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... --V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' --V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' --V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' --V-3: Rank 0 Line 1694 i 1 nstride 0 --V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir --V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 --V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 --V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 --V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 --V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' --V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... --V-3: Rank 0 Line 373 create_remove_items_helper: close... --V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' --V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 --V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 -+V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' -+V-3: Rank 0 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -+V-3: Rank 0 i 1 nstride 0 -+V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir -+V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 -+V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 -+V-3: Rank 0 will file_test on mdtest_tree.0.0 -+V-3: Rank 0 Entering file_test on mdtest_tree.0.0 -+V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -+V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... -+V-3: Rank 0 create_remove_items_helper: close... -+V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -+V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 -+V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 -diff --git a/testing/s3.sh b/testing/s3.sh -new file mode 100755 -index 0000000..b38d339 ---- /dev/null -+++ b/testing/s3.sh -@@ -0,0 +1,33 @@ -+#!/bin/bash -+ -+# Test basic S3 behavior using minio. -+ -+ROOT="$(dirname ${BASH_SOURCE[0]})" -+TYPE="basic" -+ -+if [[ ! -e $ROOT/minio ]] ; then -+ wget https://dl.min.io/server/minio/release/linux-amd64/minio -+ mv minio $ROOT -+ chmod +x $ROOT/minio -+fi -+ -+export MINIO_ACCESS_KEY=accesskey -+export MINIO_SECRET_KEY=secretkey -+ -+$ROOT/minio --quiet server /dev/shm & -+ -+export IOR_EXTRA="-o test" -+export MDTEST_EXTRA="-d test" -+source $ROOT/test-lib.sh -+ -+I=100 # Start with this ID -+IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024*1024)) -t $((10*1024*1024)) -+MDTEST 2 -a S3-libs3 -L --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 10 -+MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 5 -w 1024 -e 1024 -+ -+IOR 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024)) -t $((10*1024)) --S3.bucket-per-file -+MDTEST 1 -a S3-libs3 -L --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 5 -+MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 10 -w 1024 -e 1024 -+ -+ -+kill -9 %1 -diff --git a/testing/test-lib.sh b/testing/test-lib.sh -index 444873d..a7e23fb 100644 ---- a/testing/test-lib.sh -+++ b/testing/test-lib.sh -@@ -7,12 +7,17 @@ - # Example: export IOR_EXTRA="-v -v -v" - - IOR_MPIRUN=${IOR_MPIRUN:-mpiexec -np} -+if ${IOR_MPIRUN} 1 --oversubscribe true ; then -+ IOR_MPIRUN="mpiexec --oversubscribe -np" -+fi - IOR_BIN_DIR=${IOR_BIN_DIR:-./src} --IOR_OUT=${IOR_OUT:-./test_logs} -+IOR_OUT=${IOR_OUT:-./test_logs/$TYPE} - IOR_TMP=${IOR_TMP:-/dev/shm} - IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity - MDTEST_EXTRA=${MDTEST_EXTRA:-} - MDTEST_TEST_PATTERNS=${MDTEST_TEST_PATTERNS:-../testing/mdtest-patterns/$TYPE} -+MDWB_EXTRA=${MDWB_EXTRA:-} -+ - - ################################################################################ - mkdir -p ${IOR_OUT} -@@ -40,7 +45,7 @@ I=0 - function IOR(){ - RANKS=$1 - shift -- WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} ${IOR_EXTRA} -o ${IOR_TMP}/ior" -+ WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} -o ${IOR_TMP}/ior ${IOR_EXTRA}" - $WHAT 1>"${IOR_OUT}/test_out.$I" 2>&1 - if [[ $? != 0 ]]; then - echo -n "ERR" -@@ -56,15 +61,15 @@ function MDTEST(){ - RANKS=$1 - shift - rm -rf ${IOR_TMP}/mdest -- WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} ${MDTEST_EXTRA} -d ${IOR_TMP}/mdest -V=4" -+ WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} -d ${IOR_TMP}/mdest ${MDTEST_EXTRA} -V=4" - $WHAT 1>"${IOR_OUT}/test_out.$I" 2>&1 - if [[ $? != 0 ]]; then - echo -n "ERR" - ERRORS=$(($ERRORS + 1)) - else - # compare basic pattern -+ grep "V-3" "${IOR_OUT}/test_out.$I" | sed "s/Line *[0-9]*//" > "${IOR_OUT}/tmp" - if [[ -r ${MDTEST_TEST_PATTERNS}/$I.txt ]] ; then -- grep "V-3" "${IOR_OUT}/test_out.$I" > "${IOR_OUT}/tmp" - cmp -s "${IOR_OUT}/tmp" ${MDTEST_TEST_PATTERNS}/$I.txt - if [[ $? != 0 ]]; then - mv "${IOR_OUT}/tmp" ${IOR_OUT}/tmp.$I -@@ -74,7 +79,7 @@ function MDTEST(){ - if [[ ! -e ${MDTEST_TEST_PATTERNS} ]] ; then - mkdir -p ${MDTEST_TEST_PATTERNS} - fi -- grep "V-3" "${IOR_OUT}/test_out.$I" > ${MDTEST_TEST_PATTERNS}/$I.txt -+ mv "${IOR_OUT}/tmp" ${MDTEST_TEST_PATTERNS}/$I.txt - fi - echo -n "OK " - fi -@@ -82,6 +87,25 @@ function MDTEST(){ - I=$((${I}+1)) - } - -+function MDWB(){ -+ RANKS=$1 -+ shift -+ if [[ "$DELETE" != "0" ]] ; then -+ rm -rf "${IOR_TMP}/md-workbench" -+ fi -+ WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/md-workbench ${@} -o ${IOR_TMP}/md-workbench ${MDWB_EXTRA}" -+ LOG="${IOR_OUT}/test_out.$I" -+ $WHAT 1>"$LOG" 2>&1 -+ if [[ $? != 0 ]] || grep '!!!' "$LOG" ; then -+ echo -n "ERR" -+ ERRORS=$(($ERRORS + 1)) -+ else -+ echo -n "OK " -+ fi -+ echo " $WHAT" -+ I=$((${I}+1)) -+} -+ - function END(){ - if [[ ${ERRORS} == 0 ]] ; then - echo "PASSED" -diff --git a/testing/test_comments.ior b/testing/test_comments.ior -index eaf7997..1472e8f 100644 ---- a/testing/test_comments.ior -+++ b/testing/test_comments.ior -@@ -2,16 +2,16 @@ - IOR START - api=posix - writeFile =1 -- randomOffset=1 -+ randomOffset=1 - reorderTasks=1 -- filePerProc=1 -+ filePerProc=1 - keepFile=1 - fsync=1 - repetitions=1 - multiFile=1 - # tab-prefixed comment --transferSize=100k --blockSize=100k -+transferSize=10k -+blockSize=20k - # space-prefixed comment - run - --dummy.delay-create=1000 diff --git a/Makefile b/Makefile index b42f7ad..87b3b73 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,5 @@ NAME := ior SRC_EXT := gz -PKG_GIT_COMMIT := d3574d536643475269d37211e283b49ebd6732d7 GITHUB_PROJECT := hpc/$(NAME) # This list of files that are in the upstream git repo but are not included in upstream's releases PATCH_EXCLUDE_FILES := .travis.yml README_DAOS doc/sphinx/ diff --git a/ior.spec b/ior.spec index 70e2b13..1b688f1 100644 --- a/ior.spec +++ b/ior.spec @@ -7,8 +7,8 @@ %global shortcommit %(c=%{commit};echo ${c:0:7}) Name: ior -Version: 3.3.0 -Release: 20%{?commit:.g%{shortcommit}}%{?dist} +Version: 4.0.0 +Release: 1%{?commit:.g%{shortcommit}}%{?dist} Summary: IOR-HPC @@ -50,10 +50,8 @@ IOR-HPC %prep %autosetup -p1 -%if "%{?commit}" != "" -# we most likely patched configure.ac +# we patched configure.ac autoreconf -%endif %build export CC=mpicc @@ -107,6 +105,9 @@ EOF %endif %changelog +* Fri Jan 12 2024 Dalton A. Bohning - 4.0.0-1 +- Update to 4.0.0 release + * Tue Jul 04 2023 Brian J. Murrell - 3.3.0-20 - Add BR: mercury-devel - Remove static library From fbc56816cb7650281dc2c0207380536e69243ebc Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 12 Jan 2024 19:03:24 +0000 Subject: [PATCH 02/15] update packaging Signed-off-by: Dalton Bohning --- packaging/Dockerfile.centos.7 | 30 +++++++++++++++++++++++++---- packaging/Dockerfile.mockbuild | 14 +++++++++----- packaging/Makefile_distro_vars.mk | 8 ++++++++ packaging/Makefile_packaging.mk | 8 ++++++-- packaging/rpm_chrootbuild | 32 ++++++++++++++++--------------- 5 files changed, 66 insertions(+), 26 deletions(-) diff --git a/packaging/Dockerfile.centos.7 b/packaging/Dockerfile.centos.7 index cdfb7f6..189ea1e 100644 --- a/packaging/Dockerfile.centos.7 +++ b/packaging/Dockerfile.centos.7 @@ -5,9 +5,31 @@ # # Pull base image -FROM centos:7 +FROM centos:centos7 LABEL maintainer="daos@daos.groups.io" +# Use local repo server if present +ARG REPO_FILE_URL +RUN set -e; \ + if [ -n "$REPO_FILE_URL" ]; then \ + cd /etc/yum.repos.d/ && \ + curl -k -f -o daos_ci-centos7-artifactory.repo.tmp \ + "$REPO_FILE_URL"daos_ci-centos7-artifactory.repo && \ + for file in *.repo; do \ + true > $file; \ + done; \ + mv daos_ci-centos7-artifactory.repo{.tmp,}; \ + fi; \ + yum -y install dnf; \ + yum clean all; \ + dnf --disablerepo \*epel\* -y install epel-release \ + dnf-plugins-core; \ + if [ -n "$REPO_FILE_URL" ]; then \ + dnf -y --quiet config-manager --disable epel; \ + fi; \ + dnf -y update epel-release; \ + dnf -y clean all + # use same UID as host and default value of 1000 if not specified ARG UID=1000 @@ -15,9 +37,9 @@ ARG UID=1000 #Nothing to do for CentOS # Install basic tools -RUN yum install -y epel-release -RUN yum install -y mock make rpm-build curl createrepo rpmlint redhat-lsb-core \ - git python-srpm-macros dnf +RUN dnf install -y epel-release +RUN dnf install -y mock make rpm-build curl createrepo rpmlint redhat-lsb-core \ + git python-srpm-macros dnf && dnf -y clean all # Add build user (to keep rpmbuild happy) ENV USER build diff --git a/packaging/Dockerfile.mockbuild b/packaging/Dockerfile.mockbuild index c8bc1a4..edfd973 100644 --- a/packaging/Dockerfile.mockbuild +++ b/packaging/Dockerfile.mockbuild @@ -5,7 +5,7 @@ # # Pull base image -ARG FVERSION=38 +ARG FVERSION=latest FROM fedora:$FVERSION # Needed for later use of FVERSION ARG FVERSION @@ -15,16 +15,19 @@ LABEL maintainer="daos@daos.groups.io" ARG REPO_FILE_URL RUN if [ -n "$REPO_FILE_URL" ]; then \ cd /etc/yum.repos.d/ && \ - curl -f -o daos_ci-fedora-artifactory.repo.tmp \ + curl -k -f -o daos_ci-fedora-artifactory.repo.tmp \ "$REPO_FILE_URL"daos_ci-fedora-artifactory.repo && \ - rm -f *.repo && \ + for file in *.repo; do \ + true > $file; \ + done; \ mv daos_ci-fedora-artifactory.repo{.tmp,}; \ fi # Install basic tools RUN dnf -y install mock make \ rpm-build createrepo rpmlint redhat-lsb-core git \ - python-srpm-macros rpmdevtools + python-srpm-macros rpmdevtools && \ + dnf -y clean all # use same UID as host and default value of 1000 if not specified ARG UID=1000 @@ -45,7 +48,8 @@ RUN dnf -y upgrade && \ # https://github.com/rpm-software-management/rpmlint/pull/795 in it # But make sure to patch after dnf upgrade so that an upgraded rpmlint # RPM doesn't wipe out our patch -COPY packaging/rpmlint--ignore-unused-rpmlintrc.patch . +ARG PACKAGINGDIR=packaging +COPY ${PACKAGINGDIR}/rpmlint--ignore-unused-rpmlintrc.patch . RUN (cd $(python3 -c 'import site; print(site.getsitepackages()[-1])') && \ if ! grep -e --ignore-unused-rpmlintrc rpmlint/cli.py; then \ if ! patch -p1; then \ diff --git a/packaging/Makefile_distro_vars.mk b/packaging/Makefile_distro_vars.mk index 6a7f88b..4e8a09d 100644 --- a/packaging/Makefile_distro_vars.mk +++ b/packaging/Makefile_distro_vars.mk @@ -83,6 +83,14 @@ DISTRO_VERSION ?= $(VERSION_ID) ORIG_TARGET_VER := 15.4 SED_EXPR := 1p endif +ifeq ($(CHROOT_NAME),opensuse-leap-15.5-x86_64) +VERSION_ID := 15.5 +DISTRO_ID := sl15.5 +DISTRO_BASE := LEAP_15 +DISTRO_VERSION ?= $(VERSION_ID) +ORIG_TARGET_VER := 15.5 +SED_EXPR := 1p +endif endif ifeq ($(ID),centos) ID = el diff --git a/packaging/Makefile_packaging.mk b/packaging/Makefile_packaging.mk index 2fdcaed..f347735 100644 --- a/packaging/Makefile_packaging.mk +++ b/packaging/Makefile_packaging.mk @@ -54,7 +54,8 @@ RPM_BUILD_OPTIONS := $(BUILD_DEFINES) GIT_DIFF_EXCLUDES := $(PATCH_EXCLUDE_FILES:%=':!%') endif -COMMON_RPM_ARGS = --define "_topdir $$PWD/_topdir" $(BUILD_DEFINES) +FVERSION ?= latest +COMMON_RPM_ARGS := --define "_topdir $$PWD/_topdir" $(BUILD_DEFINES) SPEC := $(shell if [ -f $(NAME)-$(DISTRO_BASE).spec ]; then echo $(NAME)-$(DISTRO_BASE).spec; else echo $(NAME).spec; fi) VERSION = $(eval VERSION := $(shell rpm $(COMMON_RPM_ARGS) --specfile --qf '%{version}\n' $(SPEC) | sed -n '1p'))$(VERSION) DEB_RVERS := $(subst $(DOT),\$(DOT),$(VERSION)) @@ -369,6 +370,7 @@ endif podman_chrootbuild: if ! podman build --build-arg REPO_FILE_URL=$(REPO_FILE_URL) \ + --build-arg FVERSION=$(FVERSION) \ -t $(subst +,-,$(CHROOT_NAME))-chrootbuild \ -f packaging/Dockerfile.mockbuild .; then \ echo "Container build failed"; \ @@ -386,7 +388,9 @@ podman_chrootbuild: exit 1; \ fi; \ rpmlint $$(ls /var/lib/mock/$(CHROOT_NAME)/result/*.rpm | \ - grep -v -e debuginfo -e debugsource -e src.rpm)' + grep -v -e debuginfo -e debugsource -e src.rpm)'; then \ + exit 1; \ + fi docker_chrootbuild: if ! $(DOCKER) build --build-arg UID=$$(id -u) -t chrootbuild \ diff --git a/packaging/rpm_chrootbuild b/packaging/rpm_chrootbuild index d122e0e..a19308e 100755 --- a/packaging/rpm_chrootbuild +++ b/packaging/rpm_chrootbuild @@ -2,10 +2,7 @@ set -uex -original_cfg_file="/etc/mock/$CHROOT_NAME.cfg" -cfg_file=mock.cfg - -cp "$original_cfg_file" "$cfg_file" +cp /etc/mock/"$CHROOT_NAME".cfg mock.cfg if [[ $CHROOT_NAME == *epel-8-x86_64 ]]; then cat <> mock.cfg @@ -22,7 +19,7 @@ if [[ $CHROOT_NAME == *epel-7-x86_64 ]]; then fi # Allow BR: foo-devel < 1.2 to work when foo-devel-1.3 is actually available -cat <> "$cfg_file" +cat <> mock.cfg config_opts['dnf.conf'] += """ [main] best=0 @@ -33,7 +30,7 @@ EOF repo_adds=() repo_dels=() -echo -e "config_opts['yum.conf'] += \"\"\"\n" >> "$cfg_file" +echo -e "config_opts['yum.conf'] += \"\"\"\n" >> mock.cfg if [ -n "${ARTIFACTORY_URL:-}" ] && "$LOCAL_REPOS"; then repo_dels+=("--disablerepo=\*") @@ -56,7 +53,7 @@ if [ -n "${ARTIFACTORY_URL:-}" ] && "$LOCAL_REPOS"; then REPO_FILE_URL="file://$(readlink -e "$REPO_FILES_PR")/" fi fi - curl -sSf "${REPO_FILE_URL}daos_ci-$DISTRO"-mock-artifactory.repo >> "$cfg_file" + curl -sSf "$REPO_FILE_URL"daos_ci-"${CHROOT_NAME%-*}".repo >> mock.cfg repo_adds+=("--enablerepo *-artifactory") fi fi @@ -81,9 +78,9 @@ for repo in $DISTRO_BASE_PR_REPOS $PR_REPOS; do repo_adds+=("--enablerepo $repo:${branch//[@\/]/_}:$build_number") echo -e "[$repo:${branch//[@\/]/_}:$build_number]\n\ name=$repo:${branch//[@\/]/_}:$build_number\n\ -baseurl=${JENKINS_URL:-https://build.hpdd.intel.com/}job/daos-stack/job/$repo/job/${branch//\//%2F}/$build_number/artifact/artifacts/$DISTRO/\n\ +baseurl=${ARTIFACTS_URL:-${JENKINS_URL:-https://build.hpdd.intel.com/}job/}daos-stack/job/$repo/job/${branch//\//%2F}/$build_number/artifact/artifacts/$DISTRO/\n\ enabled=1\n\ -gpgcheck=False\n" >> "$cfg_file" +gpgcheck=False\n" >> mock.cfg done for repo in $JOB_REPOS; do repo_name=${repo##*://} @@ -97,9 +94,9 @@ for repo in $JOB_REPOS; do echo -e "[${repo_name//[@\/]/_}]\n\ name=${repo_name}\n\ baseurl=${repo//\//%2F}\n\ -enabled=1\n" >> "$cfg_file" +enabled=1\n" >> mock.cfg done -echo "\"\"\"" >> "$cfg_file" +echo "\"\"\"" >> mock.cfg if [ -n "$DISTRO_VERSION" ]; then releasever_opt=("--config-opts=releasever=$DISTRO_VERSION") @@ -111,11 +108,14 @@ if ls -l /scratch/mock/cache/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz; flock "$bs_dir" -c "cp -a $bs_dir/root_cache /var/cache/mock/${CHROOT_NAME}-bootstrap" fi -# shellcheck disable=SC2086 -eval mock -r "$cfg_file" ${repo_dels[*]} ${repo_adds[*]} --disablerepo=\*-debug* \ - "${releasever_opt[@]}" $MOCK_OPTIONS $RPM_BUILD_OPTIONS "$TARGET" +rc=0 +# shellcheck disable=SC2086,SC2048 +if ! eval mock -r mock.cfg ${repo_dels[*]} ${repo_adds[*]} --no-clean \ + --disablerepo=\*-debug* ${releasever_opt[*]} $MOCK_OPTIONS \ + $RPM_BUILD_OPTIONS "$TARGET"; then + rc=${PIPESTATUS[0]} +fi -date if ls -l /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz && [ -d /scratch/ ]; then mkdir -p /scratch/mock/cache/"${CHROOT_NAME}"-bootstrap/ @@ -123,3 +123,5 @@ if ls -l /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz && flock "$bs_dir" -c "cp -a /var/cache/mock/${CHROOT_NAME}-bootstrap/root_cache $bs_dir/" fi fi + +exit "$rc" From 24ecda71dcc533848f09852c0e885d696b316b05 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 12 Jan 2024 19:51:58 +0000 Subject: [PATCH 03/15] do not build centos7 Signed-off-by: Dalton Bohning --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 860452b..8dca98e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -41,5 +41,5 @@ //@Library(value="pipeline-lib@your_branch") _ /* groovylint-disable-next-line CompileStatic */ -packageBuildingPipelineDAOSTest(['distros' : ['centos7', 'el8', 'el9', 'leap15'], +packageBuildingPipelineDAOSTest(['distros' : ['el8', 'el9', 'leap15'], 'test-tag': 'ior']) From 3edbdbc03bd9eb8ae0bd824552f0e366cfe9e948 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 16:39:17 +0000 Subject: [PATCH 04/15] Remove BR: mercury-devel and use upstream patch Signed-off-by: Dalton Bohning --- daos-configure.patch | 16 ---------------- ior.spec | 6 ++++-- 2 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 daos-configure.patch diff --git a/daos-configure.patch b/daos-configure.patch deleted file mode 100644 index 01b305d..0000000 --- a/daos-configure.patch +++ /dev/null @@ -1,16 +0,0 @@ ---- a/configure.ac -+++ b/configure.ac -@@ -307,7 +307,12 @@ - [], [with_daos=no]) - AS_IF([test "x$with_daos" != xno], [ - DAOS="yes" -- LDFLAGS="$LDFLAGS -L$with_daos/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_daos/lib64" -+ if test -d $with_daos/lib/x86_64-linux-gnu/; then -+ LIB_DIR=$with_daos/lib/x86_64-linux-gnu -+ else -+ LIB_DIR=$with_daos/lib64 -+ fi -+ LDFLAGS="$LDFLAGS -L$LIB_DIR -Wl,--enable-new-dtags -Wl,-rpath=$LIB_DIR -lmercury" - CPPFLAGS="$CPPFLAGS -I$with_daos/include" - AC_CHECK_HEADERS(gurt/common.h,, [unset DAOS]) - AC_CHECK_HEADERS(daos.h,, [unset DAOS]) diff --git a/ior.spec b/ior.spec index 1b688f1..9a5dde3 100644 --- a/ior.spec +++ b/ior.spec @@ -18,7 +18,8 @@ Source0: https://github.com/hpc/%{name}/releases/download/%{version}/%{name}- %if "%{?commit}" != "" Patch1: %{version}..%{commit}.patch %endif -Patch3: daos-configure.patch +# patch configure.ac +Patch3: https://github.com/hpc/ior/commit/38064419cbe959cb538695e51b2bc2a91d6971f7.patch BuildRequires: mpich-devel BuildRequires: hwloc-devel @@ -27,7 +28,6 @@ BuildRequires: unzip BuildRequires: autoconf, automake BuildRequires: daos-devel BuildRequires: hdf5-mpich-devel%{?_isa} -BuildRequires: mercury-devel BuildRequires: chrpath %if (0%{?suse_version} >= 1500) BuildRequires: lua-lmod @@ -107,6 +107,8 @@ EOF %changelog * Fri Jan 12 2024 Dalton A. Bohning - 4.0.0-1 - Update to 4.0.0 release +- Remove BR: mercury-devel +- Use upstream configure.ac patch instead of local * Tue Jul 04 2023 Brian J. Murrell - 3.3.0-20 - Add BR: mercury-devel From e4346a462e38a47a407a1436b72524075e652c53 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 17:40:46 +0000 Subject: [PATCH 05/15] attempt to fix license linting Signed-off-by: Dalton Bohning --- ior.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ior.spec b/ior.spec index 9a5dde3..fa65120 100644 --- a/ior.spec +++ b/ior.spec @@ -12,7 +12,7 @@ Release: 1%{?commit:.g%{shortcommit}}%{?dist} Summary: IOR-HPC -License: GPL +License: GPL-2.0-only URL: https://github.com/hpc/%{name}/ Source0: https://github.com/hpc/%{name}/releases/download/%{version}/%{name}-%{version}.tar.gz %if "%{?commit}" != "" From ef4c220f15b92bbe7ae58037bd49643baaf018a5 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 18:04:17 +0000 Subject: [PATCH 06/15] attempt fix rpm lint file-not-in-%lang Signed-off-by: Dalton Bohning --- ior.spec | 1 + 1 file changed, 1 insertion(+) diff --git a/ior.spec b/ior.spec index fa65120..4c1d839 100644 --- a/ior.spec +++ b/ior.spec @@ -94,6 +94,7 @@ $MPI_MAN/man1/* EOF %endif +%find_lang mdtest %if (0%{?suse_version} >= 1) %files From 4864ccd9c4be026e02d64a4dbc9ab86d7b039563 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 18:18:28 +0000 Subject: [PATCH 07/15] try --with-man Signed-off-by: Dalton Bohning --- ior.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ior.spec b/ior.spec index 4c1d839..7aa522f 100644 --- a/ior.spec +++ b/ior.spec @@ -94,7 +94,7 @@ $MPI_MAN/man1/* EOF %endif -%find_lang mdtest +%find_lang mdtest --with-man %if (0%{?suse_version} >= 1) %files From ecea31d845f3b16b0ddf5742eab539934878e5a9 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 18:28:13 +0000 Subject: [PATCH 08/15] move find_lang Signed-off-by: Dalton Bohning --- ior.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ior.spec b/ior.spec index 7aa522f..832934d 100644 --- a/ior.spec +++ b/ior.spec @@ -94,15 +94,15 @@ $MPI_MAN/man1/* EOF %endif -%find_lang mdtest --with-man - %if (0%{?suse_version} >= 1) %files %{_bindir}/* %{_defaultdocdir}/%{name}/ %{_mandir}/man1/* %else +%find_lang mdtest --with-man %files -f files.mpich +%files -f mdtest.lang %endif %changelog From 062d5c4b36dea6bdeee3360f9f875ff2685108ac Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Jan 2024 23:36:22 +0000 Subject: [PATCH 09/15] add V=1 Signed-off-by: Dalton Bohning --- ior.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ior.spec b/ior.spec index 832934d..d2cabc8 100644 --- a/ior.spec +++ b/ior.spec @@ -71,11 +71,11 @@ fi %else %configure --with-mpiio --with-daos=/usr --with-hdf5 --bindir=$MPI_BIN --mandir=$MPI_MAN --libdir=$MPI_LIB --includedir=$MPI_INCLUDE --datadir=%{_datadir}/doc/ior-mpich %endif -%make_build +%make_build V=1 %install %module_load mpich -%make_install +%make_install V=1 %if 0%{?suse_version} MPI_LIB=%{_libdir} From af3ed3899709b8cf69ee1ae2d4b005c0cd95de26 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 18 Jan 2024 22:01:16 +0000 Subject: [PATCH 10/15] try both -fPIC and -fPIE for giggles Signed-off-by: Dalton Bohning --- ior.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ior.spec b/ior.spec index d2cabc8..3357509 100644 --- a/ior.spec +++ b/ior.spec @@ -58,8 +58,8 @@ export CC=mpicc export CXX=mpicxx export FC=mpif90 export F77=mpif77 -export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC" -export CXXFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC" +export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC -fPIE" +export CXXFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC -fPIE" if [ ! -f configure ]; then # probably a git tarball ./bootstrap From 1d60a7d09a28da42af3e26c94af6e646f0a2f2d8 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Wed, 28 Feb 2024 23:04:28 +0000 Subject: [PATCH 11/15] update packaging again Signed-off-by: Dalton Bohning --- packaging/Dockerfile.mockbuild | 18 ++++++--- packaging/Makefile_packaging.mk | 1 + packaging/ccache-stats.patch | 66 +++++++++++++++++++++++++++++++++ packaging/get_base_branch | 22 +++++++++++ packaging/rpm_chrootbuild | 33 ++++++++++++----- 5 files changed, 125 insertions(+), 15 deletions(-) create mode 100644 packaging/ccache-stats.patch create mode 100755 packaging/get_base_branch diff --git a/packaging/Dockerfile.mockbuild b/packaging/Dockerfile.mockbuild index edfd973..76a6e94 100644 --- a/packaging/Dockerfile.mockbuild +++ b/packaging/Dockerfile.mockbuild @@ -1,5 +1,5 @@ # -# Copyright 2018-2023 Intel Corporation +# Copyright 2018-2024 Intel Corporation # # 'recipe' for Docker to build an RPM # @@ -48,16 +48,24 @@ RUN dnf -y upgrade && \ # https://github.com/rpm-software-management/rpmlint/pull/795 in it # But make sure to patch after dnf upgrade so that an upgraded rpmlint # RPM doesn't wipe out our patch +# Ditto for the patch to zero and display ccache stats +# https://github.com/rpm-software-management/mock/pull/1299 ARG PACKAGINGDIR=packaging -COPY ${PACKAGINGDIR}/rpmlint--ignore-unused-rpmlintrc.patch . +COPY ${PACKAGINGDIR}/*.patch ./ RUN (cd $(python3 -c 'import site; print(site.getsitepackages()[-1])') && \ if ! grep -e --ignore-unused-rpmlintrc rpmlint/cli.py; then \ - if ! patch -p1; then \ + if ! patch -p1 < $OLDPWD/rpmlint--ignore-unused-rpmlintrc.patch; then \ exit 1; \ fi; \ rm -f rpmlint/__pycache__/{cli,lint}.*.pyc; \ - fi) < rpmlint--ignore-unused-rpmlintrc.patch; \ - rm -f rpmlint--ignore-unused-rpmlintrc.patch + fi; \ + if ! grep _ccachePostBuildHook mockbuild/plugins/ccache.py; then \ + if ! patch -p3 < $OLDPWD/ccache-stats.patch; then \ + exit 1; \ + fi; \ + rm -f mockbuild/plugins/__pycache__/ccache.*.pyc; \ + fi); \ + rm -f rpmlint--ignore-unused-rpmlintrc.patch ccache-stats.patch # show the release that was built ARG CACHEBUST diff --git a/packaging/Makefile_packaging.mk b/packaging/Makefile_packaging.mk index f347735..3201a22 100644 --- a/packaging/Makefile_packaging.mk +++ b/packaging/Makefile_packaging.mk @@ -364,6 +364,7 @@ chrootbuild: $(SRPM) $(CALLING_MAKEFILE) LOCAL_REPOS='$(LOCAL_REPOS)' \ ARTIFACTORY_URL="$(ARTIFACTORY_URL)" \ DISTRO_VERSION="$(DISTRO_VERSION)" \ + PACKAGE="$(NAME)" \ TARGET="$<" \ packaging/rpm_chrootbuild endif diff --git a/packaging/ccache-stats.patch b/packaging/ccache-stats.patch new file mode 100644 index 0000000..26d5eeb --- /dev/null +++ b/packaging/ccache-stats.patch @@ -0,0 +1,66 @@ +From e87d916d7f49ea4949973adf0f09e9e5bf891e03 Mon Sep 17 00:00:00 2001 +From: "Brian J. Murrell" +Date: Tue, 30 Jan 2024 11:03:12 -0500 +Subject: [PATCH 1/2] Show ccache stats at the end of the build + +Zero the ccache stats at the beginning of the build and then display the +ccache stats at the end of the build to see how effective ccache was. + +Signed-off-by: Brian J. Murrell +--- + mock/py/mockbuild/plugins/ccache.py | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/mock/py/mockbuild/plugins/ccache.py b/mock/py/mockbuild/plugins/ccache.py +index 2666ad9fc..1080ffe68 100644 +--- a/mock/py/mockbuild/plugins/ccache.py ++++ b/mock/py/mockbuild/plugins/ccache.py +@@ -35,6 +35,7 @@ def __init__(self, plugins, conf, buildroot): + buildroot.preexisting_deps.append("ccache") + plugins.add_hook("prebuild", self._ccacheBuildHook) + plugins.add_hook("preinit", self._ccachePreInitHook) ++ plugins.add_hook("postbuild", self._ccachePostBuildHook) + buildroot.mounts.add( + BindMountPoint(srcpath=self.ccachePath, bindpath=buildroot.make_chroot_path("/var/tmp/ccache"))) + +@@ -47,6 +48,9 @@ def __init__(self, plugins, conf, buildroot): + @traceLog() + def _ccacheBuildHook(self): + self.buildroot.doChroot(["ccache", "-M", str(self.ccache_opts['max_cache_size'])], shell=False) ++ # zero ccache stats ++ getLog().info("Zero ccache stats:") ++ self.buildroot.doChroot(["ccache", "--zero-stats"], printOutput=True, shell=False) + + # set up the ccache dir. + # we also set a few variables used by ccache to find the shared cache. +@@ -61,3 +65,10 @@ def _ccachePreInitHook(self): + file_util.mkdirIfAbsent(self.buildroot.make_chroot_path('/var/tmp/ccache')) + file_util.mkdirIfAbsent(self.ccachePath) + self.buildroot.uid_manager.changeOwner(self.ccachePath, recursive=True) ++ ++ # get some cache stats ++ def _ccachePostBuildHook(self): ++ # show the cache hit stats ++ getLog().info("ccache stats:") ++ self.buildroot.doChroot(["ccache", "--show-stats"], printOutput=True, shell=False) +++ + +From bfd3a7e1bb47d28ee60a94cb5985c1f66476475f Mon Sep 17 00:00:00 2001 +From: "Brian J. Murrell" +Date: Tue, 30 Jan 2024 11:17:48 -0500 +Subject: [PATCH 2/2] Remove extraneous line + +Signed-off-by: Brian J. Murrell +--- + mock/py/mockbuild/plugins/ccache.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/mock/py/mockbuild/plugins/ccache.py b/mock/py/mockbuild/plugins/ccache.py +index 1080ffe68..1a20846d3 100644 +--- a/mock/py/mockbuild/plugins/ccache.py ++++ b/mock/py/mockbuild/plugins/ccache.py +@@ -71,4 +71,3 @@ def _ccachePostBuildHook(self): + # show the cache hit stats + getLog().info("ccache stats:") + self.buildroot.doChroot(["ccache", "--show-stats"], printOutput=True, shell=False) +-+ diff --git a/packaging/get_base_branch b/packaging/get_base_branch new file mode 100755 index 0000000..27515a7 --- /dev/null +++ b/packaging/get_base_branch @@ -0,0 +1,22 @@ +#!/bin/bash + +# find the base branch of the current branch + +set -eux -o pipefail +IFS=' ' read -r -a add_bases <<< "${1:-}" +origin=origin +mapfile -t all_bases < <(echo "master" + git branch -r | sed -ne "/^ $origin\\/release\\/[0-9]/s/^ $origin\\///p") +all_bases+=("${add_bases[@]}") +TARGET="master" +min_diff=-1 +for base in "${all_bases[@]}"; do + git rev-parse --verify "$origin/$base" &> /dev/null || continue + commits_ahead=$(git log --oneline "$origin/$base..HEAD" | wc -l) + if [ "$min_diff" -eq -1 ] || [ "$min_diff" -gt "$commits_ahead" ]; then + TARGET="$base" + min_diff=$commits_ahead + fi +done +echo "$TARGET" +exit 0 diff --git a/packaging/rpm_chrootbuild b/packaging/rpm_chrootbuild index a19308e..4dcdaa4 100755 --- a/packaging/rpm_chrootbuild +++ b/packaging/rpm_chrootbuild @@ -4,6 +4,13 @@ set -uex cp /etc/mock/"$CHROOT_NAME".cfg mock.cfg +# Enable mock ccache plugin +cat <> mock.cfg +config_opts['plugin_conf']['ccache_enable'] = True +config_opts['plugin_conf']['ccache_opts']['dir'] = "%(cache_topdir)s/%(root)s/ccache/" +EOF + + if [[ $CHROOT_NAME == *epel-8-x86_64 ]]; then cat <> mock.cfg config_opts['module_setup_commands'] = [ @@ -103,24 +110,30 @@ if [ -n "$DISTRO_VERSION" ]; then fi bs_dir=/scratch/mock/cache/"${CHROOT_NAME}"-bootstrap -if ls -l /scratch/mock/cache/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz; then - mkdir -p "/var/cache/mock/${CHROOT_NAME}-bootstrap" +if ls -l "$bs_dir"/root_cache/cache.tar.gz; then + mkdir -p "/var/cache/mock/${CHROOT_NAME}-bootstrap/" flock "$bs_dir" -c "cp -a $bs_dir/root_cache /var/cache/mock/${CHROOT_NAME}-bootstrap" fi +if ls -l "$bs_dir/ccache-$CHROOT_NAME-$PACKAGE".tar.gz; then + flock "$bs_dir" -c "tar -C / -xzf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz" +fi rc=0 # shellcheck disable=SC2086,SC2048 -if ! eval mock -r mock.cfg ${repo_dels[*]} ${repo_adds[*]} --no-clean \ - --disablerepo=\*-debug* ${releasever_opt[*]} $MOCK_OPTIONS \ - $RPM_BUILD_OPTIONS "$TARGET"; then +if ! eval time mock -r mock.cfg ${repo_dels[*]} ${repo_adds[*]} --no-clean \ + --disablerepo=\*-debug* ${releasever_opt[*]} $MOCK_OPTIONS \ + $RPM_BUILD_OPTIONS "$TARGET"; then rc=${PIPESTATUS[0]} fi -if ls -l /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz && - [ -d /scratch/ ]; then - mkdir -p /scratch/mock/cache/"${CHROOT_NAME}"-bootstrap/ - if ! cmp /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz "$bs_dir"/root_cache/cache.tar.gz; then - flock "$bs_dir" -c "cp -a /var/cache/mock/${CHROOT_NAME}-bootstrap/root_cache $bs_dir/" +# Save the ccache +if [ -d /scratch/ ]; then + mkdir -p "$bs_dir"/ + flock "$bs_dir" -c "tar -czf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz /var/cache/mock/${CHROOT_NAME}/ccache" + if ls -l /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz; then + if ! cmp /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz "$bs_dir"/root_cache/cache.tar.gz; then + flock "$bs_dir" -c "cp -a /var/cache/mock/${CHROOT_NAME}-bootstrap/root_cache $bs_dir/" + fi fi fi From 30c6c52009638d2a92b9fd7026b5c74100d3de16 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 31 Jan 2025 22:57:45 +0000 Subject: [PATCH 12/15] update packaging again Signed-off-by: Dalton Bohning --- packaging/Dockerfile.mockbuild | 8 +++++--- packaging/Dockerfile.ubuntu.20.04 | 25 +++++++++++-------------- packaging/Makefile_distro_vars.mk | 8 ++++++++ packaging/Makefile_packaging.mk | 6 ++++++ packaging/get_base_branch | 2 +- packaging/rpm_chrootbuild | 12 +++++++++--- 6 files changed, 40 insertions(+), 21 deletions(-) diff --git a/packaging/Dockerfile.mockbuild b/packaging/Dockerfile.mockbuild index 76a6e94..6bad37d 100644 --- a/packaging/Dockerfile.mockbuild +++ b/packaging/Dockerfile.mockbuild @@ -35,10 +35,12 @@ ARG UID=1000 # Add build user (to keep rpmbuild happy) ENV USER build ENV PASSWD build -RUN useradd -u $UID -ms /bin/bash $USER -RUN echo "$USER:$PASSWD" | chpasswd # add the user to the mock group so it can run mock -RUN usermod -a -G mock $USER +RUN if [ $UID != 0 ]; then \ + useradd -u $UID -ms /bin/bash $USER; \ + echo "$USER:$PASSWD" | chpasswd; \ + usermod -a -G mock $USER; \ + fi ARG CB0 RUN dnf -y upgrade && \ diff --git a/packaging/Dockerfile.ubuntu.20.04 b/packaging/Dockerfile.ubuntu.20.04 index ec76bfd..4d05464 100644 --- a/packaging/Dockerfile.ubuntu.20.04 +++ b/packaging/Dockerfile.ubuntu.20.04 @@ -18,26 +18,23 @@ RUN if [ -n "$REPO_FILE_URL" ]; then \ true > ../sources.list && \ mv daos_ci-ubuntu20.04-artifactory.list.tmp \ daos_ci-ubuntu20.04-artifactory.list; \ + url="${REPO_FILE_URL%/*/}/hpe-ilorest-ubuntu-bionic-proxy/"; \ + else \ + url="https://downloads.linux.hpe.com/SDR/repo/ilorest/"; \ fi; \ cd -; \ - curl -f -O "$REPO_FILE_URL"esad_repo.key; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg \ - --import esad_repo.key; \ mkdir -p /usr/local/share/keyrings/; \ + curl -f -O "$url"GPG-KEY-hprest; \ + gpg --no-default-keyring --keyring ./temp-keyring.gpg \ + --import GPG-KEY-hprest; \ gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \ - --output /usr/local/share/keyrings/daos-stack-public.gpg; \ + --output /usr/local/share/keyrings/hpe-sdr-public.gpg; \ rm ./temp-keyring.gpg; \ - url_prefix=https://downloads.linux.hpe.com/SDR/; \ - for url in hpPublicKey2048.pub \ - hpPublicKey2048_key1.pub \ - hpePublicKey2048_key1.pub; do \ - curl -f -O "$url_prefix$url"; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg \ - --import "$(basename $url)"; \ - done; \ + curl -f -O "$REPO_FILE_URL"esad_repo.key; \ + gpg --no-default-keyring --keyring ./temp-keyring.gpg \ + --import esad_repo.key; \ gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \ - --output /usr/local/share/keyrings/hpe-sdr-public.gpg; \ - rm ./temp-keyring.gpg + --output /usr/local/share/keyrings/daos-stack-public.gpg # Install basic tools RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ diff --git a/packaging/Makefile_distro_vars.mk b/packaging/Makefile_distro_vars.mk index 4e8a09d..79ae211 100644 --- a/packaging/Makefile_distro_vars.mk +++ b/packaging/Makefile_distro_vars.mk @@ -91,6 +91,14 @@ DISTRO_VERSION ?= $(VERSION_ID) ORIG_TARGET_VER := 15.5 SED_EXPR := 1p endif +ifeq ($(CHROOT_NAME),opensuse-leap-15.6-x86_64) +VERSION_ID := 15.6 +DISTRO_ID := sl15.6 +DISTRO_BASE := LEAP_15 +DISTRO_VERSION ?= $(VERSION_ID) +ORIG_TARGET_VER := 15.6 +SED_EXPR := 1p +endif endif ifeq ($(ID),centos) ID = el diff --git a/packaging/Makefile_packaging.mk b/packaging/Makefile_packaging.mk index 3201a22..75038c1 100644 --- a/packaging/Makefile_packaging.mk +++ b/packaging/Makefile_packaging.mk @@ -35,7 +35,11 @@ TEST_PACKAGES ?= ${NAME} # unfortunately we cannot always name the repo the same as the project REPO_NAME ?= $(NAME) +ifneq ($(CI_PR_REPOS),) +PR_REPOS ?= $(CI_PR_REPOS) +else PR_REPOS ?= $(shell git show -s --format=%B | sed -ne 's/^PR-repos: *\(.*\)/\1/p') +endif LEAP_15_PR_REPOS ?= $(shell git show -s --format=%B | sed -ne 's/^PR-repos-leap15: *\(.*\)/\1/p') EL_7_PR_REPOS ?= $(shell git show -s --format=%B | sed -ne 's/^PR-repos-el7: *\(.*\)/\1/p') EL_8_PR_REPOS ?= $(shell git show -s --format=%B | sed -ne 's/^PR-repos-el8: *\(.*\)/\1/p') @@ -424,6 +428,8 @@ packaging_check: --exclude libfabric.spec \ --exclude Makefile \ --exclude README.md \ + --exclude SECURITY.md \ + --exclude LICENSE \ --exclude _topdir \ --exclude \*.tar.\* \ --exclude \*.code-workspace \ diff --git a/packaging/get_base_branch b/packaging/get_base_branch index 27515a7..75eb90b 100755 --- a/packaging/get_base_branch +++ b/packaging/get_base_branch @@ -4,7 +4,7 @@ set -eux -o pipefail IFS=' ' read -r -a add_bases <<< "${1:-}" -origin=origin +origin="${ORIGIN:-origin}" mapfile -t all_bases < <(echo "master" git branch -r | sed -ne "/^ $origin\\/release\\/[0-9]/s/^ $origin\\///p") all_bases+=("${add_bases[@]}") diff --git a/packaging/rpm_chrootbuild b/packaging/rpm_chrootbuild index 4dcdaa4..d6443b7 100755 --- a/packaging/rpm_chrootbuild +++ b/packaging/rpm_chrootbuild @@ -109,7 +109,7 @@ if [ -n "$DISTRO_VERSION" ]; then releasever_opt=("--config-opts=releasever=$DISTRO_VERSION") fi -bs_dir=/scratch/mock/cache/"${CHROOT_NAME}"-bootstrap +bs_dir=/scratch/mock/cache/"${CHROOT_NAME}"-bootstrap-$(id -u) if ls -l "$bs_dir"/root_cache/cache.tar.gz; then mkdir -p "/var/cache/mock/${CHROOT_NAME}-bootstrap/" flock "$bs_dir" -c "cp -a $bs_dir/root_cache /var/cache/mock/${CHROOT_NAME}-bootstrap" @@ -129,10 +129,16 @@ fi # Save the ccache if [ -d /scratch/ ]; then mkdir -p "$bs_dir"/ - flock "$bs_dir" -c "tar -czf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz /var/cache/mock/${CHROOT_NAME}/ccache" + if ! flock "$bs_dir" -c "tar -czf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz /var/cache/mock/${CHROOT_NAME}/ccache"; then + echo "Failed to save ccache. Plowing onward." + echo "I am $(id)" + fi if ls -l /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz; then if ! cmp /var/cache/mock/"${CHROOT_NAME}"-bootstrap/root_cache/cache.tar.gz "$bs_dir"/root_cache/cache.tar.gz; then - flock "$bs_dir" -c "cp -a /var/cache/mock/${CHROOT_NAME}-bootstrap/root_cache $bs_dir/" + if ! flock "$bs_dir" -c "cp -a /var/cache/mock/${CHROOT_NAME}-bootstrap/root_cache $bs_dir/"; then + echo "Failed to save root_cache. Plowing onward." + echo "I am $(id)" + fi fi fi fi From f475c5dd38e52767ab06e815e82fbd9bc8cb96b0 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 31 Jan 2025 22:59:09 +0000 Subject: [PATCH 13/15] update changelog Signed-off-by: Dalton Bohning --- ior.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ior.spec b/ior.spec index 3357509..b829274 100644 --- a/ior.spec +++ b/ior.spec @@ -106,7 +106,7 @@ EOF %endif %changelog -* Fri Jan 12 2024 Dalton A. Bohning - 4.0.0-1 +* Fri Jan 31 2025 Dalton A. Bohning - 4.0.0-1 - Update to 4.0.0 release - Remove BR: mercury-devel - Use upstream configure.ac patch instead of local From 199a099eaee822c3faf63dcc2e62742071168e74 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 31 Jan 2025 23:00:54 +0000 Subject: [PATCH 14/15] review update Signed-off-by: Dalton Bohning --- ior.spec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ior.spec b/ior.spec index b829274..49c1079 100644 --- a/ior.spec +++ b/ior.spec @@ -58,8 +58,10 @@ export CC=mpicc export CXX=mpicxx export FC=mpif90 export F77=mpif77 +%if (0%{?suse_version} >= 1500) export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC -fPIE" export CXXFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fPIC -fPIE" +%endif if [ ! -f configure ]; then # probably a git tarball ./bootstrap From 72481b44630359a6118b2c906daa1b03cc984007 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 3 Mar 2026 19:26:20 +0000 Subject: [PATCH 15/15] update packaging Signed-off-by: Dalton Bohning --- packaging/Dockerfile.coverity | 3 +- packaging/Dockerfile.mockbuild | 26 +++++----- packaging/Dockerfile.ubuntu.20.04 | 54 ++++++++------------- packaging/Dockerfile.ubuntu.rolling | 3 +- packaging/debian_chrootbuild | 75 ++++++++++++++++++----------- packaging/rpm_chrootbuild | 26 ++++++++-- 6 files changed, 110 insertions(+), 77 deletions(-) diff --git a/packaging/Dockerfile.coverity b/packaging/Dockerfile.coverity index 7eed2c3..f8171c9 100755 --- a/packaging/Dockerfile.coverity +++ b/packaging/Dockerfile.coverity @@ -1,12 +1,13 @@ # # Copyright 2018-2020, Intel Corporation +# Copyright 2025 Hewlett Packard Enterprise Development LP # # 'recipe' for Docker to build for a Coverity scan. # # Pull base image FROM fedora:latest -MAINTAINER daos-stack +LABEL maintainer="daos-stack "" # use same UID as host and default value of 1000 if not specified ARG UID=1000 diff --git a/packaging/Dockerfile.mockbuild b/packaging/Dockerfile.mockbuild index 6bad37d..d8d86ac 100644 --- a/packaging/Dockerfile.mockbuild +++ b/packaging/Dockerfile.mockbuild @@ -1,5 +1,6 @@ # # Copyright 2018-2024 Intel Corporation +# Copyright 2025 Hewlett Packard Enterprise Development LP # # 'recipe' for Docker to build an RPM # @@ -13,15 +14,16 @@ LABEL maintainer="daos@daos.groups.io" # Use local repo server if present ARG REPO_FILE_URL -RUN if [ -n "$REPO_FILE_URL" ]; then \ - cd /etc/yum.repos.d/ && \ - curl -k -f -o daos_ci-fedora-artifactory.repo.tmp \ - "$REPO_FILE_URL"daos_ci-fedora-artifactory.repo && \ - for file in *.repo; do \ - true > $file; \ - done; \ - mv daos_ci-fedora-artifactory.repo{.tmp,}; \ - fi +ARG DAOS_LAB_CA_FILE_URL +ARG REPOSITORY_NAME +# script to install OS updates basic tools and daos dependencies +# COPY ./utils/scripts/install-fedora.sh /tmp/install.sh +# script to setup local repo if available +COPY ./packaging/scripts/repo-helper-fedora.sh /tmp/repo-helper.sh + +RUN chmod +x /tmp/repo-helper.sh && \ + /tmp/repo-helper.sh && \ + rm -f /tmp/repo-helper.sh # Install basic tools RUN dnf -y install mock make \ @@ -33,13 +35,15 @@ RUN dnf -y install mock make \ ARG UID=1000 # Add build user (to keep rpmbuild happy) -ENV USER build -ENV PASSWD build +ENV USER=build +ENV PASSWD=build # add the user to the mock group so it can run mock RUN if [ $UID != 0 ]; then \ useradd -u $UID -ms /bin/bash $USER; \ echo "$USER:$PASSWD" | chpasswd; \ usermod -a -G mock $USER; \ + mkdir -p /var/cache/mock; \ + chown $USER:root /var/cache/mock; \ fi ARG CB0 diff --git a/packaging/Dockerfile.ubuntu.20.04 b/packaging/Dockerfile.ubuntu.20.04 index 4d05464..bd5363b 100644 --- a/packaging/Dockerfile.ubuntu.20.04 +++ b/packaging/Dockerfile.ubuntu.20.04 @@ -1,54 +1,42 @@ +# Keep Dockerfile.ubuntu the same as this file until all packaging +# jobs are fixed to have a Dockerfile.ubuntu, and then the common +# Jenkinsfile will be changed to use Dockerfile.ubuntu. # # Copyright 2019-2021, Intel Corporation +# Copyright 2025 Hewlett Packard Enterprise Development LP # # 'recipe' for Docker to build an Debian package # # Pull base image -FROM ubuntu:20.04 +ARG BASE_DISTRO=ubuntu:20.04 +FROM $BASE_DISTRO LABEL org.opencontainers.image.authors="daos@daos.groups.io" - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - curl gpg +# Needed for later use of BASE_DISTRO +ARG BASE_DISTRO ARG REPO_FILE_URL -RUN if [ -n "$REPO_FILE_URL" ]; then \ - cd /etc/apt/sources.list.d && \ - curl -f -o daos_ci-ubuntu20.04-artifactory.list.tmp \ - "$REPO_FILE_URL"daos_ci-ubuntu20.04-artifactory.list && \ - true > ../sources.list && \ - mv daos_ci-ubuntu20.04-artifactory.list.tmp \ - daos_ci-ubuntu20.04-artifactory.list; \ - url="${REPO_FILE_URL%/*/}/hpe-ilorest-ubuntu-bionic-proxy/"; \ - else \ - url="https://downloads.linux.hpe.com/SDR/repo/ilorest/"; \ - fi; \ - cd -; \ - mkdir -p /usr/local/share/keyrings/; \ - curl -f -O "$url"GPG-KEY-hprest; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg \ - --import GPG-KEY-hprest; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \ - --output /usr/local/share/keyrings/hpe-sdr-public.gpg; \ - rm ./temp-keyring.gpg; \ - curl -f -O "$REPO_FILE_URL"esad_repo.key; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg \ - --import esad_repo.key; \ - gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \ - --output /usr/local/share/keyrings/daos-stack-public.gpg +ARG DAOS_LAB_CA_FILE_URL +ARG REPOSITORY_NAME +# script to setup local repo if available +COPY ./packaging/scripts/repo-helper-ubuntu.sh /tmp/repo-helper.sh + +RUN chmod +x /tmp/repo-helper.sh && \ + /tmp/repo-helper.sh && \ + rm -f /tmp/repo-helper.sh -# Install basic tools +# Install basic tools - rpmdevtools temporary commented out. RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ autoconf bash ca-certificates curl debhelper dh-make \ dpkg-dev dh-python doxygen gcc git git-buildpackage \ javahelper locales make patch pbuilder pkg-config \ - python3-dev python3-distro python3-distutils rpm scons wget \ - cmake valgrind rpmdevtools + python3-dev python3-distro python3-distutils rpm scons sudo \ + wget cmake valgrind # rpmdevtools # use same UID as host and default value of 1000 if not specified ARG UID=1000 # Add build user (to keep chrootbuild happy) -ENV USER build +ENV USER=build RUN useradd -u $UID -ms /bin/bash $USER # need to run the build command as root, as it needs to chroot @@ -56,7 +44,7 @@ RUN if ! grep "^#includedir /etc/sudoers.d" /etc/sudoers; then echo "#includedir /etc/sudoers.d" >> /etc/sudoers; \ fi; \ echo "Defaults env_keep += \"DPKG_GENSYMBOLS_CHECK_LEVEL\"" > /etc/sudoers.d/build; \ - echo "build ALL=(ALL) NOPASSWD: /usr/bin/tee /root/.pbuilderrc" >> /etc/sudoers.d/build; \ + echo "build ALL=(ALL) NOPASSWD: /usr/bin/tee /root/.pbuilderrc" >> /etc/sudoers.d/build; \ echo "build ALL=(ALL) NOPASSWD: /usr/sbin/pbuilder" >> /etc/sudoers.d/build; \ chmod 0440 /etc/sudoers.d/build; \ visudo -c; \ diff --git a/packaging/Dockerfile.ubuntu.rolling b/packaging/Dockerfile.ubuntu.rolling index 02aca45..ed5d363 100644 --- a/packaging/Dockerfile.ubuntu.rolling +++ b/packaging/Dockerfile.ubuntu.rolling @@ -1,11 +1,12 @@ # # Copyright 2019, Intel Corporation +# Copyright 2025 Hewlett Packard Enterprise Development LP # # 'recipe' for Docker to build an Debian package # # Pull base image FROM ubuntu:rolling -Maintainer daos-stack +LABEL org.opencontainers.image.authors="daos@daos.groups.io" # use same UID as host and default value of 1000 if not specified ARG UID=1000 diff --git a/packaging/debian_chrootbuild b/packaging/debian_chrootbuild index cc2cc96..5cb2943 100755 --- a/packaging/debian_chrootbuild +++ b/packaging/debian_chrootbuild @@ -2,17 +2,35 @@ set -uex +: "${REPO_FILE_URL:=}" +: "${HTTPS_PROXY:=}" + +# Currently not fully working behind a proxy if [ -n "${ARTIFACTORY_URL:-}" ] && "$LOCAL_REPOS"; then - echo "MIRRORSITE=${ARTIFACTORY_URL}artifactory/ubuntu-proxy" | sudo tee /root/.pbuilderrc + pbuilderrc="./pbuilder_rc.txt" + rm -f "$pbuilderrc" + if [ -n "${HTTPS_PROXY}" ]; then + echo "export http_proxy=\"${HTTPS_PROXY}\"" >> "$pbuilderrc" + else + echo "MIRRORSITE=${ARTIFACTORY_URL}/ubuntu-proxy/ubuntu" > "$pbuilderrc" + fi + #if [ -n "$REPO_FILE_URL" ]; then + # direct="${REPO_FILE_URL##*//}" + # direct="${direct%%/*}" + # echo "no_proxy=\"${direct}\"" >> "$pbuilderrc" + #fi + # shellcheck disable=SC2002 + cat "$pbuilderrc" | sudo tee /root/.pbuilderrc fi # shellcheck disable=SC2086 sudo pbuilder create \ --extrapackages "gnupg ca-certificates" \ - $DISTRO_ID_OPT + $DISTRO_ID_OPT || true # Ignore error status for now. repo_args="" repos_added=() +# currently a bit broken, pbuilder will not accept user provided CAs. for repo in $DISTRO_BASE_PR_REPOS $PR_REPOS; do branch="master" build_number="lastSuccessfulBuild" @@ -32,31 +50,34 @@ for repo in $DISTRO_BASE_PR_REPOS $PR_REPOS; do repo_args="$repo_args|deb [trusted=yes] ${JENKINS_URL:-https://build.hpdd.intel.com/}job/daos-stack/job/$repo/job/$branch/$build_number/artifact/artifacts/$DISTRO/ ./" done -repo_args+="|$(curl -sSf "$REPO_FILE_URL"daos_ci-"$DISTRO"-artifactory.list | - sed -e 's/#.*//' -e '/ubuntu-proxy/d' -e '/^$/d' -e '/^$/d' \ - -e 's/signed-by=.*\.gpg/trusted=yes/' | - sed -e ':a; N; $!ba; s/\n/|/g')" -for repo in $JOB_REPOS; do - repo_name=${repo##*://} - repo_name=${repo_name//\//_} - if [[ " ${repos_added[*]} " = *\ ${repo_name}\ * ]]; then - # don't add duplicates, first found wins - continue - fi - repos_added+=("$repo_name") - repo_args+="|deb ${repo} $VERSION_CODENAME main" -done -# NB: This PPA is needed to support modern go toolchains on ubuntu 20.04. -# After the build is updated to use 22.04, which supports go >= 1.18, it -# should no longer be needed. -repo_args="$repo_args|deb [trusted=yes] https://ppa.launchpadcontent.net/longsleep/golang-backports/ubuntu $VERSION_CODENAME main" -echo "$repo_args" -if [ "$repo_args" = "|" ]; then - repo_args="" -else - #repo_args="--othermirror"${repo_args#|}\"" - repo_args="${repo_args#|}" -fi +# currently broken, builder will not accept internal certs. +# repo_args+="|$(curl -sSf "$REPO_FILE_URL"daos_ci-"$DISTRO"-artifactory.list | +# sed -e 's/#.*//' -e '/ubuntu-proxy/d' -e '/^$/d' -e '/^$/d' \ +# -e 's/signed-by=.*\.gpg/trusted=yes/' | +# sed -e ':a; N; $!ba; s/\n/|/g')" +#for repo in $JOB_REPOS; do +# repo_name=${repo##*://} +# repo_name=${repo_name//\//_} +# if [[ " ${repos_added[*]} " = *\ ${repo_name}\ * ]]; then +# # don't add duplicates, first found wins +# continue +# fi +# repos_added+=("$repo_name") +# repo_args+="|deb ${repo} $VERSION_CODENAME main" +#done + +## NB: This PPA is needed to support modern go toolchains on ubuntu 20.04. +## After the build is updated to use 22.04, which supports go >= 1.18, it +## should no longer be needed. +# currently broken - claim is public key not available. +#repo_args="$repo_args|deb [trusted=yes] https://ppa.launchpadcontent.net/longsleep/golang-backports/ubuntu $VERSION_CODENAME main" +#echo "$repo_args" +#if [ "$repo_args" = "|" ]; then +# repo_args="" +#else +# #repo_args="--othermirror"${repo_args#|}\"" +# repo_args="${repo_args#|}" +#fi cd "$DEB_TOP" # shellcheck disable=SC2086 sudo pbuilder update --override-config $DISTRO_ID_OPT ${repo_args:+--othermirror "$repo_args"} diff --git a/packaging/rpm_chrootbuild b/packaging/rpm_chrootbuild index d6443b7..8d37abc 100755 --- a/packaging/rpm_chrootbuild +++ b/packaging/rpm_chrootbuild @@ -2,6 +2,11 @@ set -uex +: "${HTTPS_PROXY:=}" +: "${REPO_FILE_URL:=}" +: "${ARCH:=$(arch)}" +: "${REPOSITORY_NAME:=artifactory}" + cp /etc/mock/"$CHROOT_NAME".cfg mock.cfg # Enable mock ccache plugin @@ -11,7 +16,20 @@ config_opts['plugin_conf']['ccache_opts']['dir'] = "%(cache_topdir)s/%(root)s/cc EOF -if [[ $CHROOT_NAME == *epel-8-x86_64 ]]; then +# Optionally add a proxy to mock +if [ -n "$HTTPS_PROXY" ];then + yum_proxy="http://${HTTPS_PROXY##*//}" + echo "config_opts['https_proxy'] = '$yum_proxy'" >> mock.cfg +fi + +# No proxy for local mirrors +if [ -n "$REPO_FILE_URL" ]; then + direct="${REPO_FILE_URL##*//}" + direct="${direct%%/*}" + echo "config_opts['no_proxy'] = '${direct}'" >> mock.cfg +fi + +if [[ $CHROOT_NAME == *"epel-8-${ARCH}" ]]; then cat <> mock.cfg config_opts['module_setup_commands'] = [ ('enable', 'javapackages-tools:201801'), @@ -21,7 +39,7 @@ EOF fi # Use dnf on CentOS 7 -if [[ $CHROOT_NAME == *epel-7-x86_64 ]]; then +if [[ $CHROOT_NAME == *"epel-7-$ARCH" ]]; then MOCK_OPTIONS="--dnf --no-bootstrap-chroot${MOCK_OPTIONS:+ }$MOCK_OPTIONS" fi @@ -61,7 +79,7 @@ if [ -n "${ARTIFACTORY_URL:-}" ] && "$LOCAL_REPOS"; then fi fi curl -sSf "$REPO_FILE_URL"daos_ci-"${CHROOT_NAME%-*}".repo >> mock.cfg - repo_adds+=("--enablerepo *-artifactory") + repo_adds+=("--enablerepo *-${REPOSITORY_NAME}") fi fi @@ -127,7 +145,7 @@ if ! eval time mock -r mock.cfg ${repo_dels[*]} ${repo_adds[*]} --no-clean \ fi # Save the ccache -if [ -d /scratch/ ]; then +if [ -d /scratch/mock ]; then mkdir -p "$bs_dir"/ if ! flock "$bs_dir" -c "tar -czf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz /var/cache/mock/${CHROOT_NAME}/ccache"; then echo "Failed to save ccache. Plowing onward."