diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..c225a125
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,84 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+all: IMB-MPI1 IMB-NBC IMB-RMA IMB-EXT IMB-IO IMB-MT  # default goal: build every benchmark binary
+
+IMB-MPI1:
+	$(MAKE) -j8 -C src_cpp -f Makefile TARGET=MPI1
+	@cp src_cpp/IMB-MPI1 .
+
+IMB-NBC:
+	$(MAKE) -C src_cpp -f Makefile TARGET=NBC
+	@cp src_cpp/IMB-NBC .
+
+IMB-EXT:
+	$(MAKE) -C src_cpp -f Makefile TARGET=EXT
+	@cp src_cpp/IMB-EXT .
+
+IMB-RMA:
+	$(MAKE) -C src_cpp -f Makefile TARGET=RMA
+	@cp src_cpp/IMB-RMA .
+
+IMB-IO:
+	$(MAKE) -C src_cpp -f Makefile TARGET=IO
+	@cp src_cpp/IMB-IO .
+
+IMB-MT: | IMB-MPI1  # order-only prerequisite: IMB-MPI1 is built before IMB-MT
+	$(MAKE) -j8 -C src_cpp -f Makefile TARGET=MT
+	@cp src_cpp/IMB-MT .
+.PHONY: all clean IMB-MPI1 IMB-NBC IMB-RMA IMB-EXT IMB-IO IMB-MT  # always recurse; the src_cpp Makefile decides what is stale
+
+clean:
+	$(MAKE) -C src_cpp -f Makefile TARGET=MPI1 clean
+	$(MAKE) -C src_cpp -f Makefile TARGET=NBC clean
+	$(MAKE) -C src_cpp -f Makefile TARGET=RMA clean
+	$(MAKE) -C src_cpp -f Makefile TARGET=EXT clean
+	$(MAKE) -C src_cpp -f Makefile TARGET=IO clean
+	$(MAKE) -C src_cpp -f Makefile TARGET=MT clean
+	rm -f IMB-MPI1 IMB-NBC IMB-RMA IMB-EXT IMB-IO IMB-MT
diff --git a/Makefile_win b/Makefile_win
new file mode 100644
index 00000000..e444691a
--- /dev/null
+++ b/Makefile_win
@@ -0,0 +1,98 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+all: IMB-MPI1 IMB-MT IMB-RMA IMB-EXT IMB-IO IMB-NBC  # default goal: build every benchmark executable
+
+IMB-MPI1:
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win TARGET=MPI1
+	cd ..
+	copy .\src_cpp\IMB-MPI1.exe
+
+IMB-NBC:  # NOTE(review): NBC/EXT/IO pass a goal name instead of TARGET=... (clean uses TARGET=...) - confirm against src_cpp\Makefile_win
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win IMB-NBC
+	cd ..
+	copy .\src_cpp\IMB-NBC.exe
+
+IMB-RMA:
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win TARGET=RMA
+	cd ..
+	copy .\src_cpp\IMB-RMA.exe
+
+IMB-EXT:
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win IMB-EXT
+	cd ..
+	copy .\src_cpp\IMB-EXT.exe
+
+IMB-IO:
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win IMB-IO
+	cd ..
+	copy .\src_cpp\IMB-IO.exe
+
+
+IMB-MT:
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win TARGET=MT
+	cd ..
+	copy .\src_cpp\IMB-MT.exe
+
+clean:  # cleans each component in src_cpp, then deletes the copied executables
+	cd .\src_cpp
+	$(MAKE) -f Makefile_win clean TARGET=MT
+	$(MAKE) -f Makefile_win clean TARGET=MPI1
+	$(MAKE) -f Makefile_win clean TARGET=RMA
+	$(MAKE) -f Makefile_win clean TARGET=NBC
+	$(MAKE) -f Makefile_win clean TARGET=IO
+	$(MAKE) -f Makefile_win clean TARGET=EXT
+	cd ..
+	del /f /q IMB-MPI1.exe IMB-NBC.exe IMB-RMA.exe IMB-EXT.exe IMB-IO.exe IMB-MT.exe
diff --git a/README.md b/README.md
index 55272d88..4281fbcb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Intel(R) MPI Benchmarks
 [![Common Public License Version 1.0](https://img.shields.io/badge/license-Common%20Public%20License%20Version%201.0-green.svg)](license/license.txt)
-![v2018](https://img.shields.io/badge/v.2018-Gold-orange.svg)
+![v2019](https://img.shields.io/badge/v.2019-Gold-orange.svg)
 --------------------------------------------------
 
 --------
@@ -41,6 +41,8 @@ After a successful installation of Intel(R) MPI Benchmarks, the following
 files and folders appear on your system:
 
     +-- \imb            Intel(R) MPI Benchmarks product directory
+         |
+         +-- \src_c             Product source "C" code and Makefiles.
          |
          +-- \license           Product license files.
          |    |              
@@ -50,7 +52,7 @@ files and folders appear on your system:
          |                                       use of the Intel(R) MPI 
          |                                       Benchmarks name and trademark.
          |
-         +-- \src                  Product source code and Makefiles. 
+         +-- \src_cpp              Product source "CPP" code and Makefiles. 
          |
          +-- \WINDOWS              Microsoft* Visual Studio* project files. 
          |
@@ -60,6 +62,31 @@ files and folders appear on your system:
 ----------
 What's New
 ----------
+New in Intel(R) MPI Benchmarks 2019 Beta
+----------------------------------------
+- Intel(R) MPI Benchmarks 2019 (Beta) is now available for Windows*.
+- Added a new option -noheader for IMB-MT to disable printing of benchmark headers.
+- Added a new benchmark BarrierMT for IMB-MT.
+- Bug fixes.
+
+New in Intel(R) MPI Benchmarks 2019 Technical Preview
+-----------------------------------------------------
+- New IMB-MT benchmarks.
+  The benchmarks implement the multithreaded version of some of the IMB-MPI1
+  benchmarks using the OpenMP* paradigm.
+  
+- New benchmarks infrastructure implemented in C++.
+  The IMB-MPI1, IMB-RMA and IMB-MT implementation is now based on the new C++
+  infrastructure (IMB-NBC, IMB-EXT and IMB-IO still use the legacy one).
+  The legacy infrastructure is preserved in the legacy subdirectory.
+
+- Changes in syntax for the -include and -exclude options.
+  Benchmarks to include and exclude now must be separated by a comma rather
+  than a space. Benchmarks to launch can be separated by a comma or a space.
+
+- Iteration policy can no longer be set with the -iter option. Use -iter_policy
+  instead.
+
 New in Intel(R) MPI Benchmarks 2018 Update 1
 --------------------------------------------
 - Support for the Microsoft* Visual Studio* 2017. Microsoft* Visual Studio* 2012 
diff --git a/ReadMe_IMB.txt b/ReadMe_IMB.txt
index 4923da91..3ceda711 100644
--- a/ReadMe_IMB.txt
+++ b/ReadMe_IMB.txt
@@ -1,7 +1,7 @@
---------------------------------------------------
-Intel(R) MPI Benchmarks 2018 Update 1
+---------------------------------
+Intel(R) MPI Benchmarks 2019
 README
---------------------------------------------------
+---------------------------------
 
 --------
 Contents
@@ -11,8 +11,8 @@ Contents
 - Product Directories
 - What's New
 - Command-Line Control
-- Build Instructions for Linux* OS
-- Build Instructions for Windows* OS
+- Building Instructions for Linux* OS
+- Building Instructions for Windows* OS
 - Copyright and License Information
 - Legal Information
 
@@ -25,11 +25,10 @@ You can run all of the supported benchmarks, or a subset specified in the
 command line using one executable file. Use command-line parameters to specify
 various settings, such as time measurement, message lengths, and selection of 
 communicators. For details, see the Intel(R) MPI Benchmarks User's Guide 
-located at: https://software.intel.com/en-us/imb-user-guide
+located in the <install-dir>/doc directory.
 
-When installed as part of Intel(R) MPI Library, the default location of
-the Intel(R) MPI Benchmarks is:
-  - C:\Program Files (x86)\IntelSWTools\imb on Windows* OS	 
+By default, Intel(R) MPI Benchmarks is installed at:
+  - C:\Program Files (x86)\IntelSWTools\imb on Windows* OS 
   - /opt/intel/imb on Linux* OS
 
 Before using the Intel(R) MPI Benchmarks, please read the license agreements 
@@ -42,6 +41,8 @@ After a successful installation of Intel(R) MPI Benchmarks, the following
 files and folders appear on your system:
 
     +-- \imb            Intel(R) MPI Benchmarks product directory
+         |
+         +-- \src_c             Product source "C" code and Makefiles.
          |
          +-- \license           Product license files.
          |    |              
@@ -51,7 +52,7 @@ files and folders appear on your system:
          |                                       use of the Intel(R) MPI 
          |                                       Benchmarks name and trademark.
          |
-         +-- \src                  Product source code and Makefiles. 
+         +-- \src_cpp              Product source "CPP" code and Makefiles. 
          |
          +-- \WINDOWS              Microsoft* Visual Studio* project files. 
          |
@@ -61,10 +62,35 @@ files and folders appear on your system:
 ----------
 What's New
 ----------
+New in Intel(R) MPI Benchmarks 2019 Beta
+----------------------------------------
+- Intel(R) MPI Benchmarks 2019 (Beta) is now available for Windows*.
+- Added a new option -noheader for IMB-MT to disable printing of benchmark headers.
+- Added a new benchmark BarrierMT for IMB-MT.
+- Bug fixes.
+
+New in Intel(R) MPI Benchmarks 2019 Technical Preview
+-----------------------------------------------------
+- New IMB-MT benchmarks.
+  The benchmarks implement the multithreaded version of some of the IMB-MPI1
+  benchmarks using the OpenMP* paradigm.
+  
+- New benchmarks infrastructure implemented in C++.
+  The IMB-MPI1, IMB-RMA and IMB-MT implementation is now based on the new C++
+  infrastructure (IMB-NBC, IMB-EXT and IMB-IO still use the legacy one).
+  The legacy infrastructure is preserved in the legacy subdirectory.
+
+- Changes in syntax for the -include and -exclude options.
+  Benchmarks to include and exclude now must be separated by a comma rather
+  than a space. Benchmarks to launch can be separated by a comma or a space.
+
+- Iteration policy can no longer be set with the -iter option. Use -iter_policy
+  instead.
+
 New in Intel(R) MPI Benchmarks 2018 Update 1
 --------------------------------------------
 - Support for the Microsoft* Visual Studio* 2017. Microsoft* Visual Studio* 2012 
-support is removed.
+  support is removed.
 
 New in Intel(R) MPI Benchmarks 2018
 --------------------------------------------
@@ -134,20 +160,10 @@ You can see the Intel(R) MPI Benchmarks User's Guide for details on the
 command-line parameters.
 
 -----------------------------------------
-Build Instructions for Linux* OS
+Building Instructions for Linux* OS
 -----------------------------------------
-
-1) Set up the environment for the compiler and Intel(R) MPI Library.
-   For the Intel(R) compilers, run:
-   
-   source <compiler_dir>/bin/compilervars.sh intel64
-   
-   For the Intel(R) MPI Library, run:
-   
-   source <intel_mpi_dir>/intel64/bin/mpivars.sh
-
-2) Set the CC variable to point to the appropriate compiler wrapper, mpiicc or mpicc.
-3) Run one or more Makefile commands below:
+1) Set the CC variable to point to the appropriate compiler wrapper, mpiicc or mpicc.
+2) Run one or more Makefile commands below:
 
    make clean - remove legacy binary object files and executable files
    make MPI1 - build the executable file for the IMB-MPI1 component
@@ -157,22 +173,56 @@ Build Instructions for Linux* OS
    make RMA - build the executable file for IMB-RMA benchmarks
    make all - build all executable files available
    
-4) Run the benchmarks as follows:
+3) Run the benchmarks as follows:
 
    mpirun -n <number_of_processes> IMB-<component> [arguments]
 
    where <component> is one of the make targets above.
-   For details, refer to the Intel(R) MPI Benchmarks User's Guide.
+   For details, refer to the Intel(R) MPI Benchmarks User's Guide at:
+   https://software.intel.com/en-us/imb-user-guide-2018-beta
 
 -----------------------------------------
-Build Instructions for Windows* OS
+Building Instructions for Windows* OS
 -----------------------------------------
 Use the enclosed solution files located in the component-specific 
 subdirectories under the imb/WINDOWS directory. Click on the respective 
 ".vcproj" or ".vcxproj" project file and use the Microsoft* Visual Studio* 
 menu to run the associated benchmark application.
 
-Use F7 or Build > Build Solution to create an executable.
+Building "x64" Executable Files 
+-------------------------------
+1) Check that the Include, Lib, and Path environment variables are set as follows:
+    %I_MPI_ROOT%\intel64\include
+    %I_MPI_ROOT%\intel64\lib
+    %I_MPI_ROOT%\intel64\bin
+   The %I_MPI_ROOT% environment variable is set to the Intel(R) MPI Library 
+   installation directory.
+
+2) Open the ".vcproj" or ".vcxproj" file for the component you would like to 
+   build. From the Visual Studio Project panel:
+   a) Change the "Solution Platforms" dialog box to "x64".
+   b) Change the "Solution Configurations" dialog box to "Release".
+   c) Check other settings as required, for example:
+    General > Project Defaults
+       - Set "Character Set" to "Use Multi-Byte Character Set"
+    C/C++ > General 
+       - Set "Additional Include Directories" to 
+           "$(I_MPI_ROOT)\intel64\include"
+       - Set "Warning Level" to "Level 1 (/W1)"
+    C/C++ > Preprocessor
+       - For the "Preprocessor definitions" within the Visual Studio 
+         projects, add the conditional compilation macros WIN_IMB and 
+         _CRT_SECURE_NO_DEPRECATE. Depending on the components you intend to 
+         use, add one or more of the following macros: 
+         MPI1, EXT, MPIIO, NBC, RMA.
+    Linker > Input
+       - Set "Additional Dependencies" to "$(I_MPI_ROOT)\intel64\lib\impi.lib". 
+         Make sure to add quotes.
+
+3) Use F7 or Build > Build Solution to create an executable.
+
+   For details, refer to the Intel(R) MPI Benchmarks User's Guide at:
+   https://software.intel.com/en-us/imb-user-guide-2018-beta
 
 ----------------------
 Copyright and Licenses
diff --git a/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.rc b/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.rc
index e027761e..266474f1 100644
--- a/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.rc
+++ b/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-EXT"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-EXT.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.vcxproj b/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.vcxproj
index f1e6cb77..fcd1192e 100644
--- a/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.vcxproj
+++ b/WINDOWS/IMB-EXT_VS_2013/IMB-EXT.vcxproj
@@ -58,21 +58,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +86,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +96,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,42 +105,47 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_ones_accu.c" />
-    <ClCompile Include="..\..\src\IMB_ones_bidir.c" />
-    <ClCompile Include="..\..\src\IMB_ones_unidir.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_ext.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_window.c">
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_accu.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_bidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_unidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_ext.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_window.c">
       <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -150,4 +154,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.rc b/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.rc
old mode 100755
new mode 100644
index e027761e..266474f1
--- a/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.rc
+++ b/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-EXT"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-EXT.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.sln b/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.sln
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.vcxproj b/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.vcxproj
old mode 100755
new mode 100644
index 42db070e..60675893
--- a/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.vcxproj
+++ b/WINDOWS/IMB-EXT_VS_2015/IMB-EXT.vcxproj
@@ -58,21 +58,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +86,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +96,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,42 +105,47 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_ones_accu.c" />
-    <ClCompile Include="..\..\src\IMB_ones_bidir.c" />
-    <ClCompile Include="..\..\src\IMB_ones_unidir.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_ext.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_window.c">
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_accu.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_bidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_unidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_ext.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_window.c">
       <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
diff --git a/WINDOWS/IMB-EXT_VS_2015/resource.h b/WINDOWS/IMB-EXT_VS_2015/resource.h
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.rc b/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.rc
index e027761e..266474f1 100644
--- a/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.rc
+++ b/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-EXT"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-EXT.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.vcxproj b/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.vcxproj
index f4df0d10..064005ce 100644
--- a/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.vcxproj
+++ b/WINDOWS/IMB-EXT_VS_2017/IMB-EXT.vcxproj
@@ -13,7 +13,7 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{4219D5A9-6972-4B1C-9F07-EB97EEF4EDDA}</ProjectGuid>
     <RootNamespace>IMBEXT</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
+    <Keyword>Win32Proj</Keyword>
     <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -59,21 +59,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -88,9 +87,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;EXT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -98,7 +97,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -107,42 +106,47 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_ones_accu.c" />
-    <ClCompile Include="..\..\src\IMB_ones_bidir.c" />
-    <ClCompile Include="..\..\src\IMB_ones_unidir.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_ext.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_window.c">
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\EXT\EXT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_accu.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_bidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_ones_unidir.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_ext.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_window.c">
       <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -151,4 +155,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-IO_VS_2013/IMB-IO.rc b/WINDOWS/IMB-IO_VS_2013/IMB-IO.rc
index 28b415a4..ae9b59de 100644
--- a/WINDOWS/IMB-IO_VS_2013/IMB-IO.rc
+++ b/WINDOWS/IMB-IO_VS_2013/IMB-IO.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"		
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-IO"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-IO.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-IO_VS_2013/IMB-IO.vcxproj b/WINDOWS/IMB-IO_VS_2013/IMB-IO.vcxproj
index 926fbbd5..e6dd7dfe 100644
--- a/WINDOWS/IMB-IO_VS_2013/IMB-IO.vcxproj
+++ b/WINDOWS/IMB-IO_VS_2013/IMB-IO.vcxproj
@@ -58,18 +58,18 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <TargetMachine>MachineX64</TargetMachine>
@@ -82,9 +82,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -92,7 +92,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -101,33 +101,46 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c">
-      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_file.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_open_close.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_io.c" />
-    <ClCompile Include="..\..\src\IMB_read.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_write.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_file.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_open_close.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_io.c" />
+    <ClCompile Include="..\..\src_c\IMB_read.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_write.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -136,4 +149,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-IO_VS_2015/IMB-IO.rc b/WINDOWS/IMB-IO_VS_2015/IMB-IO.rc
old mode 100755
new mode 100644
index 28b415a4..ae9b59de
--- a/WINDOWS/IMB-IO_VS_2015/IMB-IO.rc
+++ b/WINDOWS/IMB-IO_VS_2015/IMB-IO.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"		
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-IO"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-IO.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-IO_VS_2015/IMB-IO.sln b/WINDOWS/IMB-IO_VS_2015/IMB-IO.sln
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-IO_VS_2015/IMB-IO.vcxproj b/WINDOWS/IMB-IO_VS_2015/IMB-IO.vcxproj
old mode 100755
new mode 100644
index 8bb7e6d1..31219a0e
--- a/WINDOWS/IMB-IO_VS_2015/IMB-IO.vcxproj
+++ b/WINDOWS/IMB-IO_VS_2015/IMB-IO.vcxproj
@@ -58,18 +58,18 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <TargetMachine>MachineX64</TargetMachine>
@@ -82,9 +82,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -92,7 +92,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -101,34 +101,46 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c">
-      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_file.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_open_close.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_io.c" />
-    <ClCompile Include="..\..\src\IMB_read.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_write.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_file.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_open_close.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_io.c" />
+    <ClCompile Include="..\..\src_c\IMB_read.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_write.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -137,4 +149,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-IO_VS_2015/resource.h b/WINDOWS/IMB-IO_VS_2015/resource.h
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-IO_VS_2017/IMB-IO.rc b/WINDOWS/IMB-IO_VS_2017/IMB-IO.rc
index 28b415a4..ae9b59de 100644
--- a/WINDOWS/IMB-IO_VS_2017/IMB-IO.rc
+++ b/WINDOWS/IMB-IO_VS_2017/IMB-IO.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"		
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-IO"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-IO.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-IO_VS_2017/IMB-IO.vcxproj b/WINDOWS/IMB-IO_VS_2017/IMB-IO.vcxproj
index f1b2c84b..96fd2705 100644
--- a/WINDOWS/IMB-IO_VS_2017/IMB-IO.vcxproj
+++ b/WINDOWS/IMB-IO_VS_2017/IMB-IO.vcxproj
@@ -13,7 +13,7 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8FDAF9E9-C2F3-443A-B86C-FC0A04DB7713}</ProjectGuid>
     <RootNamespace>IMBIO</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
+    <Keyword>Win32Proj</Keyword>
     <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -59,18 +59,18 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <TargetMachine>MachineX64</TargetMachine>
@@ -83,9 +83,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPIIO;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -93,7 +93,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -102,34 +102,46 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c">
-      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_file.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_open_close.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_io.c" />
-    <ClCompile Include="..\..\src\IMB_read.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-    <ClCompile Include="..\..\src\IMB_write.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\IO\IO_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_file.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_open_close.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_io.c" />
+    <ClCompile Include="..\..\src_c\IMB_read.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_c\IMB_write.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -138,4 +150,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.rc b/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.rc
index 4da83a95..57bcf000 100644
--- a/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.rc
+++ b/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-MPI1"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-MPI1.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.vcxproj b/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.vcxproj
index e598a017..a645996c 100644
--- a/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.vcxproj
+++ b/WINDOWS/IMB-MPI1_VS_2013/IMB-MPI1.vcxproj
@@ -1,154 +1,166 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
-    <RootNamespace>IMBMPI1</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v120</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v120</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
-      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>MaxSpeed</Optimization>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <OptimizeReferences>true</OptimizeReferences>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_bandwidth.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_exchange.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_mpi1.c" />
-    <ClCompile Include="..\..\src\IMB_pingping.c" />
-    <ClCompile Include="..\..\src\IMB_pingpong.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="resource.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="IMB-MPI1.rc" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMPI1</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_cpp\helpers;$(ProjectDir)\..\..\src_c;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_bandwidth.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_exchange.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_mpi1.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingping.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingpong.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MPI1.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.rc b/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.rc
old mode 100755
new mode 100644
index 4da83a95..57bcf000
--- a/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.rc
+++ b/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-MPI1"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-MPI1.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.sln b/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.sln
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.vcxproj b/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.vcxproj
old mode 100755
new mode 100644
index 0947f9de..91a0f2a1
--- a/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.vcxproj
+++ b/WINDOWS/IMB-MPI1_VS_2015/IMB-MPI1.vcxproj
@@ -1,154 +1,166 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
-    <RootNamespace>IMBMPI1</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v140</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v140</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
-      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>MaxSpeed</Optimization>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <OptimizeReferences>true</OptimizeReferences>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_bandwidth.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_exchange.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_mpi1.c" />
-    <ClCompile Include="..\..\src\IMB_pingping.c" />
-    <ClCompile Include="..\..\src\IMB_pingpong.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="resource.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="IMB-MPI1.rc" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMPI1</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_cpp\helpers;$(ProjectDir)\..\..\src_c;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_bandwidth.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_exchange.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_mpi1.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingping.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingpong.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MPI1.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MPI1_VS_2015/resource.h b/WINDOWS/IMB-MPI1_VS_2015/resource.h
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.rc b/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.rc
index 4da83a95..57bcf000 100644
--- a/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.rc
+++ b/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-MPI1"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-MPI1.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.vcxproj b/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.vcxproj
index 6d32ca12..3955e6b9 100644
--- a/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.vcxproj
+++ b/WINDOWS/IMB-MPI1_VS_2017/IMB-MPI1.vcxproj
@@ -1,155 +1,167 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
-    <RootNamespace>IMBMPI1</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
-    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v141</PlatformToolset>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <CharacterSet>MultiByte</CharacterSet>
-    <PlatformToolset>v141</PlatformToolset>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
-    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
-    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
-      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TargetEnvironment>X64</TargetEnvironment>
-    </Midl>
-    <ClCompile>
-      <Optimization>MaxSpeed</Optimization>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level1</WarningLevel>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <SubSystem>Console</SubSystem>
-      <OptimizeReferences>true</OptimizeReferences>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <TargetMachine>MachineX64</TargetMachine>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_bandwidth.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_exchange.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_mpi1.c" />
-    <ClCompile Include="..\..\src\IMB_pingping.c" />
-    <ClCompile Include="..\..\src\IMB_pingpong.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="resource.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="IMB-MPI1.rc" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMPI1</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_cpp\helpers;$(ProjectDir)\..\..\src_c;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MPI1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MPI1\MPI1_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_bandwidth.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_exchange.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_mpi1.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingping.c" />
+    <ClCompile Include="..\..\src_c\IMB_pingpong.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MPI1.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MT_VS_2013/IMB-MT.rc b/WINDOWS/IMB-MT_VS_2013/IMB-MT.rc
new file mode 100644
index 00000000..bf7d4848
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2013/IMB-MT.rc
@@ -0,0 +1,102 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName", "Intel Corporation"
+            VALUE "FileDescription", "Intel(R) MPI Benchmarks"
+            VALUE "FileVersion", "2019.0.0"
+            VALUE "InternalName", "IMB-MT"
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
+            VALUE "OriginalFilename", "IMB-MT.exe"
+            VALUE "ProductName", "Intel(R) MPI Benchmarks"
+            VALUE "ProductVersion", "2019.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/WINDOWS/IMB-MT_VS_2013/IMB-MT.sln b/WINDOWS/IMB-MT_VS_2013/IMB-MT.sln
new file mode 100644
index 00000000..f86d77d3
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2013/IMB-MT.sln
@@ -0,0 +1,19 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 11.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IMB-MT", "IMB-MT.vcxproj", "{72B11209-BF5B-4AF0-B38D-744C2FD082C3}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.ActiveCfg = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.Build.0 = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.ActiveCfg = Release|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/WINDOWS/IMB-MT_VS_2013/IMB-MT.vcxproj b/WINDOWS/IMB-MT_VS_2013/IMB-MT.vcxproj
new file mode 100644
index 00000000..493b668c
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2013/IMB-MT.vcxproj
@@ -0,0 +1,130 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMT</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\MT\MT_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MT.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MT_VS_2013/resource.h b/WINDOWS/IMB-MT_VS_2013/resource.h
new file mode 100644
index 00000000..8ba6e029
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2013/resource.h
@@ -0,0 +1,14 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by IMB-MT.rc
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1001
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/WINDOWS/IMB-MT_VS_2015/IMB-MT.rc b/WINDOWS/IMB-MT_VS_2015/IMB-MT.rc
new file mode 100644
index 00000000..bf7d4848
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2015/IMB-MT.rc
@@ -0,0 +1,102 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName", "Intel Corporation"
+            VALUE "FileDescription", "Intel(R) MPI Benchmarks"
+            VALUE "FileVersion", "2019.0.0"
+            VALUE "InternalName", "IMB-MT"
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
+            VALUE "OriginalFilename", "IMB-MT.exe"
+            VALUE "ProductName", "Intel(R) MPI Benchmarks"
+            VALUE "ProductVersion", "2019.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/WINDOWS/IMB-MT_VS_2015/IMB-MT.sln b/WINDOWS/IMB-MT_VS_2015/IMB-MT.sln
new file mode 100644
index 00000000..f86d77d3
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2015/IMB-MT.sln
@@ -0,0 +1,19 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 11.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IMB-MT", "IMB-MT.vcxproj", "{72B11209-BF5B-4AF0-B38D-744C2FD082C3}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.ActiveCfg = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.Build.0 = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.ActiveCfg = Release|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/WINDOWS/IMB-MT_VS_2015/IMB-MT.vcxproj b/WINDOWS/IMB-MT_VS_2015/IMB-MT.vcxproj
new file mode 100644
index 00000000..1838548b
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2015/IMB-MT.vcxproj
@@ -0,0 +1,130 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMT</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\MT\MT_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MT.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MT_VS_2015/resource.h b/WINDOWS/IMB-MT_VS_2015/resource.h
new file mode 100644
index 00000000..8ba6e029
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2015/resource.h
@@ -0,0 +1,14 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by IMB-MT.rc
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1001
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/WINDOWS/IMB-MT_VS_2017/IMB-MT.rc b/WINDOWS/IMB-MT_VS_2017/IMB-MT.rc
new file mode 100644
index 00000000..bf7d4848
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2017/IMB-MT.rc
@@ -0,0 +1,102 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName", "Intel Corporation"
+            VALUE "FileDescription", "Intel(R) MPI Benchmarks"
+            VALUE "FileVersion", "2019.0.0"
+            VALUE "InternalName", "IMB-MT"
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
+            VALUE "OriginalFilename", "IMB-MT.exe"
+            VALUE "ProductName", "Intel(R) MPI Benchmarks"
+            VALUE "ProductVersion", "2019.0.0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/WINDOWS/IMB-MT_VS_2017/IMB-MT.sln b/WINDOWS/IMB-MT_VS_2017/IMB-MT.sln
new file mode 100644
index 00000000..f86d77d3
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2017/IMB-MT.sln
@@ -0,0 +1,19 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 11.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IMB-MT", "IMB-MT.vcxproj", "{72B11209-BF5B-4AF0-B38D-744C2FD082C3}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.ActiveCfg = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Debug|x64.Build.0 = Debug|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.ActiveCfg = Release|x64
+		{72B11209-BF5B-4AF0-B38D-744C2FD082C3}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/WINDOWS/IMB-MT_VS_2017/IMB-MT.vcxproj b/WINDOWS/IMB-MT_VS_2017/IMB-MT.vcxproj
new file mode 100644
index 00000000..a065082a
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2017/IMB-MT.vcxproj
@@ -0,0 +1,131 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{72B11209-BF5B-4AF0-B38D-744C2FD082C3}</ProjectGuid>
+    <RootNamespace>IMBMT</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
+    <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+    <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;MT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level1</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX64</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\MT\MT_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\MT\MT_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
+    <ClInclude Include="resource.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="IMB-MT.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/WINDOWS/IMB-MT_VS_2017/resource.h b/WINDOWS/IMB-MT_VS_2017/resource.h
new file mode 100644
index 00000000..8ba6e029
--- /dev/null
+++ b/WINDOWS/IMB-MT_VS_2017/resource.h
@@ -0,0 +1,14 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by IMB-MT.rc
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1001
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.rc b/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.rc
index 365ba5b3..8678addf 100644
--- a/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.rc
+++ b/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-NBC"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-NBC.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.vcxproj b/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.vcxproj
index d67aad49..97c8215a 100644
--- a/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.vcxproj
+++ b/WINDOWS/IMB-NBC_VS_2013/IMB-NBC.vcxproj
@@ -58,21 +58,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +86,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +96,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,50 +105,55 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_nbc.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_nbc.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -158,4 +162,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.rc b/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.rc
old mode 100755
new mode 100644
index 365ba5b3..8678addf
--- a/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.rc
+++ b/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-NBC"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-NBC.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.sln b/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.sln
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.vcxproj b/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.vcxproj
old mode 100755
new mode 100644
index 11566363..9cfeea49
--- a/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.vcxproj
+++ b/WINDOWS/IMB-NBC_VS_2015/IMB-NBC.vcxproj
@@ -58,21 +58,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +86,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +96,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,50 +105,55 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_nbc.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_nbc.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -158,4 +162,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-NBC_VS_2015/resource.h b/WINDOWS/IMB-NBC_VS_2015/resource.h
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.rc b/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.rc
index 365ba5b3..8678addf 100644
--- a/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.rc
+++ b/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-NBC"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-NBC.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.vcxproj b/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.vcxproj
index 4cbf2dee..01bc5d58 100644
--- a/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.vcxproj
+++ b/WINDOWS/IMB-NBC_VS_2017/IMB-NBC.vcxproj
@@ -13,7 +13,7 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{4219D5A9-6972-4B1C-9F07-EB97EEF4EDDA}</ProjectGuid>
     <RootNamespace>IMBNBC</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
+    <Keyword>Win32Proj</Keyword>
     <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -59,21 +59,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -88,9 +87,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;NBC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -98,7 +97,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -107,50 +106,55 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_allgather.c" />
-    <ClCompile Include="..\..\src\IMB_allgatherv.c" />
-    <ClCompile Include="..\..\src\IMB_allreduce.c" />
-    <ClCompile Include="..\..\src\IMB_alltoall.c" />
-    <ClCompile Include="..\..\src\IMB_alltoallv.c" />
-    <ClCompile Include="..\..\src\IMB_barrier.c" />
-    <ClCompile Include="..\..\src\IMB_bcast.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_gather.c" />
-    <ClCompile Include="..\..\src\IMB_gatherv.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_nbc.c" />
-    <ClCompile Include="..\..\src\IMB_reduce.c" />
-    <ClCompile Include="..\..\src\IMB_reduce_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatter.c" />
-    <ClCompile Include="..\..\src\IMB_scatterv.c" />
-    <ClCompile Include="..\..\src\IMB_sendrecv.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\NBC\NBC_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_allgather.c" />
+    <ClCompile Include="..\..\src_c\IMB_allgatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_allreduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoall.c" />
+    <ClCompile Include="..\..\src_c\IMB_alltoallv.c" />
+    <ClCompile Include="..\..\src_c\IMB_barrier.c" />
+    <ClCompile Include="..\..\src_c\IMB_bcast.c" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_gather.c" />
+    <ClCompile Include="..\..\src_c\IMB_gatherv.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_nbc.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce.c" />
+    <ClCompile Include="..\..\src_c\IMB_reduce_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatter.c" />
+    <ClCompile Include="..\..\src_c\IMB_scatterv.c" />
+    <ClCompile Include="..\..\src_c\IMB_sendrecv.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -159,4 +163,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.rc b/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.rc
index 7423daeb..ef0624dc 100644
--- a/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.rc
+++ b/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-RMA"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-RMA.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.vcxproj b/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.vcxproj
index 54737f16..a96671db 100644
--- a/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.vcxproj
+++ b/WINDOWS/IMB-RMA_VS_2013/IMB-RMA.vcxproj
@@ -58,21 +58,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +86,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +96,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,40 +105,45 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_rma.c" />
-    <ClCompile Include="..\..\src\IMB_rma_atomic.c" />
-    <ClCompile Include="..\..\src\IMB_rma_get.c" />
-    <ClCompile Include="..\..\src\IMB_rma_put.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_rma.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_atomic.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_get.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_put.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -148,4 +152,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.rc b/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.rc
old mode 100755
new mode 100644
index 7423daeb..ef0624dc
--- a/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.rc
+++ b/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-RMA"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-RMA.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.sln b/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.sln
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.vcxproj b/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.vcxproj
old mode 100755
new mode 100644
index 79d92da2..ab245896
--- a/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.vcxproj
+++ b/WINDOWS/IMB-RMA_VS_2015/IMB-RMA.vcxproj
@@ -58,21 +58,18 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_cpp\helpers;$(ProjectDir)\..\..\src_c;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
-      <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -87,9 +84,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_cpp\helpers;$(ProjectDir)\..\..\src_c;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -97,7 +94,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -106,40 +103,45 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_rma.c" />
-    <ClCompile Include="..\..\src\IMB_rma_atomic.c" />
-    <ClCompile Include="..\..\src\IMB_rma_get.c" />
-    <ClCompile Include="..\..\src\IMB_rma_put.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_rma.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_atomic.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_get.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_put.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
diff --git a/WINDOWS/IMB-RMA_VS_2015/resource.h b/WINDOWS/IMB-RMA_VS_2015/resource.h
old mode 100755
new mode 100644
diff --git a/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.rc b/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.rc
index 7423daeb..ef0624dc 100644
--- a/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.rc
+++ b/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.rc
@@ -53,8 +53,8 @@ END
 //
 
 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 2018,0,1,0
- PRODUCTVERSION 2018,0,1,0
+ FILEVERSION 2019,0,0,0
+ PRODUCTVERSION 2019,0,0,0
  FILEFLAGSMASK 0x17L
 #ifdef _DEBUG
  FILEFLAGS 0x1L
@@ -71,12 +71,12 @@ BEGIN
         BEGIN
             VALUE "CompanyName", "Intel Corporation"
             VALUE "FileDescription", "Intel(R) MPI Benchmarks"
-            VALUE "FileVersion", "2018.0.1"
+            VALUE "FileVersion", "2019.0.0"
             VALUE "InternalName", "IMB-RMA"
-            VALUE "LegalCopyright", "Copyright (C) 2003-2017 Intel Corporation. All rights reserved."
+            VALUE "LegalCopyright", "Copyright 2003-2018 Intel Corporation."
             VALUE "OriginalFilename", "IMB-RMA.exe"
             VALUE "ProductName", "Intel(R) MPI Benchmarks"
-            VALUE "ProductVersion", "2018.0.1"
+            VALUE "ProductVersion", "2019.0.0"
         END
     END
     BLOCK "VarFileInfo"
diff --git a/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.vcxproj b/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.vcxproj
index 931d68d9..987b018a 100644
--- a/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.vcxproj
+++ b/WINDOWS/IMB-RMA_VS_2017/IMB-RMA.vcxproj
@@ -13,7 +13,7 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{4219D5A9-6972-4B1C-9F07-EB97EEF4EDDA}</ProjectGuid>
     <RootNamespace>IMBRMA</RootNamespace>
-    <Keyword>Win32Proj</Keyword>
+    <Keyword>Win32Proj</Keyword>
     <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -59,21 +59,20 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
-      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level1</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <CompileAs>CompileAsC</CompileAs>
       <ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\debug\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <ImportLibrary>
@@ -88,9 +87,9 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%I_MPI_ROOT%\intel64\include;$(ProjectDir)\..\..\src_cpp;$(ProjectDir)\..\..\src_c;$(ProjectDir)\..\..\src_cpp\helpers;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN_IMB;_CRT_SECURE_NO_DEPRECATE;RMA;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <PrecompiledHeader>
       </PrecompiledHeader>
@@ -98,7 +97,7 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>%I_MPI_ROOT%\intel64\lib\release\impi.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <OptimizeReferences>true</OptimizeReferences>
@@ -107,40 +106,45 @@
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <ClCompile Include="..\..\src\IMB.c" />
-    <ClCompile Include="..\..\src\IMB_benchlist.c" />
-    <ClCompile Include="..\..\src\IMB_chk_diff.c" />
-    <ClCompile Include="..\..\src\IMB_cpu_exploit.c" />
-    <ClCompile Include="..\..\src\IMB_declare.c" />
-    <ClCompile Include="..\..\src\IMB_err_handler.c" />
-    <ClCompile Include="..\..\src\IMB_g_info.c" />
-    <ClCompile Include="..\..\src\IMB_init.c" />
-    <ClCompile Include="..\..\src\IMB_init_transfer.c" />
-    <ClCompile Include="..\..\src\IMB_mem_manager.c" />
-    <ClCompile Include="..\..\src\IMB_output.c" />
-    <ClCompile Include="..\..\src\IMB_parse_name_rma.c" />
-    <ClCompile Include="..\..\src\IMB_rma_atomic.c" />
-    <ClCompile Include="..\..\src\IMB_rma_get.c" />
-    <ClCompile Include="..\..\src\IMB_rma_put.c" />
-    <ClCompile Include="..\..\src\IMB_strgs.c" />
-    <ClCompile Include="..\..\src\IMB_user_set_info.c" />
-    <ClCompile Include="..\..\src\IMB_utils.c" />
-    <ClCompile Include="..\..\src\IMB_warm_up.c" />
+    <ClCompile Include="..\..\src_cpp\imb.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser.cpp" />
+    <ClCompile Include="..\..\src_cpp\args_parser_utests.cpp" />
+    <ClCompile Include="..\..\src_cpp\scope.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_suite.cpp" />
+    <ClCompile Include="..\..\src_cpp\RMA\RMA_benchmark.cpp" />
+    <ClCompile Include="..\..\src_cpp\benchmark_suites_collection.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="..\..\src\IMB_appl_errors.h" />
-    <ClInclude Include="..\..\src\IMB_benchmark.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_ext.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_io.h" />
-    <ClInclude Include="..\..\src\IMB_bnames_mpi1.h" />
-    <ClInclude Include="..\..\src\IMB_comm_info.h" />
-    <ClInclude Include="..\..\src\IMB_comments.h" />
-    <ClInclude Include="..\..\src\IMB_declare.h" />
-    <ClInclude Include="..\..\src\IMB_err_check.h" />
-    <ClInclude Include="..\..\src\IMB_mem_info.h" />
-    <ClInclude Include="..\..\src\IMB_prototypes.h" />
-    <ClInclude Include="..\..\src\IMB_settings.h" />
-    <ClInclude Include="..\..\src\IMB_settings_io.h" />
+    <ClCompile Include="..\..\src_c\IMB_benchlist.c" />
+    <ClCompile Include="..\..\src_c\IMB_chk_diff.c" />
+    <ClCompile Include="..\..\src_c\IMB_cpu_exploit.c" />
+    <ClCompile Include="..\..\src_c\IMB_declare.c" />
+    <ClCompile Include="..\..\src_c\IMB_err_handler.c" />
+    <ClCompile Include="..\..\src_c\IMB_g_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_init.c" />
+    <ClCompile Include="..\..\src_c\IMB_init_transfer.c" />
+    <ClCompile Include="..\..\src_c\IMB_mem_manager.c" />
+    <ClCompile Include="..\..\src_c\IMB_output.c" />
+    <ClCompile Include="..\..\src_c\IMB_parse_name_rma.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_atomic.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_get.c" />
+    <ClCompile Include="..\..\src_c\IMB_rma_put.c" />
+    <ClCompile Include="..\..\src_c\IMB_strgs.c" />
+    <ClCompile Include="..\..\src_c\IMB_user_set_info.c" />
+    <ClCompile Include="..\..\src_c\IMB_utils.c" />
+    <ClCompile Include="..\..\src_c\IMB_warm_up.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\src_cpp\helpers\original_benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\helpers\helper_IMB_functions.h" />
+    <ClInclude Include="..\..\src_cpp\args_parser.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite_base.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suite.h" />
+    <ClInclude Include="..\..\src_cpp\benchmark_suites_collection.h" />
+    <ClInclude Include="..\..\src_cpp\smart_ptr.h" />
+    <ClInclude Include="..\..\src_cpp\utils.h" />
+    <ClInclude Include="..\..\src_cpp\scope.h" />
     <ClInclude Include="resource.h" />
   </ItemGroup>
   <ItemGroup>
@@ -149,4 +153,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/WINDOWS/mkvsprojs.sh b/WINDOWS/mkvsprojs.sh
new file mode 100755
index 00000000..00a17677
--- /dev/null
+++ b/WINDOWS/mkvsprojs.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Generates the Visual Studio 2015 and 2017 project directories from the
+# 2013 one: copies <project>_2013 to <project>_2015 and <project>_2017, then
+# rewrites the ToolsVersion and toolset strings in the copied .vcxproj files.
+#
+# Usage: mkvsprojs.sh <project>     (for example: mkvsprojs.sh IMB-EXT_VS)
+
+die() {
+    echo "ERROR: $*" >&2
+    exit 1
+}
+
+PROJ=${1:-}
+[ -n "$PROJ" ] || die "usage: $0 <project> (for example: $0 IMB-EXT_VS)"
+[ -d "${PROJ}_2013" ] || die "directory '${PROJ}_2013' not found"
+
+# Look for the project file BEFORE removing anything, so that a template
+# directory without a .vcxproj does not cost us the existing 2015/2017 copies.
+shopt -s nullglob
+VCXPROJ=( "${PROJ}_2013"/*.vcxproj )
+shopt -u nullglob
+[ "${#VCXPROJ[@]}" -gt 0 ] || die "no .vcxproj file in '${PROJ}_2013'"
+
+for VER in 2015 2017; do
+    if [ -d "${PROJ}_${VER}" ]; then
+        rm -rf -- "${PROJ}_${VER}" || die "cannot remove '${PROJ}_${VER}'"
+    fi
+    cp -r -- "${PROJ}_2013" "${PROJ}_${VER}" || die "cannot copy '${PROJ}_2013' to '${PROJ}_${VER}'"
+done
+
+# 2015 copy: ToolsVersion 12.0 -> 14.0, toolset v120 -> v140.
+sed -i 's!ToolsVersion="12.0"!ToolsVersion="14.0"!' "${PROJ}_2015"/*.vcxproj
+sed -i 's!Toolset>v120<!Toolset>v140<!' "${PROJ}_2015"/*.vcxproj
+
+# 2017 copy: ToolsVersion 12.0 -> 15.0, toolset v120 -> v141, and a
+# WindowsTargetPlatformVersion element inserted after the Win32Proj keyword.
+sed -i 's!ToolsVersion="12.0"!ToolsVersion="15.0"!' "${PROJ}_2017"/*.vcxproj
+sed -i 's!<Keyword>Win32Proj</Keyword>!<Keyword>Win32Proj</Keyword>\n    <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>!' "${PROJ}_2017"/*.vcxproj
+sed -i 's!Toolset>v120<!Toolset>v141<!' "${PROJ}_2017"/*.vcxproj
diff --git a/src/GNUmakefile b/src/GNUmakefile
deleted file mode 100755
index b1aaa6e7..00000000
--- a/src/GNUmakefile
+++ /dev/null
@@ -1,47 +0,0 @@
-default: MPI1
-
-all: EXT IO MPI1 NBC RMA
-
-MPI1: feedback
-	make -f make_ict IMB-MPI1
-
-EXT:
-	make -f make_ict IMB-EXT
-
-IO:
-	make -f make_ict IMB-IO
-
-NBC:
-	make -f make_ict IMB-NBC
-
-RMA:
-	make -f make_ict IMB-RMA
-
-feedback:
-	@echo " "
-	@echo "This Makefile uses make_ict to build IMB "
-	@echo "based on Intel(R) Parallel Studio"
-	@echo " "
-	@echo "Targets: MPI1 (default), EXT, IO, NBC, all, clean"
-	@echo "         MPI1 builds the MPI-1 standard benchmarks"
-	@echo "         EXT builds the one sided communications benchmarks"
-	@echo "         IO builds the MPI-File I\/O benchmarks"
-	@echo "         NBC builds the nonblocking collective operations benchmarks"
-	@echo "         RMA builds the one-sided benchmarks (MPI-3 plus passive mode)"
-	@echo "         all = MPI1+EXT+IO+NBC+RMA"
-	@echo " "
-	@echo "When an Intel(R) MPI Library install and mpiicc path exists,"
-	@echo "this should work immediately."
-	@echo " "
-	@echo "Alternatively, use "
-	@echo " "
-	@echo "gmake -f make_mpich"
-	@echo " "
-	@echo "to install an mpich or similar version; for this,"
-	@echo "you normally have to edit at least the MPI_HOME"
-	@echo "variable provided in make_mpich"
-	@echo " "
-
-clean:
-	/bin/rm -rf *.o *~ PI* core IMB-IO IMB-EXT IMB-MPI1 IMB-NBC IMB-RMA exe_io exe_ext exe_mpi1 exe_nbc exe_rma IMB_out IMB_out_* build_MPIIO build_MPI1 build_EXT build_NBC build_RMA
-
diff --git a/src/IMB.c b/src/IMB.c
deleted file mode 100644
index a3e4798d..00000000
--- a/src/IMB.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB.c 
-
- Implemented functions: 
-
- main;
-
- ***************************************************************************/
-
-#include "mpi.h"
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-extern int num_alloc, num_free;
-
-
-/**********************************************************************/
-
-
-
-int main(int argc, char **argv)
-/*
-
-
-
-Input variables: 
-
--argc                 (type int)
-                      Number of command line arguments
-                      
-
--argv                 (type char **)                      
-                      List of command line arguments
-                      
-
-
-Return value          (type int)                      
-                      0 always
-                      
-
-
-*/
-{
-    struct comm_info C_INFO;   /* BASIC SETTINGS STRUCTURE    */
-    struct Bench* BList;       /* List of Benchmarks          */
-    
-    struct Bench* Bmark;
-    
-    #ifdef CHECK
-    int NFAIL, NSUCCESS;
-    #endif
-    
-    char 	*p;
-    int	i,j,iter,imod,ierr;
-    int	NP,NP_min;
-    int	do_it  ; 
-    
-    int	header;              
-    int	size;             
-    int	MAXMSG;
-    int	x_sample,n_sample;            
-    int ci_np; /* number of procs adjusted in case of ParallelTransferMsgRate benchmark */
-    /* IMB 3.1 << */
-    struct iter_schedule ITERATIONS;
-    int 	mem_ok;
-    /* >> IMB 3.1  */
-    MODES 	BMODE;            
-    double  time[MAX_TIME_ID];
-    
-    Type_Size unit_size;
-
-#ifdef USE_MPI_INIT_THREAD
-    if( (ierr=MPI_Init_thread(&argc,&argv,mpi_thread_desired,&mpi_thread_environment))!=MPI_SUCCESS) IMB_err_hand(1, ierr);
-#else
-    if( (ierr=MPI_Init(&argc,&argv))!=MPI_SUCCESS) IMB_err_hand(1, ierr);
-#endif /*USE_MPI_INIT_THREAD*/
-
-    IMB_set_default(&C_INFO);
-
-    IMB_init_pointers(&C_INFO);
-
-    /* IMB 3.1 << */
-    if( IMB_basic_input(&C_INFO,&BList,&ITERATIONS,&argc,&argv,&NP_min)<0 )
-    /* >> IMB 3.1  */
-    {
-	/* IMB_3.0: help mode */
-	if( C_INFO.w_rank==0 ){
-	    IMB_help();
-	}
-
-	MPI_Barrier(MPI_COMM_WORLD);
-	IMB_free_all(&C_INFO, &BList, &ITERATIONS);
-	MPI_Finalize();
-	return 0;
-    }
-
-    /* IMB 3.1 << */
-    IMB_show_selections(&C_INFO,BList,&argc,&argv);
-    /* >> IMB 3.1  */
-
-    /* LOOP OVER INDIVIDUAL BENCHMARKS */
-    j=0;
-    while( (p=BList[j].name) )
-    {
-	Bmark = BList+j;
-
-    ci_np = C_INFO.w_num_procs;
-    if( Bmark->RUN_MODES[0].type == ParallelTransferMsgRate )
-    {
-        ci_np -= ci_np % 2;
-        NP_min += NP_min % 2;
-    }
-	if( Bmark->RUN_MODES[0].type != BTYPE_INVALID )
-	{
-	    NP=max(1,min(ci_np,NP_min));
-
-	    if( Bmark->RUN_MODES[0].type == SingleTransfer || Bmark->RUN_MODES[0].type == SingleElementTransfer) 
-	    {
-#ifndef MPIIO
-		NP = (min(2,ci_np));
-#else
-		NP = 1;
-#endif
-	    }
-
-	    do_it = 1;
-/* LOOP OVER PROCESS NUMBERS */
-	    while ( do_it )
-	    {
-		if ( IMB_valid(&C_INFO,Bmark,NP) )
-		{
-		    if(IMB_init_communicator(&C_INFO,NP)!=0) IMB_err_hand(0,-1);
-#ifdef MPIIO
-		    if(IMB_init_file(&C_INFO,Bmark,&ITERATIONS,NP)!=0) IMB_err_hand(0,-1);
-#endif
-
-#ifdef RMA            
-            /* Now when communicator/s is created, update scaling of reported timing values.
-             * Some of RMA benchmarks accesses memory of all other processes, so their 
-             * bandwidth results should be update accordingly */
-             IMB_adjust_timings_scale(&C_INFO, Bmark);            
-#endif            
-		    /* MINIMAL OUTPUT IF UNIT IS GIVEN */
-		    if (C_INFO.w_rank==0)
-		    {
-			if(unit != stdout) 
-			    printf("# Running %s; see file \"%s\" for results\n",p,OUTPUT_FILENAME);
-		    };
-		    header=1;
-
-#ifdef EXT
-		    MPI_Type_size(C_INFO.red_data_type,&unit_size);
-#else
-        
-		    if (Bmark->reduction || 
-                Bmark->RUN_MODES[0].type == SingleElementTransfer)
-            {    
-			    MPI_Type_size(C_INFO.red_data_type,&unit_size);
-            }    
-		    else
-            {
-                MPI_Type_size(C_INFO.s_data_type,&unit_size);
-            }    
-#endif
-
-		    MAXMSG=(1<<C_INFO.max_msg_log)/unit_size* unit_size;
-
-		    for( imod=0; imod < Bmark->N_Modes; imod++ )
-		    {
-                double sample_time  = MPI_Wtime();
-                int    time_limit[] = {0, 0};
-
-			BMODE=&Bmark->RUN_MODES[imod];
-
-			/* IMB 3.1 << */
-			// x_sample calc => IMB_init_buffers_iter
-			/* >> IMB 3.1  */
-
-			header=header | (imod > 0);
-	
-			iter=0;
-			size =0;
-
-            Bmark->sample_failure = 0;
-            /* LOOP OVER MESSAGE LENGTHS */
-            while ( ((C_INFO.n_lens == 0 && size < MAXMSG ) ||
-                     (C_INFO.n_lens > 0  && iter < C_INFO.n_lens))
-                    && (Bmark->sample_failure != SAMPLE_FAILED_TIME_OUT) )
-            {
-                if (Bmark->RUN_MODES[0].type == SingleElementTransfer) {
-                    /* just one size needs to be tested (the size of one element) */
-                    MPI_Type_size(C_INFO.red_data_type,&size);
-                } else {
-                    if (C_INFO.n_lens > 0) {
-                        size = C_INFO.msglen[iter];
-                    } else {
-                        if( iter == 0 ) {
-                            size = 0;
-                        } else if (iter == 1) {
-                            size = ((1<<C_INFO.min_msg_log) + unit_size - 1)/unit_size*unit_size;
-#ifdef EXT
-                            size = min(size, asize);
-#endif
-                        } else {
-                            size = min(MAXMSG,size+size);
-                        }
-                    }
-                }
-
-                if (size > MAXMSG) {
-                    if (C_INFO.w_rank == 0) {
-                        fprintf(unit,"Attention, msg size %d truncated to %d\n", size,MAXMSG);
-                    }
-                    size = MAXMSG;
-                } 
-
-                size = (size+unit_size-1)/unit_size*unit_size;
-
-                if( Bmark->RUN_MODES[0].type == Sync ) {
-                    size = MAXMSG;
-                    iter = C_INFO.n_lens - 1;
-                }
-
-                /* IMB 3.1 << */
-                // put some initialization stuff into:
-                IMB_init_buffers_iter(&C_INFO, &ITERATIONS, Bmark, BMODE, iter, size);
-
-                if (!Bmark->sample_failure) {
-
-                    time_limit[1] = 0;
-
-                    if (C_INFO.rank >= 0) {
-                        time_limit[1] = (MPI_Wtime() - sample_time < max(max(C_INFO.n_lens, C_INFO.max_msg_log - C_INFO.min_msg_log) - 1, 1)*ITERATIONS.secs) ? 0 : 1;
-                    }
-                }
-
-                MPI_Allreduce(&time_limit[1], &time_limit[0], 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
-
-                if (time_limit[0]) {
-                    Bmark->sample_failure = SAMPLE_FAILED_TIME_OUT;
-                }
-
-                if( !Bmark->sample_failure ) {
-                    IMB_warm_up  (&C_INFO,Bmark,&ITERATIONS,iter);                    
-#ifdef MPI1
-                    C_INFO.select_source = Bmark->select_source;
-#endif
-                    Bmark->Benchmark(&C_INFO,size,&ITERATIONS,BMODE,time);
-                }
-                /* >> IMB 3.1  */
-
-                /* Synchronization, in particular for idle processes
-                which have to wait in a well defined manner */
-                MPI_Barrier(MPI_COMM_WORLD);
-
-                /* IMB 3.1 << */
-                IMB_output   (&C_INFO,Bmark,BMODE,header,size,&ITERATIONS,time);
-                /* >> IMB 3.1  */
-
-                IMB_close_transfer(&C_INFO, Bmark, size);
-
-                CHK_BRK;
-
-                header = 0;
-                if( Bmark->RUN_MODES[0].type == SingleElementTransfer )
-                    break;
-                iter++;
-            } /*while ( (C_INFO.n_lens...*/
-
-            if( !Bmark->success && C_INFO.w_rank == 0 ) {
-                fprintf(unit,"\n\n!!! Benchmark unsuccessful !!!\n\n");
-            }
-
-            CHK_BRK;
-            } /*for( imod=0*/
-
-            CHK_BRK;
-        } /*if ( IMB_valid(&C_INFO,Bmark,NP) )*/
-
-		/* CALCULATE THE NUMBER OF PROCESSES FOR NEXT STEP */
-		if( NP >= ci_np  ) {do_it=0;}
-        else
-		{
-		    NP=min(NP+NP,ci_np);
-		}	  
-
-#ifdef MPIIO
-		if( Bmark->RUN_MODES[0].type == SingleTransfer ) do_it = 0;
-#endif
-	
-		CHK_BRK;
-	    } /*while ( do_it )*/
-	} /*if( Bmark->RUN_MODES[0].type != BTYPE_INVALID )*/
-
-	CHK_BRK;
-	j++;
-    } /*while( (p=BList[j].name) )*/
-
-#ifdef CHECK
-    if( C_INFO.w_rank == 0 )
-    {
-	j=0;
-	NFAIL=0;
-	NSUCCESS=0;
-
-	while( (p=BList[j].name) )
-	{
-	    Bmark = BList+j;
-
-	    if( Bmark->RUN_MODES[0].type != BTYPE_INVALID )
-	    {
-		Bmark = BList+j;
-		if( !Bmark->success ) NFAIL ++;
-		else                  NSUCCESS++;
-	    } 
-	    j++;
-	}
-    
-	if( NFAIL == 0 && NSUCCESS>0 )
-	{
-	    fprintf(unit,"\n\n!!!!  ALL BENCHMARKS SUCCESSFUL !!!! \n\n");
-	}
-	else if( NSUCCESS>0 )
-	{
-	    if( NFAIL == 1 )
-		fprintf(unit,"\n\n!!!!  %d  BENCHMARK FAILED     !!!! \n\n",NFAIL);
-	    else
-		fprintf(unit,"\n\n!!!!  %d  BENCHMARKS FAILED     !!!! \n\n",NFAIL);
-
-	    j=0;
-
-	    while( (p=BList[j].name) )
-	    {
-		Bmark = BList+j;
-		if( Bmark->RUN_MODES[0].type != BTYPE_INVALID )
-		{
-		    if( Bmark->success ) fprintf(unit,"%s    : Successful\n",p);
-		    else                 fprintf(unit,"%s    : FAILED !! \n",p);
-		} 
-		j++;
-	    }
-	} /*else if( NSUCCESS>0 )*/
-    } /*if( C_INFO.w_rank == 0 )*/
-#endif /*#ifdef CHECK*/
-
-
-    /* IMB 3.1 << */
-    IMB_free_all(&C_INFO, &BList, &ITERATIONS);
-
-#ifdef CHECK
-    if( num_alloc == num_free ){
-	ierr=0;
-    }
-    else
-    {
-	fprintf(stderr,"pr %d: calls to IMB_v_alloc %d / IMB_v_free %d (doesn't seem ok, are unequal!)\n",C_INFO.w_rank,num_alloc,num_free);
-	ierr=1;
-    }
-
-    MPI_Allreduce(&ierr,&mem_ok,1,MPI_INT,MPI_MAX,MPI_COMM_WORLD);
-
-    if( C_INFO.w_rank==0 )
-    {
-	if( mem_ok == 0 ) 
-	{
-	    fprintf(stderr,"# of calls to IMB_v_alloc / IMB_v_free match on all processes\n");
-	}
-    }
-#endif /*#ifdef CHECK*/
-
-    MPI_Barrier(MPI_COMM_WORLD);
-    IMB_end_msg(&C_INFO);
-
-    /* >> IMB 3.1  */
-    MPI_Finalize();
-
-    return 0;
-} /* end of main*/
diff --git a/src/IMB_cache.h b/src/IMB_cache.h
deleted file mode 100644
index f9124729..00000000
--- a/src/IMB_cache.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define CACHE_SIZE 1024
-#define CACHE_LINE_SIZE 64
diff --git a/src/IMB_chk_diff.c b/src/IMB_chk_diff.c
deleted file mode 100644
index 577d816e..00000000
--- a/src/IMB_chk_diff.c
+++ /dev/null
@@ -1,1548 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- A collection of auxiliary functions ONLY for the CHECK mode of IMB
- ===================================================================
-
- File: IMB_chk_diff.c 
-
- Implemented functions: 
-
- IMB_chk_dadd;
- IMB_ddiff;
- IMB_show;
- IMB_err_msg;
- IMB_chk_diff;
- IMB_cmp_cat;
- IMB_chk_contiguous;
- IMB_chk_distr;
- IMB_chk_contained;
- IMB_compute_crc;
-
- ***************************************************************************/
-
-
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-
-#include "IMB_prototypes.h"
-
-#include <limits.h>
-
-
-void IMB_chk_dadd(void* AUX, int Locsize, size_t buf_pos, 
-                  int rank0, int rank1)
-/*
-
-                      
-                      Auxiliary, only for checking mode; 
-                      Creates reference accumulated values in a given 
-                      buffer section, accumulated over given processor ranks 
-                      
-
-
-Input variables: 
-
--Locsize              (type int)                      
-                      Size of buffer section to check
-                      
-
--buf_pos              (type int)                      
-                      Start position of buffer section
-                      
-
--rank0                (type int)
--rank1                (type int)                      
-                      Process' values between rank0 and rank1 are accumulated
-                      
-
-
-In/out variables: 
-
--AUX                  (type void*)                      
-                      Contains accumulated values
-                      
-
-
-*/
-{
-/* Sum up all process' data buffers in certain window */
-    int  rank;
-    size_t i;
-
-    for(i=0; i<Locsize/asize; i++)
-	((assign_type*)AUX)[i]  = 0.;
-
-    for(rank = rank0; rank<= rank1; rank++)
-    {
-	for(i=0; i<Locsize/asize; i++)
-	    ((assign_type*)AUX)[i] += BUF_VALUE(rank,buf_pos/asize+i);
-    }
-}
-
-
-double IMB_ddiff(assign_type *A, assign_type *B, size_t len, 
-                 size_t *fault_pos)
-/*
-
-                      
-                      Compares the values of 2 buffers A, B and returns max. diff
-                      
-
-
-Input variables: 
-
--A                    (type assign_type *)                      
-                      Buffer of values
-                      
-
--B                    (type assign_type *)                      
-                      Another buffer of values to be checked against A
-                      
-
--len                  (type int)                      
-                      Length (in assign_type items) of A, B
-                      
-
-
-Output variables: 
-
--fault_pos            (type int *)                      
-                      Position of first non tolerable deviation
-                      
-
-
-Return value          (type double)                      
-                      Deviation of A and B
-                      
-
-
-*/
-{
-/* max. relative difference of vectors A/B */
-    double D,d1, rel;
-    size_t i;
-
-    D=0.;
-
-    d1 = -1.;
-    *fault_pos = CHK_NO_FAULT;  
-
-    if( len > 0 )
-    {
-	for(i =0; i<len && d1 <= TOL; i++)
-	{
-	    if( A[i] != 0. ) rel=A_ABS(A[i]);
-	    else rel=1.;
- 
-	    d1 = A_ABS(A[i]-B[i])/rel;
-
-	}
-	D = d1;
-
-	if( D  > TOL ) 
-	{
-	    D=1.;
-	    IMB_Assert(i>0);
-	    *fault_pos = (i-1)*asize;
-	}
-
-    }
-
-    return D;
-
-}
-
-
-void IMB_show(char* text, struct comm_info* c_info, void* buf, 
-              size_t loclen, size_t totlen, int j_sample, 
-              POSITIONING fpos)
-/*
-
-                      
-                      Shows an excerpt of erroneous buffer if occurs in check mode
-                      
-
-
-Input variables: 
-
--text                 (type char*)                      
-                      Accompanying text to put out
-                      
-
--loclen               (type int)                      
-                      Local length of buffer
-                      
-
--totlen               (type int)                      
-                      Total length of buffer (for gathered or shared access buffers)
-                      
-
--j_sample             (type int)                      
-                      Number of sample the error occurred
-                      
-
--fpos                 (type POSITIONING)                      
-                      File positionning (if relevant)
-                      
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--buf                  (type void*)                      
-                      Given check buffer
-                      
-
-
-*/
-{
-    size_t i;
-
-    fprintf(unit,"Process %d: %s",c_info->rank,text);
-    fprintf(unit,"\n");
-
-#ifdef DEBUG
-    size_t j;
-    fprintf(dbg_file,"Process %d: %s",c_info->rank,text);
-    fprintf(dbg_file,"\n");
-#endif /*DEBUG*/
-
-#ifdef MPIIO
-    {
-	MPI_Offset Offset;
-	switch(fpos)
-	{
-	    case indv_block:
-		Offset = (MPI_Offset)(j_sample*totlen);
-		break;
-
-	    case explicit:
-		Offset = c_info->split.Offset+(MPI_Offset)(j_sample*totlen);
-		break;
-
-	    case private:
-		Offset = (MPI_Offset)(j_sample*loclen);
-		break;
-
-	    case shared:
-		Offset = (MPI_Offset)(-1);
-		break;
-	}
-
-	if( fpos == shared )
-	    fprintf(unit,
-#ifdef WIN_IMB
-		    "Overall size = %I64u,"
-		    " Portion = %I64u,"
-#else
-		    "Overall size = %lu,"
-		    " Portion = %lu,"
-#endif /* WIN_IMB*/
-		    " #sample= %d\n",
-		    totlen,loclen,j_sample);
-	else if( fpos != -1 )
-	    fprintf(unit,
-#ifdef WIN_IMB
-		    "Overall size = %I64u,"
-		    " Portion = %I64u,"
-#else
-		    "Overall size = %lu,"
-		    " Portion = %lu,"
-#endif /* WIN_IMB*/
-		    " Startpos = %ld\n",
-		    totlen,loclen,(long)Offset);
-
-#ifdef DEBUG
-	if( fpos == shared )
-	    fprintf(dbg_file,
-#ifdef WIN_IMB
-			    "Overall size = %I64u,"
-			    " Portion = %I64u,"
-#else
-			    "Overall size = %lu,"
-			    " Portion = %lu,"
-#endif /*WIN_IMB*/
-			    " #sample= %d\n",
-			    totlen,loclen,j_sample);
-	else if( fpos != -1 )
-	    fprintf(dbg_file,
-#ifdef WIN_IMB
-			    "Overall size = %I64u,"
-			    " Portion = %I64u,"
-#else
-			    "Overall size = %lu,"
-			    " Portion = %lu,"
-#endif /*WIN_IMB*/
-			    " Startpos = %ld\n",
-			    totlen,loclen,(long)Offset);
-#endif /*DEBUG*/
-
-    }
-#endif /*MPIIO*/
-
-    if( loclen < asize )
-    {
-	if( loclen == 0 )
-	{
-	    fprintf(unit,"Buffer empty\n");
-#ifdef DEBUG
-	    fprintf(dbg_file,"Buffer empty\n");
-#endif
-	}
-	else
-	{
-	    fprintf(unit,"Buffer in bytewise int representation: ");
-
-	    for (i = 0; i<loclen; i++)
-		fprintf(unit,"%d ",((char*)buf)[i]);
-
-	    fprintf(unit,"\n");
-#ifdef DEBUG
-	    fprintf(dbg_file,"Buffer in bytewise int representation: ");
-
-	    for (i = 0; i<loclen; i++)
-		fprintf(dbg_file,"%d ",((char*)buf)[i]);
-	    fprintf(dbg_file,"\n");
-#endif /*DEBUG*/
-	}
-    }
-    else
-    {
-	if( loclen >= 2*asize)
-	    fprintf(unit,"Buffer, 1st and last entry: ");
-	else
-	    fprintf(unit,"Buffer entry: ");
-
-#ifdef BUFFERS_INT
-	fprintf(unit,"%d ",((assign_type*)buf)[0]);
-	if(loclen>=2*asize) fprintf(unit,"%d ",((assign_type*)buf)[loclen/asize-1]);
-#endif
-
-#ifdef BUFFERS_FLOAT
-	fprintf(unit,"%f ",((assign_type*)buf)[0]);
-	if(loclen>=2*asize) fprintf(unit,"%f ",((assign_type*)buf)[loclen/asize-1]);
-#endif
-
-	fprintf(unit,"\n");
-
-#ifdef DEBUG
-	if(err_flag)
-	    for(j=0; j<loclen/asize; j+=5)
-	    {
-#ifdef BUFFERS_INT
-		for(i=j; i<min(loclen/asize,j+5); i++)
-		    fprintf(dbg_file,"%d ",((assign_type*)buf)[i]);
-#endif /*BUFFERS_INT*/
-#ifdef BUFFERS_FLOAT
-		for(i=j; i<min(loclen/asize,j+5); i++)
-		    fprintf(dbg_file,"%f ",((assign_type*)buf)[i]);
-#endif /*BUFFERS_FLOAT*/
-		fprintf(dbg_file,"\n");
-	    } /*for*/*/
-#endif /*DEBUG*/
-    }
-    fflush(unit);
-
-#ifdef DEBUG
-    fflush(dbg_file);
-#endif
-
-}
-
-void IMB_err_msg(struct comm_info* c_info, char* text, size_t totsize, 
-                 int j_sample)
-/*
-
-                      
-                      Outputs an brief error diagnostics if occurs
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--text                 (type char*)                      
-                      Accompanying text
-                      
-
--totsize              (type int)                      
-                      Size of the erroneous buffer
-                      
-
--j_sample             (type int)                      
-                      Number of sample the error occured in
-                      
-
-
-*/
-{
-    fprintf(unit,"%d: Error %s,"
-#ifdef WIN_IMB
-	    "size = %I64u,"
-#else
-	    "size = %lu,"
-#endif
-	    "sample #%d\n",
-	    c_info->rank,text,totsize,j_sample);
-}
-
-#ifdef CHECK
-
-
-
-void IMB_chk_diff(char* text, struct comm_info* c_info, void* RECEIVED, 
-                  size_t buf_pos, int Locsize, size_t Totalsize, 
-                  int unit_size, DIRECTION mode, POSITIONING fpos, 
-                  int n_sample, int j_sample, int source, 
-                  double* diff )
-/*
-
-                      
-                      Checks a received buffer against expected ref values
-                      
-
-
-Input variables: 
-
--text                 (type char*)                      
-                      Accompanying text
-                      
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--RECEIVED             (type void*)                      
-                      The buffer to be checked
-                      
-
--buf_pos              (type int)                      
-                      Beginning position (in units -> unit_size)
-                      
-
--Locsize              (type int)                      
-                      Local buffer size
-                      
-
--Totalsize            (type int)                      
-                      Total buffer size (in case of gathered buffers)
-                      
-
--unit_size            (type int)                      
-                      Base unit for positioning
-                      
-
--mode                 (type DIRECTION)                      
-                      Direction of the action that took place
-                      
-
--fpos                 (type POSITIONING)                      
-                      File positioning of the action that took place (if relevant)
-                      
-
--n_sample             (type int)                      
-                      # overall samples
-                      
-
--j_sample             (type int)                      
-                      current sample
-                      
-
--source               (type int)                      
-                      Sending process (if relevant)
-                      
-
-
-Output variables: 
-
--diff                 (type double*)                      
-                      The error against expected values
-                      
-
-
-*/
-{
-#ifdef MPIIO
-    MPI_File	restore;
-    MPI_Status	stat;
-    double	def_tmp;
-    int 	j, j1, j2, ierr, rank, allpos;
-    size_t	pos1, pos2;
-    int*	rankj;
-    size_t*	lenj;
-#endif
-
-    double defloc;
-
-    size_t faultpos, pos;
-
-    int    *all_ranks, Npos;
-    size_t *lengths;
-
-    if( err_flag ) return;
-
-    defloc = 0.;
-    faultpos = CHK_NO_FAULT;
-
-    if (Totalsize == 0) 
-    {
-	*diff = 0.;
-	return;
-    }
-
-#ifdef MPIIO
-
-    MPI_Barrier(c_info->File_comm);
-
-    if( mode == put )
-    {
-	if( c_info -> File_rank == 0 )
-	{
-
-	    IMB_alloc_buf(c_info,"Write check",0,Totalsize);
-
-	    ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
-				 c_info->amode, c_info->info, &restore);
-	    IMB_err_hand(1,ierr);
-
-	    if(source == -3 )
-	    {
-		IMB_i_alloc(size_t, lengths,c_info->File_num_procs*n_sample+n_sample-1,"chk_diff 1");
-		IMB_i_alloc(int, all_ranks,c_info->File_num_procs*n_sample+n_sample-1,"chk_diff 2");
-	    }
-	    else
-	    {
-		IMB_i_alloc(size_t, lengths,c_info->File_num_procs,"chk_diff 1a");
-		IMB_i_alloc(int, all_ranks,c_info->File_num_procs,"chk_diff 2a");
-	    }
-
-	    lenj  = lengths;
-	    rankj = all_ranks;
-	    allpos = 0;
-
-	    if( j_sample < 0 ) 
-	    {
-		j1 = 0; 
-		j2 = n_sample-1;
-	    }
-	    else
-	    {
-		j1 = j_sample; 
-		j2 = j_sample;
-	    }
-
-	    ierr = MPI_File_seek(restore,(MPI_Offset)(j1*Totalsize),MPI_SEEK_SET);
-	    MPI_ERRHAND(ierr);
-
-	    for( j=j1; j<=j2 && faultpos==CHK_NO_FAULT /*faultpos<0*/ ; j++ )
-	    {
-		IMB_Assert(Totalsize<=INT_MAX);
-
-		ierr = MPI_File_read(restore,c_info->r_buffer,
-				     (int) Totalsize,c_info->etype,&stat);
-
-		MPI_ERRHAND(ierr);
-		RECEIVED = c_info->r_buffer;
-
-		if( source == -3 )
-		{
-		    IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 1, 
-				lenj, rankj, &Npos, &faultpos, &def_tmp);
-
-		    lenj  += Npos;
-		    rankj += Npos;
-		    allpos+= Npos;
-		}
-		else
-		{
-		    IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 0, 
-				lenj, rankj, &Npos, &faultpos, &def_tmp);
-		}
-
-		defloc = max(defloc,def_tmp);
-
-	    } /*for( j=j...*/
-
-	    MPI_File_close(&restore);
-
-
-	    j_sample = j-1;
-
-	    IMB_free_aux();
-
-	    if( /*faultpos >= 0*/ faultpos != CHK_NO_FAULT ) 
-	    {
-
-		IMB_err_msg(c_info,text,Totalsize,j_sample);
-		fprintf(unit,
-			"Error: restored buffer from output file, invalid portion starting at pos."
-#ifdef WIN_IMB
-			"  %I64u\n",
-#else
-			"  %lu\n",
-#endif
-			(j_sample*Totalsize)+faultpos);
-
-		AUX = (void*)(((char*)RECEIVED)+faultpos);
-		IMB_show("Erroneous data:",c_info,AUX,Totalsize-faultpos,Totalsize-faultpos,j_sample,nothing);
-
-	    }
-	    else
-	    {
-		if( source == -3 )
-		{
-		    IMB_chk_distr(c_info, Totalsize, n_sample, lengths, all_ranks, allpos, &def_tmp);
-  
-		    if( def_tmp > 0. )
-		    {
-			IMB_err_msg(c_info,text,Totalsize,j_sample);
-			IMB_show("restored buffer from output file, has permuted data: ",
-				 c_info,RECEIVED,Totalsize,Totalsize,j_sample,nothing);
-		    }
-		}
-
-		defloc = max(defloc,def_tmp);
-
-	    } /*if( faultpos >= 0 ) */
-
-	    IMB_del_r_buf(c_info);
-	    IMB_v_free((void**)&lengths); IMB_v_free ((void**)&all_ranks);
-	} /*if( c_info -> File_rank == 0 )*/
-
-	fflush(unit);
-    }   /*if( mode == put )*/
-
-    if( mode == get )
-    {
-	size_t file_pos;
-	file_pos = j_sample*Totalsize;
-
-	*diff=0.;
-
-	IMB_alloc_aux(Totalsize," chk_diff 5");
-	IMB_init_file_content(AUX, file_pos, file_pos+Totalsize-1);
-
-	IMB_chk_contained(RECEIVED, Locsize, AUX, Totalsize, &pos, &faultpos, &def_tmp,
-			  "Compare received portion with file Content");
-
-	IMB_get_rank_portion(c_info->File_rank,c_info->File_num_procs,Totalsize,asize, 
-			     &pos1, &pos2);
-
-	if( /*faultpos >= 0*/ faultpos != CHK_NO_FAULT) /* the type of faultpos is changed to size_t*/
-	{
-	    err_flag=1;
-	    defloc = 1; 
-	    IMB_err_msg(c_info,text,Totalsize,j_sample);
-	    RECEIVED = (void*)((char*)RECEIVED+faultpos);
-
-	    fprintf(unit,
-		    "File position: "
-#ifdef WIN_IMB
-		    "%I64u\n",
-#else
-		    "%lu\n",
-#endif
-		    file_pos+pos+faultpos);
-
-	    IMB_show( "Read invalid portion: ",c_info, RECEIVED,
-		      Locsize-faultpos, Totalsize, j_sample, fpos);
-
-	    AUX = (void*)((char*)AUX + pos + faultpos);
-
-	    IMB_show( "Expected portion: ",c_info, AUX,
-		      Locsize-pos-faultpos, Locsize-pos-faultpos, j_sample, nothing);
-
-	    MPI_Gather(&pos,1,MPI_UNSIGNED_LONG, c_info->rdispl,1,MPI_INT,0,c_info->File_comm);
-	}
-	else
-	{
-	    if( source == -2 && Locsize > 0 )
-	    {
-		IMB_get_rank_portion(c_info->File_rank,c_info->File_num_procs,Totalsize,asize, 
-				     &pos1, &pos2);
-		if( pos1 != pos ) defloc = 1; 
-	    }
-   
-	    if( source == -3 )
-	    {
-		/* Check permuted buffer */
-
-		MPI_Gather(&pos,1,MPI_INT,c_info->rdispl,1,MPI_INT,0,c_info->File_comm);
-		MPI_Gather(&Locsize,1,MPI_INT,c_info->reccnt,1,MPI_INT,0,c_info->File_comm);
-
-		if( c_info->File_rank == 0 )
-		{
-		    IMB_chk_contiguous(c_info, c_info->rdispl, c_info->reccnt, &defloc );
-		}
-		else defloc=0.;
-
-	    }
-
-	    if( defloc > 0. )
-		IMB_err_msg(c_info,"Wrong portion ordering in read buffer",Totalsize,j_sample);
-	} /*if( faultpos >= 0 )*/
-
-	defloc = max(defloc,def_tmp);
-    } /*if( mode == get )*/
-
-    MPI_Barrier(c_info->File_comm);
-
-#else /*not  MPIIO*/
-
-    if( source >= 0 )
-    {
-	IMB_alloc_aux(Totalsize,"chk_diff 6");
-	IMB_ass_buf(AUX, source, buf_pos, 
-		    (buf_pos+Totalsize>0)? buf_pos+Totalsize-1 : 0, 1);
-
-   
-	if( Totalsize < asize )
-	{
-	    IMB_chk_contained(RECEIVED, Totalsize, AUX, Totalsize, &pos, &faultpos, &defloc,
-			      "Compare received with expected portion");
-
-	    if( /*faultpos>=0*/ faultpos != CHK_NO_FAULT) /* type of faultpos is changed to size_t*/
-	    {
-		faultpos = 0; 
-		defloc = 1.;
-	    }
-	}
-	else
-	    defloc =  IMB_ddiff((assign_type *)AUX, (assign_type *)RECEIVED, Totalsize/asize,&faultpos);
-
-    }
-    else if( source == -1 )
-    {
-	IMB_alloc_aux(Locsize,"chk_diff 7");
-
-	IMB_chk_dadd(AUX,Locsize,buf_pos,0,c_info->num_procs-1);
-
-	defloc = IMB_ddiff((assign_type *)AUX, (assign_type *)RECEIVED, Locsize/asize,&faultpos);
-   
-    }
-    else
-    {
-	if( source == -2 )
-	{
-	    lengths = NULL;
-	    all_ranks = NULL;
-
-	    IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 0, 
-			lengths, all_ranks, &Npos, &faultpos, &defloc);
-	}
-
-    }
-
-    if( /*faultpos>=0*/ faultpos != CHK_NO_FAULT) /* type of faultpos is changed to size_t*/
-    {
-	void* tmp = (void*)(((char *)RECEIVED)+faultpos);
-
-	IMB_err_msg(c_info,text,Totalsize,j_sample);
-	IMB_show( "Got invalid buffer: ",c_info, tmp, asize, asize, j_sample, -1);
-
-	fprintf(unit,
-#ifdef WIN_IMB
-		    "pos: %I64u\n"
-#else
-		    "pos: %lu\n"
-#endif /*WIN_IMB*/
-		    , faultpos);
-
-	tmp = (void*)(((char *)AUX     )+faultpos);
-
-	IMB_show( "Expected    buffer: ",c_info, tmp, asize, asize, j_sample, -1);
-
-	defloc = 1;
-    }
-    IMB_free_aux();
-#endif /*MPIIO*/
-
-
-    if( defloc > TOL ) 
-	err_flag = 1;
-
-    *diff = max(*diff,defloc);
-
-}
-
-
-void IMB_cmp_cat(struct comm_info *c_info, void* RECEIVED, size_t size, 
-                 size_t bufpos, int unit_size, int perm, 
-                 size_t* lengths, int*ranks, int* Npos, 
-                 size_t *faultpos, double* diff)
-/*
-
-                      
-                      Checks a received buffer which is a concatenation of 
-                      several processes' buffers
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info *)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--RECEIVED             (type void*)                      
-                      The buffer to be checked
-                      
-
--size                 (type int)                      
-                      Size of the buffer
-                      
-
--bufpos               (type int)                      
-                      First position to check (in units -> unit_size)
-                      
-
--unit_size            (type int)                      
-                      Base unit of positioning
-                      
-
--perm                 (type int)                      
-                      Logical flag: 1 if the different rank's portions
-                      are potentially in non natural order (relevant for
-                      shared file accesses)
-                      
-
-
-Output variables: 
-
--lengths              (type int*)                      
-                      An array of lengths (of a number of erroneous portions)
-                      
-
--ranks                (type int*)                      
-                      An array of ranks (the erroneous portions belong to)
-                      
-
--Npos                 (type int*)                      
-                      Numer of erroneous portions found (=size of 'lengths' and 'ranks' arrays)
-                      
-
--faultpos             (type int *)                      
-                      Position of first found fault
-                      
-
--diff                 (type double*)                      
-                      Diff value
-                      
-
-
-*/
-{
-    int    rank, NP; 
-    size_t pos1, pos2, pos, rsize, rem_size;
-
-    int chk_ok;
-    assign_type *a,*r;
-
-    double tmp_diff;
-
-    *diff = 0.;
-    *faultpos = CHK_NO_FAULT;
-
-    if( size == 0) return;
-
-#ifdef MPIIO
-    NP = c_info->File_num_procs;
-#else
-    NP = c_info->num_procs;
-#endif
-
-    rsize = (size+asize-1)/asize*asize;
-
-    IMB_alloc_aux(rsize, "chk_diff 8");
-
-    r = (assign_type*)AUX;
-    a = (assign_type*)RECEIVED;
-
-    chk_ok = 0;
-
-    if(perm)
-    {
-	*Npos = 0;
-	pos = 0;
-
-	/* Check beginning of buffer */
-
-	for( rank=0; rank<NP && !chk_ok; rank++ )
-	{
-	    if( size > 0)
-	    {
-		IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
-
-		rsize = pos2-pos1+1;
-		IMB_ass_buf(AUX, rank, 0, (rsize>0)? rsize-1 : 0, 1);
-	    } else
-	    {
-		rsize = 0;
-		pos2 = pos1 = 0;
-	    }
-
-	    
-  
-	    IMB_chk_contained(RECEIVED,min(asize,rsize),AUX,rsize,
-			      &pos,faultpos,&tmp_diff, NULL);
-
-	    /*if(*faultpos < 0 && pos>=0)*/ 
-	    /* the type of faultpos and pos is changed to size_t */
-	    if( *faultpos == CHK_NO_FAULT)
-	    {
-		if( rsize <= asize )   chk_ok=1;
-		else
-		{
-		    rem_size = rsize-pos;
-		    IMB_chk_contained((void*)(r+pos/asize), rem_size, RECEIVED, rem_size, &pos1,
-				      faultpos, &tmp_diff, "Check of first part of received buffer");
-
-		    /*if( *faultpos < 0 && pos1>=0 )*/
-		    /* the type of faultpos and pos is changed to size_t */
-		    if( *faultpos == CHK_NO_FAULT)
-		    {
-			lengths[*Npos] = rem_size;
-			ranks[*Npos] = rank;
-			pos = rem_size;
-			(*Npos)++;
-			chk_ok=1;
-		    }
-
-		} /*if( rsize <= asize )*/
-	    } /*if( *faultpos == CHK_NO_FAULT)*/
-	} /*for( rank=0...*/
-
-	if( !chk_ok )
-	{
-	    *faultpos = 0;
-	    *diff = 1.;
-	}
-
-	while( (pos < size) && chk_ok )
-	{
-	    chk_ok = 0;
-
-	    for( rank=0; rank<NP && !chk_ok; rank++ )
-	    {
-		IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
-
-		IMB_Assert(pos2>=pos1);
-		rsize = pos2-pos1+1;
-
-		/*if( rsize > 0 )*/  
-		//{
-
-		    rem_size = min(rsize,size-pos);
-
-		    IMB_ass_buf(AUX, rank, 0, (rsize>0)? rsize-1 : 0, 1);
-
-		    IMB_chk_contained(AUX,rem_size,(void*)(a+pos/asize),rem_size,&pos1,
-				      faultpos,&tmp_diff,NULL);
-
-		    /*if( *faultpos < 0 && pos1 >= 0 ) */
-		    /* the type of faultpos and pos is changed to size_t */
-		    if( *faultpos == CHK_NO_FAULT)
-		    { 
-			lengths[*Npos] = rem_size;
-			ranks[*Npos] = rank;
-			pos = pos+rsize; 
-			(*Npos)++;
-			chk_ok = 1;
-		    } 
-		//}   /* end if(rsize>0) */
-	    }   /* end for(rank..) */
-
-	    if( !chk_ok )
-	    {
-		*faultpos = pos;
-		*diff = 1;
-	    }
-
-	} /* end while */
-
-    } /* end if(perm) */
-    else
-    {
-
-	size_t curr=0;
-	void*  tmp;
-
-	for( rank=0; rank<NP ; rank++ )
-	{
-	    IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
-
-	    if( pos2>=pos1)
-		rsize = pos2-pos1+1;
-	    else 
-		rsize = 0;
-
-	    tmp = (void*)(((char*)RECEIVED)+curr);
-
-	    IMB_ass_buf(AUX, rank, bufpos, 
-			(bufpos+rsize>0) ? bufpos+rsize-1 : 0 , 1);
-
-	    IMB_chk_contained(AUX,rsize,tmp,rsize,&pos1,faultpos, &tmp_diff,"");
-	    *diff = max(*diff,tmp_diff);
-
-	    /*if(*faultpos<0 && pos1>= 0 )*/
-	    /* the type of faultpos and pos is changed to size_t */
-	    if( *faultpos == CHK_NO_FAULT)
-		curr+=rsize;
-	    else
-	    { 
-		*faultpos += curr;
-		*diff = 1;
-		break;
-	    }
-	} /*for( rank=0*/
-    } /* else if(!perm) */
-}
-
-
-
-
-void IMB_chk_contiguous(struct comm_info *c_info, int* rdispl, int* sizes, 
-                        double*diff)
-/*
-
-                      
-                      Checks whether arrays of displacements/sizes form a
-                      contiguous buffer
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info *)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--rdispl               (type int*)                      
-                      Array of displacements (one for each process)
-                      
-
--sizes                (type int*)                      
-                      Array of sizes (one for each process)
-                      
-
-
-Output variables: 
-
--diff                 (type double*)                      
-                      0 if contiguous, 1 else
-                      
-
-
-*/
-{
-    int i, j, NP, rank, p;
-
-#ifdef MPIIO
-    NP = c_info->File_num_procs;
-#else
-    NP = c_info->num_procs;
-#endif
-
-    for(i=0; i<NP; i++)
-    {
-	for(j=i; j<NP; j++)
-	    if( rdispl[j] < rdispl[i] )
-	    {
-		p = rdispl[i];
-		rdispl[i] = rdispl[j];
-		rdispl[j]=p;
-		p = sizes[i];
-		sizes[i] = sizes[j];
-		sizes[j]=p;
-	    }
-    }
-
-    p=0;
-    *diff = 0.;
-
-    for(rank = 0; rank<NP; rank++)
-    {
-	if( rdispl[rank] == p || sizes[rank] == 0 )
-	    p = p+sizes[rank];
-	else
-	    *diff = 1.; 
-    }
-
-    if( *diff > TOL )
-    {
-	fprintf(unit,"check of contiguity of received buffer portions failed\n");
-	fprintf(unit,"Got the following portions/displacements:\n");
-
-	for(rank = 0; rank<NP; rank++)
-	{
-	    fprintf(unit,"%d / %d; ",sizes[rank], rdispl[rank]);
-	}
-	fprintf(unit,"\n");
-    }
-
-}
-               
-
-
-
-void IMB_chk_distr(struct comm_info *c_info, size_t size, int n_sample, 
-                   size_t* lengths, int* ranks, int Npos, 
-                   double *diff)
-/*
-
-                      
-                      (Only for MPI-IO shared file pointer accesses)
-                      Checks whether a found set of section lengths/ranks in
-                      a file meets expectations
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info *)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Size of buffer
-                      
-
--n_sample             (type int)                      
-                      Number of samples expected in file
-                      
-
--lengths              (type int*)                      
-                      Array of section lengths found
-                      
-
--ranks                (type int*)                      
-                      Array of ranks belonging to sections
-                      
-
--Npos                 (type int)                      
-                      Number of sections
-                      
-
-
-Output variables: 
-
--diff                 (type double *)                      
-                      0 if set is consistent, 1 else
-                      
-
-
-*/
-{
-    int i, NP, rank;
-    size_t pos1, pos2;
-
-    *diff = 0.;
-
-    if( size == 0) return;
-
-#ifdef MPIIO
-    NP = c_info->File_num_procs;
-#else
-    NP = c_info->num_procs;
-#endif
-
-    for( rank=0; rank<NP; rank++ )
-    {
-	c_info->reccnt[rank]=0;
-    }
-
-    i=0;
-    while (i<Npos)
-    {
-	rank = ranks[i];
-
-	IMB_get_rank_portion(rank, NP, size, asize, &pos1, &pos2);
-
-	if( pos2-pos1+1 == lengths[i] ) 
-	{
-	    c_info->reccnt[rank]++;
-	}
-	else if( i<Npos )
-	{
-	    if( ranks[i+1] == rank && pos2-pos1+1 == (lengths[i]+(lengths[i+1])) )
-	    {
-		c_info->reccnt[rank]++; i++;
-	    }
-	}
-
-	i++;
-    } /*while*/
-
-    for( rank=0; rank<NP; rank++ )
-    {
-	IMB_get_rank_portion(rank, NP, size, asize, &pos1, &pos2);
-	if( pos2>= pos1 && c_info->reccnt[rank] != n_sample ) *diff = 1; 
-    }
-
-    if( *diff > TOL )
-    {
-	fprintf(unit,"check of contiguity of received buffer portions failed\n");
-	fprintf(unit,"Got the following portions/from process:\n");
-
-	for(i=0; i<Npos; i++)
-	{
-	    fprintf(unit,
-#ifdef WIN_IMB
-		    "%I64u / %d; ",
-#else
-		    "%lu / %d; ",
-#endif
-		    lengths[i], ranks[i]);
-	}
-	fprintf(unit,"\n");
-    }
-
-}
-
-
-
-
-void IMB_chk_contained(void* part, size_t p_size, void* whole, 
-                       size_t w_size, size_t* pos, size_t* fpos, 
-                       double* D, char*msg)
-/*
-
-                      
-                      Checks whether a buffer part is contained in a larger buffer
-                      (exploits uniqueness of buffer values, so check is trivial)
-                      
-
-
-Input variables: 
-
--part                 (type void*)                      
-                      Partial buffer
-                      
-
--p_size               (type int)                      
-                      Size of partial buffer
-                      
-
--whole                (type void*)                      
-                      Whole buffer
-                      
-
--w_size               (type int)                      
-                      Size of whole buffer
-                      
-
--msg                  (type char*)                      
-                      Accompanying message
-                      
-
-
-Output variables: 
-
--pos                  (type int*)                      
-                      Position where partial buffer begins in whole buffer
-                      if search was successful
-                      
-
--fpos                 (type int*)                      
-                      Position where first fault occurred when start position was
-                      found, but later an error occurred
-                      
-
--D                    (type double*)                      
-                      0 if check positive, 1 else
-                      
-
-
-*/
-{
-    assign_type *a_part, *a_whole;
-    long pcrc, wcrc;
-    size_t w_len, p_len;
-
-    a_part = (assign_type*) part;
-    a_whole = (assign_type*) whole;
-
-    *fpos = CHK_NO_FAULT;  /* instead of -1*/
-    *D=0.;
-
-    if( /*p_size <= 0*/ p_size == 0) /*!!! the type of p_size is changed to unsigned size_t*/
-	*pos = 0;
-    else if ( p_size > w_size )
-    {
-	*pos = 0; *fpos = 0;
-    }
-    else
-    {
-
-	if( p_size < asize )
-	{
-	    pcrc = IMB_compute_crc ((char*)part, p_size);
-
-	    *pos = 0;
-	    wcrc = pcrc-1;
-
-	    while( *pos <= w_size-p_size && wcrc != pcrc )
-	    {
-		void* h;
-		h = (void*)(((char*)whole)+*pos); 
-		wcrc = IMB_compute_crc ((char*)h, p_size);
-
-		if(wcrc!=pcrc) (*pos)++;
-	    }
-
-	    if( *pos <= w_size-p_size ) *D=0.;
-	    else {
-		*pos=0; 
-		*fpos = 0; 
-		*D=1.;
-	    }
-	} /*if( p_size < asize )*/
-	else
-	{
-	    *pos = 0;
-	    w_len = w_size/asize;
-	    p_len = p_size/asize;
-
-	    while( *pos <= w_len-p_len && A_ABS(a_part[0] - a_whole[*pos]) > TOL )  
-		    (*pos)++;
-
-	    if( *pos <= w_len-p_len )
-	    {
-		*D = IMB_ddiff(a_part, a_whole+*pos, p_len, fpos);
-	    }
-	    else 
-	    {
-		*D=1.; 
-		*pos=0; 
-		*fpos = 0; 
-	    }
-
-	    *pos *= asize;
-	} /*if !( p_size < asize )*/
-    }
-
-    if( *fpos != CHK_NO_FAULT /*>= 0*/ ) *D=1.;
-    
-}
-
-
-
-/********************* CHECKSUM ********************/
-
-/* Most of following CRC-32 stuff is from zmodem source code */
-
-/* I claim no copyright over the contents of this file.  -- Rahul Dhesi */
-
-/*
-Checksum:  951252172      (check or update this with "brik")
-*/
-#define INITCRC 0xFFFFFFFFL
-/*
- * Copyright (C) 1986 Gary S. Brown.  You may use this program, or
- * code or tables extracted from it, as desired without restriction.
- */
-
-/* First, the polynomial itself and its table of feedback terms.  The  */
-/* polynomial is                                                       */
-/* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */
-/* Note that we take it "backwards" and put the highest-order term in  */
-/* the lowest-order bit.  The X^32 term is "implied"; the LSB is the   */
-/* X^31 term, etc.  The X^0 term (usually shown as "+1") results in    */
-/* the MSB being 1.                                                    */
-
-/* Note that the usual hardware shift register implementation, which   */
-/* is what we're using (we're merely optimizing it by doing eight-bit  */
-/* chunks at a time) shifts bits into the lowest-order term.  In our   */
-/* implementation, that means shifting towards the right.  Why do we   */
-/* do it this way?  Because the calculated CRC must be transmitted in  */
-/* order from highest-order term to lowest-order term.  UARTs transmit */
-/* characters in order from LSB to MSB.  By storing the CRC this way,  */
-/* we hand it to the UART in the order low-byte to high-byte; the UART */
-/* sends each low-bit to hight-bit; and the result is transmission bit */
-/* by bit from highest- to lowest-order term without requiring any bit */
-/* shuffling on our part.  Reception works similarly.                  */
-
-/* The feedback terms table consists of 256, 32-bit entries.  Notes:   */
-/*                                                                     */
-/*     The table can be generated at runtime if desired; code to do so */
-/*     is shown later.  It might not be obvious, but the feedback      */
-/*     terms simply represent the results of eight shift/xor opera-    */
-/*     tions for all combinations of data and CRC register values.     */
-/*                                                                     */
-/*     The values must be right-shifted by eight bits by the "updcrc"  */
-/*     logic; the shift must be unsigned (bring in zeroes).  On some   */
-/*     hardware you could probably optimize the shift in assembler by  */
-/*     using byte-swap instructions.                                   */
-
-
-static long crc_32_tab[] = { /* CRC polynomial 0xedb88320 */
-      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
-      0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
-      0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
-      0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
-      0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
-      0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
-      0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
-      0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
-      0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
-      0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
-      0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
-      0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
-      0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
-      0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
-      0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
-      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
-      0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
-      0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
-      0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
-      0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
-      0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
-      0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
-      0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
-      0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
-      0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
-      0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
-      0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
-      0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
-      0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
-      0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
-      0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
-      0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
-      0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
-      0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
-      0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
-      0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
-      0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
-      0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
-      0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
-      0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
-      0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
-      0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
-      0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
-      0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
-      0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
-      0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
-      0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
-      0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
-      0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
-      0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
-      0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
-      0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
-      0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
-      0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
-      0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
-      0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
-      0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
-      0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
-      0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
-      0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
-      0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
-      0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
-      0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
-      0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
-};
-
-
-
-long IMB_compute_crc (register char* buf, register size_t size)
-/*
-
-
-
-In/out variables: 
-
--buf                  (type register char*)
--size                 (type register int)
-
-Return value          (type long)
-
-*/
-{
-    long crccode = INITCRC;
-
-    if( /*size <= 0*/ size == 0 )   /*!!! the type of size is modified to unsigned size_t*/
-	crccode = 0;
-    else 
-    {
-	int i;
-	for (i = 0;  i < size;  i ++) 
-	{
-	    crccode = crc_32_tab[(int) ((crccode) ^ (buf[i])) & 0xff] ^
-		(((crccode) >> 8) & 0x00FFFFFFL);
-	}
-    }
-
-    return(crccode);
-}
-
-#endif
diff --git a/src/IMB_comm_info.h b/src/IMB_comm_info.h
deleted file mode 100644
index a0359f74..00000000
--- a/src/IMB_comm_info.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- ***************************************************************************/
-
-
-
-
-
-#ifndef _COMM_INFO_H
-#define _COMM_INFO_H 
-
-#include "IMB_declare.h"
-
-
-#ifdef MPIIO
-typedef struct { int Locsize; MPI_Offset Offset; int Totalsize;} SPLITTING;
-#endif
-
-struct comm_info
-{ 
-/* Communication information as for MPI-1/2 parts */
-
-    int 	w_num_procs;		/* number of procs in COMM_WORLD            */
-    int 	w_rank;			/* rank of actual process in COMM_WORLD     */
-
-    int 	NP;			/* #processes participating in benchmarks   */
-    int 	px,py;			/* processes are part of px x py topology   */
-
-    MPI_Comm 	communicator;		/* underlying communicator for benchmark(s) */
-
-    int 	num_procs;		/* number of processes in communicator      */
-    int 	rank;			/* rank of actual process in communicator   */
-    int     root_shift;     /* switch for root change at each iteration */ 
-    int     sync;           /* switch for rank synchronization after each iter */
-
-    MPI_Datatype 	s_data_type;	/* data type of sent data                   */
-    MPI_Datatype 	r_data_type;	/* data type of received data               */
-
-    MPI_Datatype 	red_data_type;	/* data type of reduced data               */
-    MPI_Op 		op_type;	/* operation type                          */
-
-    int 	pair0, pair1;		/* process pair                            */
-    int 	select_tag;		/* 0/1 for tag selection off/on            */
-    int 	select_source;		/* 0/1 for sender selection off/on         */
-
-    void* 		s_buffer;	/* send    buffer                          */
-    assign_type*	s_data;		/* assign_type equivalent of s_buffer      */
-    size_t		s_alloc;	/* #bytes allocated in s_buffer            */
-    void* 		r_buffer;	/* receive buffer                          */
-    assign_type* 	r_data;		/* assign_type equivalent of r_buffer      */
-    size_t   		r_alloc;	/* #bytes allocated in r_buffer            */
-
-/* IMB 3.1 << */
-    float 	max_mem, used_mem;	/* max. allowed / used GBytes for all      */
-					/* message  buffers                        */
-/* >> IMB 3.1  */
-
-    int 	n_lens;			/* # of selected lengths by -msglen option */
-    int* 	msglen;			/* list of  "       "                  "   */
-
-    int		group_mode;		/* Mode of running groups (<0,0,>0)        */
-    int		n_groups;		/* No. of independent groups               */
-    int		group_no;              	/* own group index                         */
-    int*	g_sizes;		/* array of group sizes                    */
-    int*	g_ranks;		/* w_ranks constituting the groups         */
-
-    int* 	sndcnt;			/* send count argument for global ops.     */
-    int*	sdispl;			/* displacement argument for global ops.   */
-    int*	reccnt;			/* recv count argument for global ops.     */
-    int*	rdispl;			/* displacement argument for global ops.   */
-
-/* IMB 3.2.3 << */
-    int		min_msg_log;
-    int		max_msg_log;
-/* >> IMB 3.2.3  */
-
-    MPI_Errhandler	ERR;
- 
-#ifdef MPIIO
-    /*   FILE INFORMATION     */
-    char* 	filename;
-    MPI_Comm 	File_comm;
-    int		File_num_procs;
-    int		all_io_procs;
-    int		File_rank;
-
-    MPI_File	fh;
-
-    MPI_Datatype	etype;
-    Type_Size		e_size;
-    MPI_Datatype	filetype;
-
-    SPLITTING	split;
-    int 	amode;
-    MPI_Info	info;
- 
-    /* View: */
-    MPI_Offset	disp;
-    char*	datarep;
-    MPI_Datatype	view;
-    MPI_Errhandler	ERRF;
-#endif
-
-#if (defined EXT || defined RMA)
-    MPI_Win		WIN;
-    MPI_Info		info;
-    MPI_Errhandler	ERRW;
-#endif
-
-};
-
-#endif
diff --git a/src/IMB_g_info.c b/src/IMB_g_info.c
deleted file mode 100644
index 379216fd..00000000
--- a/src/IMB_g_info.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_g_info.c 
-
- Implemented functions: 
-
- IMB_general_info;
- IMB_make_sys_info;
- IMB_end_msg;
-
- ***************************************************************************/
-
-
-
-
-char* VERSION="2018 Update 1";
-
-#include <stdio.h>
-#include <time.h>
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-extern FILE* unit;
-
-
-
-
-void IMB_general_info()
-/*
-
-                      
-                      Prints to stdout some basic information 
-                      (Version, time, system (see 'IMB_make_sys_info'))
-                      
-
-
-*/
-{
-  /*void IMB_make_sys_info();*/
-  time_t T;
-  
-  time(&T);
-  fprintf(unit,"#------------------------------------------------------------\n");
-
-#ifdef MPI1                 
-  fprintf(unit,"#    Intel (R) MPI Benchmarks %s, MPI-1 part    \n",VERSION);
-#elif defined EXT
-  fprintf(unit,"#    Intel (R) MPI Benchmarks %s, MPI-2 part    \n",VERSION);
-#elif defined MPIIO
-  fprintf(unit,"#    Intel (R) MPI Benchmarks %s, MPI-IO part   \n",VERSION);
-#elif defined NBC
-  fprintf(unit,"#    Intel (R) MPI Benchmarks %s, MPI-NBC part  \n",VERSION);
-#elif defined RMA
-  fprintf(unit,"#    Intel (R) MPI Benchmarks %s, MPI-RMA part  \n",VERSION);
-#endif
-
-
-  fprintf(unit,"#------------------------------------------------------------\n");
-  fprintf(unit,"# Date                  : %s",asctime(localtime(&T)));
-
-  IMB_make_sys_info();
-  fprintf(unit,"\n");
-}
-
-/* IMB 3.1 << */
-/* include WIN case */
-#ifndef WIN_IMB
-#include <sys/utsname.h>
-#else
-#include <Windows.h>
-#define INFO_BUFFER_SIZE 32767
-#endif
-/* >> IMB 3.1  */
-
-
-void IMB_make_sys_info()
-/*
-
-                      
-                      Prints to stdout some basic information about the system
-                      (outcome of the 'uname' command)
-                      
-
-
-*/
-{
-  int dont_care, mpi_subversion, mpi_version;
-/* IMB 3.1 << */
-#ifndef WIN_IMB
-  struct utsname info;
-  uname( &info );
-  dont_care = MPI_Get_version(&mpi_version,&mpi_subversion);
-  
-  fprintf(unit,"# Machine               : %s\n",info.machine);
-  fprintf(unit,"# System                : %s\n",info.sysname);
-  fprintf(unit,"# Release               : %s\n",info.release);
-  fprintf(unit,"# Version               : %s\n",info.version);
-#else
-/* include WIN case */
-  OSVERSIONINFOEX info;
-  TCHAR infoBuf[INFO_BUFFER_SIZE];
-  DWORD bufCharCount = INFO_BUFFER_SIZE;
-  char *substr_ptr;
-
-  dont_care = MPI_Get_version(&mpi_version,&mpi_subversion);
-  
-  info.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
-  GetVersionEx((OSVERSIONINFO *) &info);
-
-  bufCharCount = ExpandEnvironmentStrings("%PROCESSOR_IDENTIFIER%",infoBuf,INFO_BUFFER_SIZE);
-
-/* Replace  "Intel64" by "Intel(R) 64" */
-  substr_ptr = strstr(infoBuf, "Intel64");
-  if( substr_ptr != NULL )
-      fprintf(unit,"# Machine               : Intel(R) 64%s\n", substr_ptr+strlen("Intel64"));
-  else
-  {
-      /* Replace  "EM64T" by "Intel(R) 64" */
-      substr_ptr = strstr(infoBuf, "EM64T");
-      if( substr_ptr != NULL )
-	    fprintf(unit,"# Machine               : Intel(R) 64%s\n", substr_ptr+strlen("EM64T"));
-      else
-	    fprintf(unit,"# Machine               : %s\n",infoBuf);
-  }      
-
-  if (info.dwMajorVersion == 4)
-      switch (info.dwMinorVersion) {
-      case 90 :
-          fprintf(unit,"# System                : Windows Me\n");
-          break;
-      case 10 :
-          fprintf(unit,"# System                : Windows 98\n");
-          break;
-      case 0 :
-          fprintf(unit,"# System                : Windows NT 4.0\n");
-          break;
-      default :
-          break;
-      }
-  else if (info.dwMajorVersion == 5)
-      switch (info.dwMinorVersion) {
-      case 2 :
-          fprintf(unit,"# System                : Windows 2003\n");
-          break;
-      case 1 :
-          fprintf(unit,"# System                : Windows XP\n");
-          break;
-      case 0 :
-          fprintf(unit,"# System                : Windows 2000\n");
-          break;
-      default :
-          break;
-      }
-  else if (info.dwMajorVersion == 6)
-      switch (info.dwMinorVersion) {
-      case 0 :
-          if (info.wProductType == VER_NT_WORKSTATION)
-              fprintf(unit,"# System                : Windows Vista\n");
-          else
-              fprintf(unit,"# System                : Windows Server 2008\n");
-          break;
-      default :
-          break;
-      }
-  
-  fprintf(unit,"# Release               : %-d.%-d.%-d\n",info.dwMajorVersion,
-          info.dwMinorVersion,info.dwBuildNumber);
-  fprintf(unit,"# Version               : %s\n",info.szCSDVersion);
-#endif
-/* >> IMB 3.1  */
-  fprintf(unit,"# MPI Version           : %-d.%-d\n",mpi_version,mpi_subversion);
-  fprintf(unit,"# MPI Thread Environment: ");
-
-#ifdef USE_MPI_INIT_THREAD
-  switch (mpi_thread_environment) 
-  {
-    case MPI_THREAD_SINGLE :
-	fprintf(unit,"MPI_THREAD_SINGLE\n");
-	break;
-
-    case MPI_THREAD_FUNNELED :
-	fprintf(unit,"MPI_THREAD_FUNNELED\n");
-	break;
-
-    case MPI_THREAD_SERIALIZED :
-	fprintf(unit,"MPI_THREAD_SERIALIZED\n");
-	break;
-
-    default :
-	fprintf(unit,"MPI_THREAD_MULTIPLE\n");
-	break;
-  }
-#endif
-
-// IMB 3.2 add on: Version information to stdout
-  if( strcmp(VERSION,"3.2") >0 ) {
-    fprintf(unit,"\n\n# New default behavior from Version 3.2 on:\n\n");
-    fprintf(unit,"\
-# the number of iterations per message size is cut down \n\
-# dynamically when a certain run time (per message size sample) \n\
-# is expected to be exceeded. Time limit is defined by variable \n\
-# \"SECS_PER_SAMPLE\" (=> IMB_settings.h) \n\
-# or through the flag => -time \n\
-  ");
-  }
-}
-
-void IMB_end_msg(struct comm_info* c_info )
-/*
-
-                      
-                      Prints to stdout an eventual end message (currently empty)
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
-
-*/
-{
-
-FILE* u;
-
-if ( c_info ) {
-
-if( c_info->w_rank == 0 ) {
-
-for( u=stdout; u; u=(u==unit ? NULL : unit) )
-{
-fprintf(u,"\n\n# All processes entering MPI_Finalize\n\n");
-}
-
-}
-}
-else
-{
-fprintf(stderr,"\n\n# IMB has MPI_Finalize-d\n\n");
-}
-
-}
diff --git a/src/IMB_init.c b/src/IMB_init.c
deleted file mode 100644
index 19b9cd3e..00000000
--- a/src/IMB_init.c
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_init.c 
-
- Modifications IMB_2.3 => IMB_3.0:
- Better argument checking and error messages
- Include -h flag for help
- 2 new auxiliary functions:
- IMB_chk_arg_int
- IMB_chk_arg_file
- 
-
- Implemented functions: 
-
- IMB_basic_input;
- IMB_chk_arg_int
- IMB_chk_arg_file
- IMB_chk_arg_thread_level
- IMB_get_rank_portion;
- IMB_init_communicator;
- IMB_set_communicator;
- IMB_valid;
- IMB_set_default;
-
- ***************************************************************************/
-
-
-
-#include <ctype.h>
-
-#include "IMB_settings.h"
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-/* IMB 3.1 << */
-#include "IMB_mem_info.h"
-/* >> IMB 3.1  */
-
-#include "IMB_prototypes.h"
-
-#define MAX_INT_LOG (31)
-
-/*
-void print_array(char *name, int *ptr, int size) 
-{
-    int i;
-    printf("%s: { ", name);
-    for (i = 0; i < size; i++) {
-        printf("%d%s", ptr[i], (i == size-1 ? "" : ", "));
-    }
-    printf(" }\n");
-}
-*/
-
-char *bmark_names_from_input_file[100] = { NULL, };
-
-struct Blist_item
-{
-    const char *bname;
-    int   next_index;
-};
-
-
-static struct Blist_item* pool =NULL;
-static int pool_size = 0;
-static int curr_pos  = 0;
-
-
-/**** static functions declarations*****/
-static int IMB_chk_arg_int  (int* val, char ***argv, int *argc, int iarg);
-static int IMB_chk_arg_file (FILE** fd, char ***argv, int *argc, int iarg);
-#ifdef USE_MPI_INIT_THREAD
-static int IMB_chk_arg_thread_level(int* val, char **argv, int argc, int iarg);
-#endif /*USE_MPI_INIT_THREAD*/
-
-static void IMB_init_Blist_item_pool();
-static void IMB_free_Blist_item_pool();
-
-static void IMB_add_to_list_tail(const char*, int*, int*);
-static void IMB_print_list    (int list_head_index);
-static void IMB_remove_invalid_items( int* p_list_head, int* p_list_tail, int* n_cases);
-static void IMB_remove_item_from_list( const char* name, int* p_list_head, int* p_list_tail, int *n_cases);
-
-/********************************************************************/
-static int IMB_chk_arg_int(int* val, char ***argv, int *argc, int iarg)
-{
-    /* Checks command line argument for being nonnegative integer */
-    int ok;
-
-    ok=1;
-    if( iarg < *argc )
-    {
-        int tst=IMB_str_atoi((*argv)[iarg]);
-
-        if( tst>=0 ) 
-        {
-            *val=tst;
-        } else ok=0;
-    }
-    else ok=0;
-
-    return ok;
-}
-
-
-static int IMB_chk_arg_file(FILE** fd, char ***argv, int *argc, int iarg)
-{
-    /* Checks command line argument for being a file */
-    int ok;
-
-    *fd=(FILE*)NULL;
-
-    ok=1;
-    if( iarg < *argc )
-    {
-    FILE* tst=(FILE*) fopen((*argv)[iarg],"r");
-
-    if( tst ) 
-    {
-        *fd=tst;
-    } else ok=0;
-    }
-    else ok=0;
-
-    return ok;
-}
-
-#ifdef WIN_IMB
-#define STRCASECMP(s1,s2) _stricmp((s1),(s2))
-#else /*linux*/
-#define STRCASECMP(s1,s2) strcasecmp((s1),(s2))
-#endif
-#ifdef USE_MPI_INIT_THREAD
-static int IMB_chk_arg_thread_level(int* val, char **argv, int argc, int iarg)
-{
-    /* Checks command line argument for being nonnegative integer */
-    int ok;
-
-    ok=1;
-    if( iarg < argc )
-    {
-    if( !STRCASECMP(argv[iarg], "single"))
-    {
-        *val = MPI_THREAD_SINGLE;
-    } else if( !STRCASECMP(argv[iarg], "funneled"))
-    {
-        *val = MPI_THREAD_FUNNELED;
-    } else if( !STRCASECMP(argv[iarg], "serialized"))
-    {
-        *val = MPI_THREAD_SERIALIZED;
-    } else if( !STRCASECMP(argv[iarg], "multiple"))
-    {
-        *val = MPI_THREAD_MULTIPLE;
-    }else 
-        ok=0;
-    }
-    else 
-    ok=0;
-
-    return ok;
-}
-#endif /*#ifdef USE_MPI_INIT_THREAD*/
-
-
-static IMODE string_to_iter_policy(const char* str)
-{
-    IMODE i = imode_invalid;
-    size_t len = strlen(str);
-
-    if (strncmp(str, "off", min(len, 3)) == 0) {
-        i = imode_off;
-    } else if (strncmp(str, "dynamic", min(len, 6)) == 0) {
-        i = imode_dynamic;
-    } else if (strncmp(str, "multiple_np", min(len, 11)) == 0) {
-        i = imode_multiple_np;
-    } else if (strncmp(str, "auto", min(len, 4)) == 0) {
-        i = imode_auto;
-    }
-
-    return i;
-}
-
-int static IMB_chk_arg_switch (char *val) 
-{
-    int ret = -1;
-
-    if (val != NULL)
-    {
-        if ( 0 == STRCASECMP(val, "enable") ||
-             0 == STRCASECMP(val, "yes")    ||  
-             0 == STRCASECMP(val, "on")     ||  
-             0 == strcmp(val, "1") )                     
-        {
-            ret = 1;
-        }
-        else if ( 0 == STRCASECMP(val, "disable")   ||  
-                  0 == STRCASECMP(val, "no")        ||  
-                  0 == STRCASECMP(val, "off")       ||  
-                  0 == strcmp(val, "0") )                     
-        {
-            ret = 0;        
-        }
-    }
-    return ret;
-}
-
-
-#define N_baseinfo 18
-/* IMB 3.1 << */
-#define N_base_f_info 3  /* for float data */
-/* >> IMB 3.1  */
-
-/* IMB 3.1 << */
-/*
-   new "ITERATIONS" object for repetition count scheduling 
-
-   major changes in interpreting the command line
-   */
-
-int IMB_basic_input(struct comm_info* c_info, struct Bench** P_BList, 
-                    struct iter_schedule* ITERATIONS,
-                    int *argc, char ***argv, int* NP_min)
-/* >> IMB 3.1  */
-/*
-
-
-
-   Input variables: 
-
-   -argc                 (type int *)                      
-   Number of command line arguments
-
-
-   -argv                 (type char ***)                      
-   List of command line arguments
-
-
-
-   Output variables: 
-
-   -NP_min               (type int*)                      
-   Minimum number of processes to run (-npmin command line argument)
-
-
-   -P_BList              (type struct Bench**)                      
-   (For explanation of struct Bench type:
-   describes all aspects of modes of a benchmark;
-   see [1] for more information)
-
-   Address of list of benchmarks to run;
-   list is set up.
-
-
-   -c_info               (type struct comm_info*)                      
-   Collection of all base data for MPI;
-   see [1] for more information
-
-
-
-*/
-{
-    int i,n_cases,n_lens,iarg,iarg_msg;
-    int deflt;
-    int * ALL_INFO;
-    //char** DEFC, **CMT;
-    /* IMB 3.1 << */
-    float ALL_F_INFO[N_base_f_info];
-    /* >> IMB 3.1  */
-    int ok;
-    /* IMB_3.0 */
-    int help_only;
-
-    int Blist_head, Blist_incl_head, Blist_excl_head;
-    int Blist_tail, Blist_incl_tail, Blist_excl_tail;
-    int n_cases_incl, n_cases_excl;
-    enum {
-        CONSTRUCT_BLIST,
-        INCL_BLIST,
-        EXCL_BLIST
-    } blist_ind;
-
-    help_only=0;
-
-    *P_BList     = (struct Bench *)NULL;
-
-    /* run time control as default */
-    ITERATIONS->n_sample=0;
-    ITERATIONS->off_cache=0;
-    ITERATIONS->cache_size=-1;
-    ITERATIONS->s_offs = ITERATIONS->r_offs = 0;
-    ITERATIONS->s_cache_iter = ITERATIONS->r_cache_iter = 1;
-    ITERATIONS->msgspersample=MSGSPERSAMPLE;
-    ITERATIONS->msgs_nonaggr=MSGS_NONAGGR;
-    ITERATIONS->overall_vol=OVERALL_VOL;
-    ITERATIONS->secs=SECS_PER_SAMPLE;
-    ITERATIONS->iter_policy=ITER_POLICY;
-    ITERATIONS->numiters=(int*)NULL;
-
-    MPI_Comm_rank(MPI_COMM_WORLD,&c_info->w_rank);
-    MPI_Comm_size(MPI_COMM_WORLD,&c_info->w_num_procs);
-
-    unit = stdout; 
-
-    if( c_info->w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 )
-        unit = fopen(OUTPUT_FILENAME,"w"); 
-
-    deflt = 0;
-    ok = 0;
-    iarg_msg=-1;
-
-    c_info->group_mode = -1;
-#ifdef MPIIO
-    *NP_min=1;
-#else
-    *NP_min=2;
-#endif
-
-    if( c_info->w_rank == 0 )
-    {
-        /* Interpret command line */
-        n_lens = 0;
-        n_cases   = n_cases_incl = n_cases_excl = 0;
-
-        IMB_init_Blist_item_pool(); 
-        Blist_head = Blist_incl_head = Blist_excl_head =
-            Blist_tail = Blist_incl_tail = Blist_excl_tail = -1;
-
-        if( *argc <= 1 )
-        {
-            /* Take default */
-            deflt = 1;
-        }
-        else
-        {
-
-            blist_ind = CONSTRUCT_BLIST;
-
-            iarg = 1;
-
-            while( iarg <= *argc-1 )
-            {
-
-                if(!strcmp((*argv)[iarg],"-h") || !strcmp((*argv)[iarg],"-help"))
-                {
-
-                    help_only=1;
-                    break;
-
-                } else if(!strcmp((*argv)[iarg],"-npmin"))
-                {
-                    /* IMB_3.0: Better arg checking for following cases */
-                    if( !IMB_chk_arg_int(NP_min,argv,argc,iarg+1) || (*NP_min<=0) )
-                    {
-                        ok=-1;
-                        fprintf(stderr,"Invalid argument after \"npmin\"\n");
-                        break;
-                    }
-
-                    iarg++;
-
-                    blist_ind = CONSTRUCT_BLIST;
-
-                } else if(!strcmp((*argv)[iarg],"-multi"))
-                {
-
-                    int tst;
-                    if( !IMB_chk_arg_int(&tst,argv,argc,iarg+1) )
-                    {
-                        ok=-1;
-                    }
-                    else if( tst==0 || tst==1 )
-                    {
-                        c_info->group_mode=tst;
-                    }
-                    else
-                    {
-                        ok=-1;
-                    }
-
-                    if( ok==-1 ) 
-                    {
-                        fprintf(stderr,"Invalid argument after \"multi\"\n");
-                        break;
-                    }
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-off_cache"))
-                {
-                    int ierr, cls;
-                    float cs;
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"off_cache\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%f,%d",&cs,&cls);
-                    if( ierr==1 ) 
-                    {
-                        if( cs<0. ) cs=CACHE_SIZE;
-                        cls=CACHE_LINE_SIZE;
-                    }
-                    else if( ierr!=2 )
-                    {
-                        fprintf(stderr,"Invalid off_cache selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    ITERATIONS->cache_size = cs;
-                    ITERATIONS->cache_line_size = cls;
-                    ITERATIONS->off_cache=1;
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-iter")) {
-                    if( iarg+1>=*argc ) {
-                        fprintf(stderr,"Missing argument after \"iter\"\n");
-                        ok=-1;
-                        break;
-                    } else {
-                        int int_counter   = 0;
-                        int param_counter = 0;
-                        const int n_param = 4; /* comma separated parameters*/
-                        char* param       = (*argv)[iarg+1];
-                        char const* token = NULL;
-
-                        for (token = strtok(param, ",");
-                             token && param_counter < n_param;
-                             token = strtok(NULL, ","), ++param_counter)
-                        {
-                            if (isdigit(*token)) {
-                                ++int_counter;
-                                switch (int_counter)
-                                {
-                                case 1:  sscanf(token,"%d", &ITERATIONS->msgspersample); break;
-                                case 2:  sscanf(token,"%d", &ITERATIONS->overall_vol);
-                                         ITERATIONS->overall_vol *= (1024 * 1024);       break;
-                                case 3:  sscanf(token,"%d", &ITERATIONS->msgs_nonaggr);  break;
-                                default: ITERATIONS->iter_policy = imode_invalid;        break;
-                                }
-                            } else {
-                                ITERATIONS->iter_policy = string_to_iter_policy(token);
-                            }
-                        }
-                    }
-
-                    if (ITERATIONS->iter_policy == imode_invalid) {
-                        fprintf(stderr,"Invalid iter selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-iter_policy")) {
-                    int ierr;
-                    char iter_policy[32];
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"iter_policy\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%31s", iter_policy);
-                    iter_policy[31] = '\0';
-
-                    if (ierr == 1)
-                    {
-                        ITERATIONS->iter_policy = string_to_iter_policy(iter_policy);
-                    }
-
-                    if (ierr != 1 || ok == -1 || ITERATIONS->iter_policy == imode_invalid)
-                    {
-                        fprintf(stderr,"Invalid -iter_policy selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-                } else if(!strcmp((*argv)[iarg],"-time")) {
-                    int ierr; 
-                    float secs;
-
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"iter\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%f",&secs);
-                    if( ierr!=1 )
-                    {
-                        fprintf(stderr,"Invalid -time selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    ITERATIONS->secs=secs;
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-mem"))
-                {
-
-                    int ierr; 
-                    float GB;
-
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"-mem\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%f",&GB);
-                    if( ierr!=1 )
-                    {
-                        fprintf(stderr,"Invalid -time selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    c_info->max_mem=GB;
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-map"))
-                {
-                    int ierr;
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"map\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%d%c%d",&c_info->px,(char*) &i,&c_info->py);
-                    if(ierr<3 || c_info->px*c_info->py < c_info->w_num_procs)
-                    {
-                        fprintf(stderr,"Invalid map selection\n");
-                        ok = -1;
-                        break;
-                    } 
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-msglen"))
-                {
-                    FILE*t;
-                    if( !IMB_chk_arg_file(&t,argv,argc,iarg+1) )
-                    {
-                        ok=-1; 
-                        fprintf(stderr,"Filename after \"msglen\" flag invalid\n");
-                        break;
-                    }
-
-                    iarg_msg=iarg+1;
-                    blist_ind = CONSTRUCT_BLIST;
-
-                    if( t )
-                    {
-                        char inp_line[72];
-
-                        while(fgets(inp_line,72,t))
-                        {
-                            if( inp_line[0] != '#' && strlen(inp_line)>1 )
-                                n_lens++;
-                        }
-                        fclose(t);
-                    }
-
-                    if ( n_lens==0 )
-                    {
-                        fprintf(stderr,"Sizes file %s invalid or doesnt exist\n",(*argv)[iarg_msg]);
-                        ok = -1;
-                    }
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-input"))
-                {
-                    FILE*t;
-                    if( !IMB_chk_arg_file(&t,argv,argc,iarg+1) )
-                    {
-                        ok=-1; 
-                        fprintf(stderr,"Filename after \"input\" flag invalid\n");
-                        break;
-                    }
-
-                    blist_ind = CONSTRUCT_BLIST;
-
-                    if( t )
-                    {
-                        char inp_line[72], nam[32];
-                        char *nam_copy;
-                        while(fgets(inp_line,72,t))
-                        {
-                            if( inp_line[0] != '#' && strlen(inp_line)-1 )
-                            {
-                                sscanf(inp_line,"%32s",nam);
-                                nam_copy = bmark_names_from_input_file[n_cases] = strdup(nam);
-
-                                if (++n_cases >= 100) {
-                                    fprintf(unit,"Too many benchmark cases\n");
-                                    fflush(stderr);
-                                    ok=-1;
-                                    break;
-                                }
-                                IMB_add_to_list_tail( nam_copy, &Blist_head, &Blist_tail);
-                            }
-                        }
-                        fclose(t);
-                    }
-                    else fprintf(unit,"Input file %s doesnt exist\n",(*argv)[iarg+1]);
-
-                    iarg++;
-
-                } else if(!strcmp((*argv)[iarg],"-include"))                
-                {                                                    
-                    if( (iarg+1>=*argc) || ((*argv)[iarg+1][0] == '-')) 
-                    {
-                        fprintf(stderr,"Missing argument after \"include\"\n");
-                        fflush(stderr);
-                        ok=-1;
-                        break;
-                    }
-
-                    blist_ind = INCL_BLIST;
-
-                } else if(!strcmp((*argv)[iarg],"-exclude"))
-                {
-                    if( (iarg+1>=*argc) || ((*argv)[iarg+1][0] == '-')) 
-                    {
-                        fprintf(stderr,"Missing argument after \"exclude\"\n");
-                        fflush(stderr);
-                        ok=-1;
-                        break;
-                    }
-
-                    blist_ind = EXCL_BLIST;
-
-                } 
-                /* IMB 3.2.3 << */
-                else if(!strcmp((*argv)[iarg],"-msglog"))
-                {
-
-                    int ierr, max_log, min_log;
-                    if( iarg+1>=*argc ) 
-                    {
-                        fprintf(stderr,"Missing argument after \"-msglog\"\n");
-                        ok=-1;
-                        break;
-                    }
-
-                    ierr=sscanf((*argv)[iarg+1],"%d:%d",&min_log, &max_log);
-
-                    if(ierr==2)
-                    {
-
-                        if( (min_log>=0) &&
-                                (max_log>0) &&
-                                (min_log<MAX_INT_LOG) &&
-                                (max_log<MAX_INT_LOG) &&
-                                (max_log>min_log))
-
-                        {
-                            c_info->min_msg_log = min_log;
-                            c_info->max_msg_log = max_log;
-                        } else
-                        {
-                            ok= -1;
-                            break;
-                        }
-                    } else if( ierr==1)
-                    {
-                        if( min_log>0)
-                        {
-                            c_info->max_msg_log = min_log;
-                        } else
-                        {
-                            ok= -1;
-                            break;
-                        }
-
-                    } if( ierr==0)
-                    {
-                        ok = -1;
-                    } 
-
-                    if( ok == -1)
-                    {
-                        fprintf(stderr,"Invalid -msglog argument, must be <num1>:<num2>\n");
-                        fprintf(stderr,"where num1 and num2 are positive integer numbers, and num2>num1\n");
-                        break;
-                    }
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-                } 
-#ifdef USE_MPI_INIT_THREAD
-                else if(!strcmp((*argv)[iarg],"-thread_level"))
-                {
-                    int thread_level;
-                    if( !IMB_chk_arg_thread_level(&thread_level, *argv, *argc, iarg+1) )
-                    {
-                        ok=-1; 
-                        fprintf(stderr,"Invalid -thread_level argument, must be single/funneled/serialized/multiple\n");
-                        break;
-                    }
-
-                    mpi_thread_desired = thread_level;
-
-                    blist_ind = CONSTRUCT_BLIST;
-                    iarg++;
-
-                }
-#endif
-                /* >> IMB 3.2.3  */
-#if (defined MPI1 || defined NBC )                
-                else if (!strcmp((*argv)[iarg],"-root_shift"))
-                {
-                    int val = -1; 
-                    
-                    if( iarg+1 < *argc )
-                    {    
-                       val = IMB_chk_arg_switch((*argv)[iarg+1]);
-                    }    
-                    
-                    if (val == -1)
-                    {    
-                        fprintf(stderr,"Invalid -root_shift argument \n");
-                        ok = -1;
-                        break;
-                    }
-                    else
-                    { 
-                        c_info->root_shift = val;
-                    }    
-                    iarg++;
-                }
-                else if (!strcmp((*argv)[iarg],"-sync"))
-                {
-                    int val = -1;
-                    
-                    if( iarg+1 < *argc )
-                    {    
-                       val = IMB_chk_arg_switch((*argv)[iarg+1]);
-                    }    
-                    
-                    if (val == -1)
-                    {    
-                        fprintf(stderr,"Invalid -sync argument \n");
-                        ok = -1;
-                        break;
-                    }
-                    else
-                    { 
-                        c_info->sync = val;
-                    }    
-                    iarg++;
-                }
-#endif                 
-                else if (!strcmp((*argv)[iarg],"-imb_barrier"))
-                {
-                    int val = -1;
-
-                    if( iarg+1 < *argc )
-                    {
-                       val = IMB_chk_arg_switch((*argv)[iarg+1]);
-                    }
-
-                    if (val == -1)
-                    {
-                        fprintf(stderr,"Invalid -imb_barrier argument \n");
-                        ok = -1;
-                        break;
-                    }
-                    else
-                    {
-                        IMB_internal_barrier = val;
-                    }
-                    iarg++;
-                }
-                else
-                {
-                    /*It must be the name of one of benchmark*/
-                    if( blist_ind == CONSTRUCT_BLIST)
-                    {
-                        n_cases++;
-                        IMB_add_to_list_tail((*argv)[iarg], &Blist_head, &Blist_tail);
-                    }
-                    else if( blist_ind == INCL_BLIST)
-                    {
-                        n_cases_incl++;
-                        IMB_add_to_list_tail((*argv)[iarg], &Blist_incl_head, &Blist_incl_tail);
-                    } else if( blist_ind == EXCL_BLIST)
-                    {
-                        n_cases_excl++;
-                        IMB_add_to_list_tail((*argv)[iarg], &Blist_excl_head, &Blist_excl_tail);
-                    }
-                }
-
-                iarg++;
-            } /*while( iarg <= *argc-1 )*/
-        } /* else if( *argc > 1 )*/
-
-        /* IMB_3.0 */
-        if( help_only || ok<0 ) 
-        {
-
-            /* Set flag "not ok" => help mode in main */
-            n_cases=0;
-            IMB_i_alloc(int,ALL_INFO,N_baseinfo,"Basic_Input");
-            ok=-3;
-
-        }
-        else
-        {
-
-            /* remove wrong items*/
-            if( n_cases > 0)
-            {
-                IMB_remove_invalid_items( &Blist_head, &Blist_tail, &n_cases);
-            }
-
-            if( n_cases_excl > 0) 
-            {
-                IMB_remove_invalid_items( &Blist_excl_head, &Blist_excl_tail, &n_cases_excl);
-            }
-
-            if( n_cases_incl > 0)
-            {
-                IMB_remove_invalid_items( &Blist_incl_head, &Blist_incl_tail, &n_cases_incl);
-            }
-
-            if (n_cases==0 && n_cases_excl==0 && n_cases_incl==0) 
-            {
-                deflt = 1;
-            }
-
-            if( deflt)
-            {
-                IMB_construct_blist_default(P_BList);
-            } else
-            {
-
-                if( n_cases==0 )
-                {
-                    char** def_cases, **General_cmt;
-                    int i;
-
-                    n_cases = IMB_get_def_cases(&def_cases, &General_cmt);
-
-                    for( i=0; i<n_cases; i++)
-                        IMB_add_to_list_tail( def_cases[i], &Blist_head, &Blist_tail);
-                }
-
-                /* Add benchmarks specified by option -include*/
-                if( n_cases_incl > 0)
-                {
-                    struct Blist_item* include_tail = &pool[Blist_incl_tail];
-
-                    include_tail->next_index = Blist_head;
-                    Blist_head = Blist_incl_head;
-
-                    n_cases += n_cases_incl;
-                }
-
-                /* Remove benchmarks specified by option -exclude*/
-                if( n_cases_excl > 0) 
-                {
-                    int curr_index = Blist_excl_head;
-                    struct Blist_item*  curr_item;
-
-                    while( curr_index != -1)
-                    {
-                        curr_item  = &pool[curr_index];
-
-                        IMB_remove_item_from_list( curr_item->bname, &Blist_head, &Blist_tail, &n_cases);
-
-                        curr_index = curr_item->next_index;
-                    }
-
-                }
-
-                if( n_cases > 0)
-                {
-                    int i=0;
-                    int curr_index = Blist_head;
-                    struct Blist_item* blist_item;
-
-
-                    *P_BList = (struct Bench*)    IMB_v_alloc((1+n_cases)*sizeof(struct Bench), "Construct_Blist 2");
-
-                    while( curr_index != -1)
-                    {
-                        blist_item = &pool[curr_index];
-
-                        IMB_construct_blist( &(*P_BList)[i], blist_item->bname);
-
-                        curr_index = blist_item->next_index;
-                        i++;
-                    }
-                    (*P_BList)[n_cases].name = NULL;
-
-                } else
-                {
-                    ok = -1;
-                    *P_BList = (struct Bench*)    IMB_v_alloc(sizeof(struct Bench), "Construct_Blist 2");
-                    (*P_BList)[0].name = NULL;
-                }
-
-            }
-
-
-
-            IMB_free_Blist_item_pool();
-
-            if( iarg_msg>=0 )
-            {
-                FILE*t = fopen((*argv)[iarg_msg],"r");
-                c_info->n_lens=n_lens;
-
-                if( t && n_lens>0 )
-                {
-                    char inp_line[72], S[32];
-                    int sz, isz;
-
-                    IMB_i_alloc(int, c_info->msglen,n_lens,"Basic_Input");
-
-                    isz=-1;
-
-                    while(fgets(inp_line,72,t))
-                    {
-                        S[0]='\0';
-                        if( inp_line[0] != '#' && strlen(inp_line)-1 )
-                        {
-                            int ierr;
-                            sz=0;
-
-                            ierr=sscanf(&inp_line[0],"%d%s",&sz,&S[0]);
-                            if( ierr<=0 || ierr==EOF || sz<0 )
-                            {
-                                ierr=-1;
-                            }
-                            else if(ierr==2) 
-                            {
-                                if      (S[0]=='k' ||  S[0]=='K') {sz=sz*1024;}
-                                else if (S[0]=='m' ||  S[0]=='M') {sz=sz*1024*1024;}
-                                else 
-                                { 
-                                    ierr=-1; 
-                                } 
-                            } /*else if(ierr==2) */
-
-                            if( ierr>0 ) 
-                            {
-                                isz++;
-                                c_info->msglen[isz]=sz;
-                            }
-                            else
-                            {
-                                fprintf(stderr,"Invalid line in file %s\n",(*argv)[iarg_msg]);
-                            }
-                        } /*if( inp_line[0] != '#' && strlen(inp_line)-1 )*/
-                    } /*while(fgets(inp_line,72,t))*/
-
-                    n_lens=c_info->n_lens=isz+1;
-                    fclose(t);
-
-                    if ( n_lens==0 )
-                    {
-                        fprintf(stderr,"Sizes File %s invalid or doesnt exist\n",(*argv)[iarg_msg]);
-                        ok = -1;
-                    }
-                } /*if( t && n_lens>0 )*/
-            } /*if( iarg_msg>=0 )*/
-
-            IMB_i_alloc(int, ALL_INFO,N_baseinfo+n_cases,"Basic_Input");
-
-            if( !deflt )
-            {
-                i=0;
-                n_cases = 0;
-
-                while( (*P_BList)[i].name )
-                {
-                    int index;
-                    index = IMB_get_bmark_index((*P_BList)[i].name);
-                    //IMB_get_def_index(&index,(*P_BList)[i].name );
-
-                    /* IMB_3.0
-                       if( index >= 0 )
-                       */
-                    /*if( index  != LIST_END )*/
-                    ALL_INFO[N_baseinfo+n_cases++] = index;
-
-                    i++;
-                } /*while( (*P_BList)[i].name )*/
-
-            } /*if( !deflt )*/
-
-            /* IMB_3.0 end "!help_only" */
-        } /* else if !( help_only || ok<0 ) */
-
-
-        /* IMB 3.1 << */
-        ALL_INFO[0]  = *NP_min;
-        ALL_INFO[1]  = c_info->group_mode;
-        ALL_INFO[2]  = deflt;
-        ALL_INFO[3]  = ITERATIONS->cache_line_size;
-        ALL_INFO[4]  = ITERATIONS->msgspersample;
-        ALL_INFO[5]  = ITERATIONS->overall_vol;
-        ALL_INFO[6]  = ITERATIONS->msgs_nonaggr;
-        ALL_INFO[7]  = ITERATIONS->iter_policy;
-        ALL_INFO[8]  = n_cases;
-        ALL_INFO[9] = c_info->n_lens;
-        ALL_INFO[10] = c_info->px;
-        ALL_INFO[11] = c_info->py;
-        ALL_INFO[12] = c_info->min_msg_log;
-        ALL_INFO[13] = c_info->max_msg_log;
-        ALL_INFO[14] = c_info->root_shift;
-        ALL_INFO[15] = c_info->sync;
-        ALL_INFO[16] = ok;
-        ALL_INFO[17] = IMB_internal_barrier;
-
-        ALL_F_INFO[0] = ITERATIONS->cache_size;
-        ALL_F_INFO[1] = ITERATIONS->secs;
-        ALL_F_INFO[2] = c_info->max_mem;
-
-        MPI_Bcast(ALL_F_INFO,N_base_f_info,MPI_FLOAT,0,MPI_COMM_WORLD);
-        /* >> IMB 3.1  */
-        MPI_Bcast(ALL_INFO,N_baseinfo,MPI_INT,0,MPI_COMM_WORLD);
-
-        if( ok<0 ) return ok;
-
-        if (n_cases > 0 && !deflt) {
-            MPI_Bcast(ALL_INFO + N_baseinfo, n_cases, MPI_INT, 0, MPI_COMM_WORLD);
-        }
-
-        if ( n_lens  > 0 ) {
-            MPI_Bcast(c_info->msglen,n_lens,MPI_INT,0,MPI_COMM_WORLD);
-            /* Used for dynamic caclulations on the number iterations */
-            if(ITERATIONS->iter_policy != imode_off && ITERATIONS->iter_policy != imode_invalid) {
-                IMB_i_alloc(int, ITERATIONS->numiters,n_lens,"Basic_Input");
-            }
-        }
-
-        IMB_v_free((void**)&ALL_INFO);
-
-    } else  /* w_rank > 0 */
-        /* Receive input arguments */
-    {
-        int TMP[N_baseinfo];
-
-        /* IMB 3.1 << */
-        MPI_Bcast(ALL_F_INFO,N_base_f_info,MPI_FLOAT,0,MPI_COMM_WORLD);
-        /* >> IMB 3.1  */
-        MPI_Bcast(TMP,N_baseinfo,MPI_INT,0,MPI_COMM_WORLD);
-
-        *NP_min = TMP[0];
-        c_info->group_mode = TMP[1];
-        deflt = TMP[2];
-        /* IMB 3.1 << */
-
-        ITERATIONS->cache_line_size  = TMP[3];
-        ITERATIONS->msgspersample    = TMP[4];
-        ITERATIONS->overall_vol      = TMP[5];
-        ITERATIONS->msgs_nonaggr     = TMP[6];
-        ITERATIONS->iter_policy      = TMP[7];
-        n_cases                      = TMP[8];
-        n_lens                       = TMP[9];
-        c_info->px                   = TMP[10];
-        c_info->py                   = TMP[11];
-        c_info->min_msg_log          = TMP[12];
-        c_info->max_msg_log          = TMP[13];
-        c_info->root_shift          = TMP[14];
-        c_info->sync                 = TMP[15];
-        ok                           = TMP[16];
-        IMB_internal_barrier         = TMP[17];
-
-        ITERATIONS->cache_size       = ALL_F_INFO[0];
-        ITERATIONS->off_cache        = (ITERATIONS->cache_size < 0.) ? 0 : 1;
-        ITERATIONS->secs             = ALL_F_INFO[1];
-        c_info->max_mem              = ALL_F_INFO[2];
-        /* >> IMB 3.1  */
-
-        if( ok<0 ) return ok;
-
-        if( deflt )
-        {
-            IMB_construct_blist_default(P_BList);
-        }
-        else if( n_cases>0 )
-        {
-            char** ALLC;
-
-            IMB_i_alloc(int, ALL_INFO,n_cases,"Basic_Input");
-            MPI_Bcast(ALL_INFO,n_cases,MPI_INT,0,MPI_COMM_WORLD);
-
-            //IMB_get_def_cases(&DEFC,&CMT);
-            IMB_get_all_cases(&ALLC);
-
-            *P_BList = (struct Bench*)    IMB_v_alloc((1+n_cases)*sizeof(struct Bench), "Construct_Blist 1");
-
-            for( i = 0; i<n_cases; i++ )
-                /* IMB_3.0 */
-            {
-                IMB_construct_blist(&(*P_BList)[i], ALLC[ALL_INFO[i]]);
-            }
-
-            (*P_BList)[n_cases].name = NULL;
-
-            IMB_v_free((void**)&ALL_INFO);
-        }
-
-        if( n_lens>0 ) 
-        {
-            c_info->n_lens = n_lens;
-
-            IMB_i_alloc(int, c_info->msglen,n_lens,"Basic_Input");
-            MPI_Bcast(c_info->msglen,n_lens,MPI_INT,0,MPI_COMM_WORLD);
-
-            if (ITERATIONS->iter_policy != imode_off && ITERATIONS->iter_policy != imode_invalid) {
-                IMB_i_alloc(int,ITERATIONS->numiters,n_lens,"Basic_Input");
-            }
-        } /*if( n_lens>0 ) */
-    }
-
-#ifdef DEBUG
-    {
-        int i;
-
-        if( n_lens>0 )
-        {
-            fprintf(dbg_file,"Got msglen:\n");
-
-            for(i=0; i<n_lens; i++) fprintf(stderr,"%d ",c_info->msglen[i]);
-        }
-
-        fprintf(dbg_file,"\n\n");
-        fprintf(dbg_file,"px py = %d %d\n",c_info->px,c_info->py);
-        fprintf(dbg_file,"\n\n");
-    }
-#endif /*DEBUG*/
-
-#ifndef EXT
-    if( do_nonblocking )
-        IMB_cpu_exploit(TARGET_CPU_SECS, 1);
-#endif
-
-    return 0;
-}
-
-
-void IMB_get_rank_portion(int rank, int NP, size_t size, 
-        size_t unit_size, size_t* pos1, size_t* pos2)
-/*
-
-
-   Splits <size> into even contiguous pieces among processes
-
-
-
-   Input variables: 
-
-   -rank                 (type int)                      
-   Process' rank
-
-
-   -NP                   (type int)                      
-   Number of processes
-
-
-   -size                 (type int)                      
-   Portion to split
-
-
-   -unit_size            (type int)                      
-   Base unit for splitting
-
-
-
-   Output variables: 
-
-   -pos1                 (type int*)
-   -pos2                 (type int*)                      
-   Process' portion is from unit pos1 to pos2
-
-
-
-*/
-{
-    size_t ne, baslen;
-    int    mod;
-
-    ne = (size+unit_size-1)/unit_size;
-    baslen = ne/NP;
-    mod    = (int) ne%NP;
-
-    if( rank < mod )
-    {
-        *pos1 = rank*(baslen+1)*unit_size;
-        *pos2 = *pos1-1+(baslen+1)*unit_size;
-    }
-    else
-    {
-        *pos1 = (rank*baslen + mod)*unit_size;
-        *pos2 = *pos1-1 + baslen*unit_size;
-    }
-
-    *pos2 = min(*pos2,size-1);
-
-}
-
-/********************************************************************/
-
-
-int IMB_init_communicator(struct comm_info* c_info, int NP)
-    /*
-
-
-
-       Input variables: 
-
-       -NP                   (type int)                      
-       Number of all started processes
-
-
-
-       In/out variables: 
-
-       -c_info               (type struct comm_info*)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-       Communicator of active processes gets initialized;
-       grouping of processes (in the 'multi' case) in communicators
-
-
-
-       Return value          (type int)                      
-       Non currently used error exit (value is always 0)
-
-
-
-*/
-{
-    int i,snd,cnt,proc,*aux_ptr;
-
-    MPI_Group group, w_group;
-    MPI_Status stat;
-
-    c_info->NP=NP;                         /* NUMBER OF OVERALL PROCESSES */
-    IMB_set_communicator( c_info );     /* GROUP MANAGEMENT               */
-
-    /* INITIALIZATION  WITHIN THE ACTUAL COMMUNICATOR */
-    if( c_info->communicator != MPI_COMM_NULL )
-    {
-        MPI_Comm_size(c_info->communicator,&(c_info ->num_procs));
-        MPI_Comm_rank(c_info->communicator,&(c_info ->rank));
-
-        c_info->pair0 = 0;
-        c_info->pair1 = c_info ->num_procs-1;
-
-        c_info->select_tag = 0;
-        /*c_info->select_source = 0;*/
-    }
-    else
-    {
-        c_info -> rank = -1;
-    }
-
-    if( c_info->communicator == MPI_COMM_WORLD )
-    {
-        c_info->n_groups = 1;
-        c_info->g_sizes[0] = c_info->w_num_procs;
-
-        for(i=0; i<c_info->w_num_procs; i++) c_info->g_ranks[i]=i;
-        IMB_set_errhand(c_info);
-        return 0;
-    }
-
-    /* Collect global group information */
-    // The idea of this code is to collect the information on:
-    // 1) number of groups, stored in c_info->n_groups variable on rank 0
-    // 2) sizes of those groups, stored in c_info->g_sizes[] on ranks 0
-    // 3) rank numbers in MPI_COMM_WORLD numbering of all ranks in groups
-    // Mostly this info is for output usage
-    if( c_info->rank == 0 )
-    {
-        /* group leaders provide group ranks */
-        MPI_Comm_group(MPI_COMM_WORLD,&w_group);
-        MPI_Comm_group(c_info->communicator,&group);
-
-        for (i=0; i<c_info->num_procs; i++) c_info->g_sizes[i] = i;
-
-        /* TRANSLATION OF RANKS */
-        MPI_Group_translate_ranks( group, c_info->num_procs, 
-                c_info->g_sizes,w_group,
-                c_info->g_ranks );
-        //print_array(">> c_info->g_ranks", c_info->g_ranks, c_info->num_procs);
-        snd = c_info->num_procs;
-    }
-    else
-    {
-        *c_info->g_ranks = -1;
-        snd = 1;
-    }
-
-    /* w_rank 0 collects in g_ranks ranks of single groups */
-    if( c_info->w_rank == 0 ) 
-    {
-        if( c_info->rank == 0 )
-        {
-            c_info->n_groups = 1;
-            c_info->g_sizes[0] = c_info->num_procs;
-            aux_ptr = c_info->g_ranks + c_info->g_sizes[0];
-        }
-        else
-        {
-            c_info->n_groups = 0;
-            aux_ptr = c_info->g_ranks;
-        }
-
-        for( proc=1; proc<c_info->w_num_procs; proc++ )
-        {
-            /* Recv group ranks or -1  */
-            cnt = (int)(c_info->g_ranks+c_info->w_num_procs-aux_ptr);
-            /* July 2002 fix V2.2.1 (wrong logistics), next 23 lines */
-
-            if( cnt <= 0 )
-                /* all leaders have sent, recv dummies (-1) from others! */
-            {
-                cnt=1;
-                MPI_Recv(&i,cnt,MPI_INT,proc,1000,MPI_COMM_WORLD,&stat);
-            }
-            else
-            {
-
-                MPI_Recv(aux_ptr,cnt,MPI_INT,proc,1000,MPI_COMM_WORLD,&stat);
-
-                //print_array(">> aux_ptr", aux_ptr, cnt);
-
-                if( *aux_ptr >= 0 ) 
-                {
-                    /* Message was from a group leader  */
-                    c_info->n_groups++;
-                    MPI_Get_count(&stat, MPI_INT, &c_info->g_sizes[c_info->n_groups-1]);
-                    aux_ptr += c_info->g_sizes[c_info->n_groups-1];
-                } 
-
-            }
-            /* end fix V2.2.1 */
-        } /*for( proc=1; proc<c_info->w_num_procs; proc++ )*/
-    }
-    else  /* w_rank != 0 */
-    {
-        MPI_Send(c_info->g_ranks,snd,MPI_INT,0,1000,MPI_COMM_WORLD);
-        // print_array(">> c_info->g_ranks", c_info->g_ranks, snd);
-    }
-    /* End collection of group information */   
-
-    IMB_set_errhand(c_info);
-
-    return 0;
-}
-
-void  IMB_adjust_timings_scale(struct comm_info *c_info, struct Bench *bmark)
-{
-    if (bmark->RUN_MODES[0].type == MultPassiveTransfer)
-    {
-         /* Just sanity check */
-        if (c_info->num_procs > 1) 
-        {    
-            bmark->scale_bw = (double)c_info->num_procs - 1;
-        }    
-    }  
-}    
-/**********************************************************************/
-
-void IMB_set_communicator(struct comm_info *c_info )
-    /*
-
-
-       Performs the actual communicator splitting
-
-
-
-       In/out variables: 
-
-       -c_info               (type struct comm_info *)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-       Application communicator gets initialized
-
-
-
-*/
-{
-    int color,key; 
-    int errcode=0;
-
-    /* insert choice for communicator here;
-NOTE   :  globally more than one communicator is allowed   
-Example: grouping of pairs of processes: 
-0 0 1 1 2 2  .. (if even),  UNDEF 0 0 1 1 2 2  .. (if odd) 
-*/
-
-    if( c_info->communicator != MPI_COMM_NULL &&  
-            c_info->communicator != MPI_COMM_SELF &&
-            c_info->communicator != MPI_COMM_WORLD)
-    {
-        errcode = MPI_Comm_free(&c_info->communicator);
-        IMB_err_hand(1, errcode);
-    }
-
-    if (c_info->px == 1 || c_info->py == 1) {
-        key = c_info->w_rank;
-    } else {
-        int prod = c_info->py * c_info->px;
-        key = (c_info->py * c_info->w_rank) % (prod - 1); 
-        if (key == 0) 
-            key = c_info->w_rank;
-    }
-
-    if(c_info->group_mode >= 0)
-    {
-        color = key / c_info->NP;
-        c_info->group_no = color;
-        if(color >= c_info->w_num_procs/c_info->NP) {
-            color=MPI_UNDEFINED;
-        }
-    }
-    /* Default choice and Group definition.  */
-    else
-    {
-        c_info->group_no = 0;
-        if (key < c_info->NP) 
-            color = 0;
-        else 
-            color = MPI_UNDEFINED;   
-    }
-    MPI_Comm_split(MPI_COMM_WORLD, color, key, &c_info->communicator);
-}
-
-
-int IMB_valid(struct comm_info * c_info, struct Bench* Bmark, int NP)
-    /*
-
-
-       Validates an input Benchmark / NP setting
-
-
-
-       Input variables: 
-
-       -c_info               (type struct comm_info *)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-
-       -Bmark                (type struct Bench*)                      
-       (For explanation of struct Bench type:
-       describes all aspects of modes of a benchmark;
-       see [1] for more information)
-
-       User input benchmark setting
-
-
-       -NP                   (type int)                      
-       Number of active processes
-
-
-
-       Return value          (type int)                      
-       1/0 for valid / invalid input
-
-
-
-*/
-{
-    /* Checks validity of Bmark for NP processes */
-    /* Erroneous cases: */
-    int invalid, skip;
-
-    invalid = 0; 
-    skip = 0;
-
-#ifndef MPIIO
-    if (Bmark->RUN_MODES[0].type == SingleTransfer || 
-        Bmark->RUN_MODES[0].type == SingleElementTransfer)
-    {    
-        invalid = NP <= 1;
-        skip    = NP >  2;
-    }    
-#endif
-    if (Bmark->RUN_MODES[0].type == ParallelTransferMsgRate)
-    {    
-        invalid = NP <= 1;
-    }    
-
-    if ( invalid )
-    {
-        if( c_info->w_rank == 0 )
-        {
-            fprintf(unit,"\n# !! Benchmark %s invalid for %d processes !! \n\n",Bmark->name,NP);
-        }
-
-        return 0;
-    }
-
-    /* Cases to skip: */
-    if ( skip ) return 0;
-
-    return 1;
-}
-
-void IMB_set_default(struct comm_info* c_info)
-    /*
-
-
-       Default initialization of comm_info
-
-
-
-       Output variables: 
-
-       -c_info               (type struct comm_info*)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-
-
-*/
-{
-    c_info->w_num_procs=0;          
-    c_info->w_rank=-1;
-    c_info->NP=0;                   
-    c_info->px=0;                   
-    c_info->py=0;                   
-    c_info->communicator=MPI_COMM_NULL;    
-    c_info->num_procs =0;
-    c_info->rank = -1;
-    c_info->s_data_type =  MPI_DATATYPE_NULL;
-    c_info->r_data_type =  MPI_DATATYPE_NULL;
-    c_info->red_data_type =  MPI_DATATYPE_NULL;
-    c_info->op_type = MPI_OP_NULL;
-    c_info->pair0 = c_info->pair1 = -2;
-    c_info->select_tag = 0;
-    c_info->select_source = 0;
-    c_info->s_buffer = NULL;
-    c_info->s_data = NULL;
-    c_info->s_alloc = 0;
-    c_info->r_buffer = NULL;
-    c_info->r_data = NULL;
-    c_info->r_alloc = 0;
-    /* IMB 3.1 << */
-    c_info->max_mem = MAX_MEM_USAGE;
-    /* >> IMB 3.1  */
-    c_info->n_lens = 0;
-    c_info->msglen = NULL;
-    c_info->group_mode = 0;
-    c_info->n_groups = 0;
-    c_info->group_no = -1;
-    c_info->g_sizes = NULL;
-    c_info->g_ranks = NULL;
-    c_info->reccnt = NULL;
-    c_info->rdispl = NULL;
-    c_info->sync = 1;
-    c_info->root_shift = 0; 
-
-    /* IMB 3.2.3 << */
-    c_info->max_msg_log = MAXMSGLOG;
-    c_info->min_msg_log = MINMSGLOG;
-    /* >> IMB 3.2.3  */
-
-    c_info->ERR=MPI_ERRHANDLER_NULL;
-
-#ifdef MPIIO
-    /*   FILE INFORMATION     */
-
-    c_info->filename=NULL;
-    c_info->File_comm=MPI_COMM_NULL;
-    c_info->File_num_procs=0;
-    c_info->all_io_procs=0;
-    c_info->File_rank=-1;
-
-    c_info->fh=MPI_FILE_NULL;
-    c_info->etype=MPI_DATATYPE_NULL;
-    c_info->e_size=0;
-    c_info->filetype=MPI_DATATYPE_NULL;
-
-    c_info->split.Locsize=0;
-    c_info->split.Offset=(MPI_Offset)0;
-    c_info->split.Totalsize=0;
-
-    c_info->amode=0;
-    c_info->info=MPI_INFO_NULL;
-
-    /* View: */
-    c_info->disp=(MPI_Offset)0;
-    c_info->datarep=NULL;
-    c_info->view=MPI_DATATYPE_NULL;
-    c_info->ERRF=MPI_ERRHANDLER_NULL;
-#endif /*MPIIO*/
-
-#if (defined EXT || defined RMA)
-    c_info->WIN=MPI_WIN_NULL;
-    c_info->info=MPI_INFO_NULL;
-    c_info->ERRW=MPI_ERRHANDLER_NULL;
-#endif /*EXT || RMA*/
-}
-
-static void IMB_init_Blist_item_pool()
-{
-    char** allc;
-    pool_size = IMB_get_all_cases(&allc)  *3;
-    curr_pos  = 0;
-
-    pool = (struct Blist_item*) malloc( sizeof(struct Blist_item)*pool_size );
-    IMB_Assert(pool != NULL);
-
-}
-
-static void IMB_free_Blist_item_pool()
-{
-    free(pool);
-    pool = NULL;
-
-    pool_size = 0;
-    curr_pos  = 0;
-
-}
-
-
-static int IMB_get_Blist_item_index()
-{
-    int   ret;
-    int   i, n;
-
-    i = curr_pos;
-    n = pool_size;
-
-
-    if( i == n )
-    {
-        char** allc;
-        n += IMB_get_all_cases(&allc);
-
-        pool = realloc( pool, sizeof(struct Blist_item)*n );
-        IMB_Assert(pool != NULL);
-        pool_size = n;
-
-    } else
-        IMB_Assert(i<n);
-
-
-    ret = i;
-    i++;
-    curr_pos = i;
-
-    return ret;
-}
-
-static void IMB_add_to_list_tail(const char* Bname, int *list_head_index, int* list_tail_index)
-{
-    int head = *list_head_index;
-    int new_item_index = IMB_get_Blist_item_index();
-    struct Blist_item* blist_item = &pool[new_item_index];
-
-    blist_item->bname       = Bname;
-    blist_item->next_index = -1;
-
-    if( head == -1)
-        /* empty list*/
-    {
-        IMB_Assert(*list_tail_index==-1);
-        *list_head_index = new_item_index;
-    } else
-    {
-        int tail = *list_tail_index;
-        struct Blist_item* blist_tail_item = &pool[tail];
-
-        blist_tail_item->next_index = new_item_index;
-
-    }
-
-    *list_tail_index = new_item_index;
-}
-
-static void IMB_print_list(int list_head_index)
-{
-    int index = list_head_index;
-    struct Blist_item* blist_item;
-
-    while( index != -1)
-    {
-        blist_item = &pool[index];
-        index = blist_item->next_index;
-        printf("%s ", blist_item->bname);
-    }
-
-}
-
-static void IMB_remove_invalid_items( int* p_list_head, int* p_list_tail, int *n_cases)
-{
-    int    curr_item = *p_list_head;
-    int    prev_item = -1;
-    int    iret;
-    struct Blist_item* blist_item;
-
-    while(curr_item != -1)
-    {
-        blist_item = &pool[curr_item];
-
-        iret = IMB_get_bmark_index((char*) blist_item->bname);
-
-        if( iret == LIST_INVALID)
-        {
-            int next_item = blist_item->next_index;
-
-            fprintf(stderr,"Invalid benchmark name %s\n", blist_item->bname);
-
-            (*n_cases)--;
-
-            if( prev_item != -1)
-            {
-                struct Blist_item* prev_blist_item = &pool[prev_item];
-
-                prev_blist_item->next_index = next_item;
-
-                if( next_item == -1) *p_list_tail = prev_item;
-
-                curr_item = next_item;
-
-                IMB_Assert( (*n_cases) > 0);
-
-            } else
-            {
-                curr_item = *p_list_head = next_item;
-
-                if( next_item == -1) 
-                {
-                    *p_list_tail = -1;
-                    IMB_Assert( (*n_cases) == 0);
-                } else
-                    IMB_Assert( (*n_cases) > 0);
-
-            }
-
-        } else
-        {
-            prev_item = curr_item;
-            curr_item = blist_item->next_index;
-        }
-
-    } /* while*/
-
-}
-
-static void IMB_remove_item_from_list( const char* name, int* p_list_head, int* p_list_tail, int *n_cases)
-{
-    int    curr_item = *p_list_head;
-    int    prev_item = -1;
-    int    iret;
-    struct Blist_item* blist_item;
-
-    while(curr_item != -1)
-    {
-        blist_item = &pool[curr_item];
-
-        iret = IMB_strcasecmp(name, blist_item->bname);
-
-        if( iret == 0)
-        {
-            int next_item = blist_item->next_index;
-
-            (*n_cases)--;
-
-            if( prev_item != -1)
-            {
-                struct Blist_item* prev_blist_item = &pool[prev_item];
-
-                prev_blist_item->next_index = next_item;
-
-                if( next_item == -1) *p_list_tail = prev_item;
-
-                curr_item = next_item;
-
-                IMB_Assert( (*n_cases) > 0);
-
-            } else
-            {
-                curr_item = *p_list_head = next_item;
-
-                if( next_item == -1) 
-                {
-                    *p_list_tail = -1;
-                    IMB_Assert( (*n_cases) == 0);
-                } else
-                    IMB_Assert( (*n_cases) > 0);
-
-            }
-
-        } else
-        {
-            prev_item = curr_item;
-            curr_item = blist_item->next_index;
-        }
-
-    } /* while*/
-
-}
-/********************************************************************/
-
-
diff --git a/src/IMB_init_file.c b/src/IMB_init_file.c
deleted file mode 100644
index 79487e24..00000000
--- a/src/IMB_init_file.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- File: IMB_init_file.c 
-
- Implemented functions: 
-
- IMB_init_file_content;
- IMB_init_file;
- IMB_free_file;
- IMB_del_file;
- IMB_open_file;
-
- ***************************************************************************/
-
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-
-
-
-void IMB_init_file_content(void* BUF, int pos1, int pos2)
-/*
-
-                      
-                      Initializes contents of a file for READ benchmarks
-                      
-
-
-Input variables: 
-
--pos1                 (type int)
--pos2                 (type int)                      
-                      pos1, pos2: target positions (start/end) in file
-                      
-
-
-In/out variables: 
-
--BUF                  (type void*)                      
-                      Content of buffer to be written to file between these positions
-                      
-
-
-*/
-{
-IMB_ass_buf( BUF, 0, pos1, pos2, 1);
-}
-
-
-
-
-/* << IMB 3.1 */
-int IMB_init_file(struct comm_info* c_info, struct Bench* Bmark, struct iter_schedule* ITERATIONS, int NP)
-/* >> IMB 3.1 */
-/*
-
-
-
-Input variables: 
-
--Bmark                (type struct Bench*)                      
-                      (For explanation of struct Bench type:
-                      describes all aspects of modes of a benchmark;
-                      see [1] for more information)
-                      
-                      Given file i/o benchmark
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-                      
-
--NP                   (type int)                      
-                      Number of active processes
-                      
-
-
-In/out variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-                      MPI_File component is set accordingly
-                      
-
-
-Return value          (type int)                      
-                      Error code (identical with MPI error code if occurs)
-                      
-
-
-*/
-{
-int error = 0;
-int fnlen;
-
-IMB_free_file(c_info);
-
-c_info->fh = MPI_FILE_NULL;
-c_info->etype = MPI_BYTE;
-MPI_Type_size(c_info->etype,&c_info->e_size);
-c_info->filetype = c_info->etype;
-
-IMB_user_set_info(&c_info->info);
-
-c_info-> disp = (MPI_Offset)0;
-
-c_info->datarep = IMB_str("native"); 
-
-if( Bmark->RUN_MODES[0].type == SingleTransfer )
-    c_info->all_io_procs = 1;
-else
-    c_info->all_io_procs = c_info->num_procs;
-
-fnlen = 1+strlen(FILENAME);
-/* July 2002 fix V2.2.1: group_mode >= 0 */
-if( c_info->group_mode >= 0 )
-  fnlen += 4;
-
-if( Bmark->fpointer == private )
-  {
-
-  if( c_info->rank > c_info->all_io_procs-1 || c_info->rank < 0 )
-  {
-  c_info->File_comm = MPI_COMM_NULL;
-  c_info->File_rank = -1;
-  c_info->File_num_procs = 0;
-  }
-  else
-  {
-  c_info->File_comm = MPI_COMM_SELF;
-  c_info->File_rank = 0;
-  c_info->File_num_procs = 1;
-  }
-
-  if( c_info->File_rank >= 0 )
-  {
-
-  fnlen += 4;
-
-  c_info->filename = (char*) IMB_v_alloc(sizeof(char)*fnlen,"Init_File");
-
-/* July 2002 fix V2.2.1: group_mode >= 0 */
-  if( c_info->group_mode >= 0 )
-  sprintf(c_info->filename,"%s_g%d_%d",FILENAME,c_info->group_no,c_info->w_rank);
-  else
-  sprintf(c_info->filename,"%s_%d",FILENAME,c_info->w_rank);
-
-  c_info->amode = MPI_MODE_CREATE | MPI_MODE_RDWR | MPI_MODE_UNIQUE_OPEN;
-  }
-
-  }
-else
-  {
-if( c_info->communicator == MPI_COMM_NULL ) 
-  {
-  c_info->File_comm = MPI_COMM_NULL;
-  c_info->File_rank = -1;
-  c_info->File_num_procs = 0;
-  }
-else
-  {
-
-
-  c_info->File_comm = c_info->communicator;
-
-  c_info->File_rank = c_info->rank;
-  c_info->File_num_procs = c_info->num_procs;
-  }
-
-  c_info->filename = (char*) IMB_v_alloc(sizeof(char)*fnlen,"Init_File");
-/* July 2002 fix V2.2.1: group_mode >= 0 */
-  if( c_info->group_mode >= 0 )
-  sprintf(c_info->filename,"%s_g%d",FILENAME,c_info->group_no);
-  else
-  sprintf(c_info->filename,"%s",FILENAME);
-  
-  c_info->amode = MPI_MODE_CREATE | MPI_MODE_RDWR;
-  }
-
-  if( Bmark->access == no ) return 0;
-
-  IMB_del_file(c_info); // if exists
-
-  if( c_info -> File_rank == 0 )
-   {
-
-   int ierr, size, total, i;
-   MPI_Status stat;
-
-
-/* << IMB 3.1. fixes of size */
-   if( c_info->n_lens>0 )
-   {
-       size=0;
-       for(i=0; i< c_info->n_lens; i++ )
-	   size = max(size,c_info->msglen[i]);
-   }
-   else
-   {
-       size=1<<c_info->max_msg_log;
-   }
-
-   total = max(size,ITERATIONS->overall_vol);
-
-   if( ITERATIONS->overall_vol/size > MSGSPERSAMPLE )
-       total = size*MSGSPERSAMPLE;
-
-/* >> IMB 3.1 */
-
-/* July 2002 fix V2.2.1: calculation of file sizes in "priv" case */
-   if ( Bmark->fpointer == private ) 
-    {
-    int NP= c_info->all_io_procs;
-
-    total = (total+NP-1)/NP;
-
-    if ( size%NP ) total += asize*MSGSPERSAMPLE;
-    }
-/* July 2002 end fix */
-
-   ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
-                        c_info->amode, c_info->info, &c_info->fh);
-   IMB_err_hand(1,ierr);
-
-   MPI_File_set_view(c_info->fh,(MPI_Offset)0,
-       c_info->etype,c_info->etype,c_info->datarep,c_info->info);
-   
-/* July 2002 fix V2.2.1: size <-> total */
-   ierr = MPI_File_set_size(c_info->fh, total);
-   IMB_err_hand(1,ierr);
-
-   if( Bmark->access == get )
-   {
-/* Prepare File for input */
-
-
-   int el_size = 1<<20;
-   int pos1, pos2;
-
-   pos1 = 0;
-
-   while( pos1 < total )
-   {
-   pos2 = min(total-1,pos1+el_size-1);
-   size = ((pos2-pos1)/asize+1)*asize;
-
-
-   IMB_alloc_buf(c_info, "Init_File 1 ",size, 0);
-   IMB_init_file_content(c_info->s_buffer, pos1, pos2);
-
-   ierr=MPI_File_write(c_info->fh,c_info->s_buffer,pos2-pos1+1,c_info->etype,&stat);
-   IMB_err_hand(1,ierr);
-
-   pos1 = pos2+1;
-   }
-   
-   IMB_del_s_buf(c_info);
-   }
-
-   ierr= MPI_File_close(&c_info->fh);
-   IMB_err_hand(1,ierr);
-
-   }
-
-return error;
-}
-
-
-
-
-void IMB_free_file(struct comm_info * c_info)
-/*
-
-
-
-In/out variables: 
-
--c_info               (type struct comm_info *)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-                      File related components are free-d and reset to 
-                      NULL initialization
-                      
-
-
-*/
-{
-if( c_info->filename != (char*)NULL ) IMB_v_free((void**)&c_info->filename);
-if( c_info->datarep  != (char*)NULL ) IMB_v_free((void**)&c_info->datarep );
-if( c_info->filename != (char*)NULL )
-if( c_info->view     != MPI_DATATYPE_NULL ) 
-                        MPI_Type_free(&c_info->view);
-if( c_info->info     != MPI_INFO_NULL ) 
-                        MPI_Info_free(&c_info->info);
-if( c_info->fh       != MPI_FILE_NULL )
-                        MPI_File_close(& c_info->fh );
-c_info->filename = (char*)NULL;
-c_info->datarep  = (char*)NULL;
-c_info->view     = MPI_DATATYPE_NULL;
-c_info->info     = MPI_INFO_NULL;
-c_info->fh       = MPI_FILE_NULL;
-}
-
-
-
-
-void IMB_del_file(struct comm_info* c_info)
-/*
-
-
-
-In/out variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-                      File associated to MPI_File component is erased from disk
-                      
-
-
-*/
-{
-if ( c_info->File_comm != MPI_COMM_NULL )
-{
-if( c_info-> fh != MPI_FILE_NULL ) MPI_File_close(&c_info->fh);
-MPI_Barrier(c_info->File_comm);
-
-if ( c_info->filename != (char*)NULL )
-{
-if( c_info->File_rank == 0 )
-  {
-// touch file
-ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
-                     c_info->amode, MPI_INFO_NULL, &c_info->fh);
-
-if( c_info-> fh != MPI_FILE_NULL ) MPI_File_close(&c_info->fh);
-
-/* IMB_3.0: simplify file deletion */
-ierr=MPI_File_delete(c_info->filename,MPI_INFO_NULL);
-  }
-}
-MPI_Barrier(c_info->File_comm);
-}
-}
-
-int IMB_open_file(struct comm_info* c_info)
-/*
-
-
-
-In/out variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-                      File associated to MPI_File component is opened, view is set
-                      
-
-
-Return value          (type int)                      
-                      Error code (identical with MPI error code if occurs)
-                      
-
-
-*/
-{
-int ierr;
-ierr = 0;
-if ( c_info->File_comm != MPI_COMM_NULL )
-{
-ierr = MPI_File_open(c_info->File_comm, c_info->filename,
-                     c_info->amode, c_info->info, &c_info->fh);
-MPI_ERRHAND(ierr);
-
-ierr = MPI_File_set_view(c_info->fh, c_info->disp, c_info->etype, 
-                         c_info->filetype, c_info->datarep, c_info->info);
-MPI_ERRHAND(ierr);
-}
-return ierr;
-}
diff --git a/src/IMB_init_transfer.c b/src/IMB_init_transfer.c
deleted file mode 100644
index 5602ba71..00000000
--- a/src/IMB_init_transfer.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- File: IMB_init_transfer.c 
-
- Implemented functions: 
-
- IMB_init_transfer;
- IMB_close_transfer;
-
- ***************************************************************************/
-
-
-
-
-
-#include "mpi.h"
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-
-
-
-/* IMB 3.1 << */
-void IMB_init_transfer(struct comm_info* c_info, struct Bench* Bmark, int size, MPI_Aint acc_size)
-/* >> IMB 3.1  */
-/*
-
-                      
-                      For IO  case: file splitting/view is set, file is opened
-                      For EXT case: window is created and synchronized (MPI_Win_fence)
-                      
-
-
-Input variables: 
-
--Bmark                (type struct Bench*)                      
-                      (For explanation of struct Bench type:
-                      describes all aspects of modes of a benchmark;
-                      see [1] for more information)
-                      
-                      Given benchmark
-                      
-
--size                 (type int)                      
-                      (Only IO case): used to determine file view
-                      
-IMB 3.1 <<
--acc_size             (type int)                      
-                      (Only EXT case): accumulate window size
->> IMB 3.1
-
-
-In/out variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-                      Corresponding components (File or Window related) are set
-                      
-
-
-*/
-{
-
-
-#if defined MPIIO
-    #include <limits.h>	
-    int ne, baslen, mod;
-    int ierr;
-    size_t pos1, pos2;
-
-    if( c_info->File_rank < 0 || Bmark->access == no ) return;
-
-    if( size > 0)
-    {
-	IMB_get_rank_portion(c_info->File_rank, c_info->all_io_procs, size, asize,
-			     &pos1, &pos2);
-	baslen =  (pos2 >= pos1) ? pos2-pos1+1 : 0;
-    } else
-    {
-	baslen = 0;
-	pos2 = pos1 = 0;
-    }
-
-    if( c_info->view != MPI_DATATYPE_NULL )
-		    MPI_Type_free(&c_info->view);
-
-    if( Bmark->fpointer == private )
-    {
-
-	c_info->split.Locsize	= baslen;
-	c_info->split.Offset	= 0;
-	c_info->split.Totalsize	= baslen;
-
-	if( Bmark->access == put )
-		IMB_set_buf(c_info, c_info->File_rank, 0, 
-			    (baslen>0)? baslen-1 : 0 ,
-			     1, 0);
-
-	if( Bmark->access == get )
-		IMB_set_buf(c_info, c_info->File_rank, 1, 0, 0,
-			     (baslen>0)? baslen-1 : 0 );
-  
-    }
-
-    if( Bmark->fpointer == indv_block || Bmark->fpointer == shared ||
-	Bmark->fpointer == explicit )
-    {
-	int bllen[3];
-
-	MPI_Aint displ[3];
-	MPI_Datatype types[3];
-
-	bllen[0]=1; displ[0] = 0; types[0] = MPI_LB;
-
-	bllen[1] = baslen;
-	displ[1] = pos1;
-	types[1] = c_info->etype;
-
-	bllen[2] = 1;
-	displ[2] = size;
-	types[2] = MPI_UB;
-
-	if( Bmark->fpointer == indv_block )
-	{
-	    /* July 2002 fix V2.2.1: handle empty view case separately */
-	    if( baslen>0 )
-	    {
-		/* end change */
-		ierr=MPI_Type_struct(3,bllen,displ,types,&c_info->view);
-		IMB_err_hand(1,ierr);
-		ierr=MPI_Type_commit(&c_info->view);
-		IMB_err_hand(1,ierr);
-		c_info->filetype = c_info->view;
-
-		/* July 2002 fix V2.2.1: handle empty case */
-	    }
-	    else c_info->filetype = c_info->etype;
-	    /* end change */
-	}
-
-	if( Bmark->access == put )
-	    IMB_set_buf(c_info, c_info->File_rank, 0, (baslen>0)? baslen-1 : 0, 1, 0 );
-
-	if( Bmark->access == get )
-	    IMB_set_buf(c_info, c_info->File_rank, 1, 0, 0, (baslen>0)? baslen-1 : 0 );
-
-	c_info->split.Locsize = bllen[1];
-	c_info->split.Offset  = pos1;
-	c_info->split.Totalsize = size;
-    }
-
-    ierr = IMB_open_file(c_info);
-
-#elif defined  EXT
-    MPI_Aint sz;
-    int s_size, r_size;
-    int ierr;
-
-    ierr=0;
-
-    if( Bmark->reduction )
-    {
-	MPI_Type_size(c_info->red_data_type,&s_size);
-	r_size=s_size;
-    }
-    else
-    {
-	MPI_Type_size(c_info->s_data_type,&s_size);
-	MPI_Type_size(c_info->r_data_type,&r_size);
-    }
-
-    if( c_info -> rank >= 0 )
-    {
-	IMB_user_set_info(&c_info->info);
-
-	/* IMB 3.1 << */
-	sz = acc_size;
-	/* >> IMB 3.1  */
-
-	if( Bmark->access == put)
-	{
-	    ierr = MPI_Win_create(c_info->r_buffer,sz,r_size,c_info->info,
-				  c_info->communicator, &c_info->WIN);
-	    MPI_ERRHAND(ierr);
-	    ierr = MPI_Win_fence(0, c_info->WIN);
-	    MPI_ERRHAND(ierr);
-	}
-	else if( Bmark->access == get)
-	{
-	    ierr = MPI_Win_create(c_info->s_buffer,sz,s_size,c_info->info,
-				  c_info->communicator, &c_info->WIN);
-	    MPI_ERRHAND(ierr);
-	    ierr = MPI_Win_fence(0, c_info->WIN);
-	    MPI_ERRHAND(ierr);
-	}
-    }
-#elif defined RMA
-    int s_size, r_size;
-    int ierr = 0;
-
-    if(Bmark->reduction)
-    {
-        MPI_Type_size(c_info->red_data_type,&s_size);
-        r_size=s_size;
-    }
-    else
-    {
-        MPI_Type_size(c_info->s_data_type,&s_size);
-        MPI_Type_size(c_info->r_data_type,&r_size);
-    }
-
-    if(c_info->rank >= 0)
-    {
-        IMB_user_set_info(&c_info->info);
-
-        if (Bmark->access == put)
-        {    
-            ierr = MPI_Win_create(c_info->r_buffer, acc_size, r_size, c_info->info,
-                    c_info->communicator, &c_info->WIN);
-        }
-        else if (Bmark->access == get)
-        {
-            ierr = MPI_Win_create(c_info->s_buffer, acc_size, r_size, c_info->info,
-                    c_info->communicator, &c_info->WIN);
-        }    
-        MPI_ERRHAND(ierr);
-    }
-#endif 
-
-    IMB_set_errhand(c_info);
-    err_flag = 0;
-}
-
-
-void IMB_close_transfer (struct comm_info* c_info, struct Bench* Bmark, int size)
-    /*
-
-
-       Closes / frees file / window components
-
-
-
-       Input variables: 
-
-       -Bmark                (type struct Bench*)                      
-       (For explanation of struct Bench type:
-       describes all aspects of modes of a benchmark;
-       see [1] for more information)
-
-       Given benchmark
-
-
-       -size                 (type int)                      
-       (Only IO case): used to determine file view
-
-
-
-       In/out variables: 
-
-       -c_info               (type struct comm_info*)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-       Corresponding components (File or Window related) are freed
-
-
-
-*/
-{
-#ifdef MPIIO
-    if( c_info->view != MPI_DATATYPE_NULL )
-        MPI_Type_free(&c_info->view);
-
-    if( c_info->File_rank >= 0 && Bmark->access != no && c_info->fh!=MPI_FILE_NULL)
-        MPI_File_close(&c_info->fh);
-
-#else /*not MPIIO*/
-#if (defined EXT || defined RMA)
-
-    if( c_info->WIN != MPI_WIN_NULL )
-        MPI_Win_free(&c_info->WIN);
-
-#endif /*EXT || RMA*/
-#endif /*MPIIO*/
-}
-
diff --git a/src/IMB_ones_bidir.c b/src/IMB_ones_bidir.c
deleted file mode 100644
index 42fc5308..00000000
--- a/src/IMB_ones_bidir.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_ones_bidir.c 
-
- Implemented functions: 
-
- IMB_bidir_get;
- IMB_bidir_put;
-
- ***************************************************************************/
-
-
-/* ===================================================================== */
-/* 
-IMB 3.1 changes
-July 2007
-Hans-Joachim Plum, Intel GmbH
-
-- replace "int n_sample" by iteration scheduling object "ITERATIONS"
-  (see => IMB_benchmark.h)
-
-- proceed with offsets in send / recv buffers to eventually provide
-  out-of-cache data
-*/
-/* ===================================================================== */
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-/*************************************************************************/
-
-
-
-void IMB_bidir_get(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-2 benchmark kernel
-                      Driver for aggregate / non agg. bidirectional MPI_Get benchmarks
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-  double t1, t2;
-  
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  int dest, source,sender;
-  MPI_Status stat;
-
-  ierr = 0;
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-
-  if (c_info->rank == c_info->pair0)
-    {
-      dest = c_info->pair1;
-    }
-  else if (c_info->rank == c_info->pair1)
-    {
-      dest =c_info->pair0 ;
-    }
-  else
-    {
-      dest   = -1;
-    }
-  sender=0;
-
-  if( !RUN_MODE->AGGREGATE )
-     IMB_ones_get(  c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-  if( RUN_MODE->AGGREGATE )
-     IMB_ones_mget( c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-
-}
-
-
-
-
-void IMB_bidir_put(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-2 benchmark kernel
-                      Driver for aggregate / non agg. bidirectional MPI_Put benchmarks
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-  double t1, t2;
-  
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  int dest, source,sender;
-  int ierr;
-  MPI_Status stat;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-
-  if (c_info->rank == c_info->pair0)
-    {
-      dest = c_info->pair1;
-    }
-  else if (c_info->rank == c_info->pair1)
-    {
-      dest =c_info->pair0 ;
-    }
-  else
-    {
-      dest   = -1;
-    }
-
-  
-  sender=1;
-
-  if( !RUN_MODE->AGGREGATE )
-     IMB_ones_put(  c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-  
-  if( RUN_MODE->AGGREGATE )
-     IMB_ones_mput( c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-
-}
diff --git a/src/IMB_ones_unidir.c b/src/IMB_ones_unidir.c
deleted file mode 100644
index 22dd6740..00000000
--- a/src/IMB_ones_unidir.c
+++ /dev/null
@@ -1,728 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_ones_unidir.c 
-
- Implemented functions: 
-
- IMB_unidir_put;
- IMB_unidir_get;
- IMB_ones_get;
- IMB_ones_mget;
- IMB_ones_put;
- IMB_ones_mput;
-
- ***************************************************************************/
-
-
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-/*************************************************/
-
-
-/* ===================================================================== */
-/* 
-IMB 3.1 changes
-July 2007
-Hans-Joachim Plum, Intel GmbH
-
-- replace "int n_sample" by iteration scheduling object "ITERATIONS"
-  (see => IMB_benchmark.h)
-
-- proceed with offsets in send / recv buffers to eventually provide
-  out-of-cache data
-*/
-/* ===================================================================== */
-
-
-void IMB_unidir_put (struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
-                     MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-2 benchmark kernel
-                      Driver for aggregate / non agg. unidirectional MPI_Put benchmarks
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-  double t1, t2;
-  
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  int dest, source,sender;
-  MPI_Status stat;
-
-  ierr = 0;
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-
-  if (c_info->rank == c_info->pair0)
-    {
-      dest = c_info->pair1;
-      sender = 1;
-    }
-  else if (c_info->rank == c_info->pair1)
-    {
-      dest =c_info->pair0 ;
-      sender = 0;
-    } else
-    {
-      dest   = -1;
-      sender = -1;
-    }
- 
-  if( !RUN_MODE->AGGREGATE )
-     IMB_ones_put(  c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-  if( RUN_MODE->AGGREGATE )
-     IMB_ones_mput( c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-
-}
-
-/*************************************************************************/
-
-
-
-void IMB_unidir_get (struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                     MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-2 benchmark kernel
-                      Driver for aggregate / non agg. unidirectional MPI_Get benchmarks
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-  double t1, t2;
-  
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  int dest, source,sender;
-  int ierr;
-  MPI_Status stat;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-
-  if (c_info->rank == c_info->pair0)
-    {
-      dest = c_info->pair1;
-      sender = 1;
-    }
-  else if (c_info->rank == c_info->pair1)
-    {
-      dest =c_info->pair0 ;
-      sender = 0;
-    }
-  else
-  {
-      dest   = -1;
-      sender = -1;
-  }
- 
-
-  if( !RUN_MODE->AGGREGATE )
-     IMB_ones_get(  c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-  if( RUN_MODE->AGGREGATE )
-     IMB_ones_mget( c_info,
-                s_num, dest, 
-                r_num, sender,
-                size, ITERATIONS,
-                time);
-  
-}
-
-
-
-
-void IMB_ones_get(struct comm_info* c_info, int s_num, int dest, 
-                  int r_num, int sender, int size, 
-                  struct iter_schedule *ITERATIONS, double* time)
-/*
-
-                      
-                      Non aggregate MPI_Get + MPI_Win_fence
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--s_num                (type int)                      
-                      #buffer entries to put if relevant for calling process 
-                      
-
--dest                 (type int)                      
-                      destination rank
-                      
-
--r_num                (type int)                      
-                      #buffer entries to get if relevant for calling process 
-                      
-
--sender               (type int)                      
-                      logical flag: 1/0 for 'local process puts/gets'
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-                      
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-                      
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-                      
-
-
-*/
-{
-int i, ierr;
-int s_size;
-
-#ifdef CHECK 
-  defect=0;
-#endif
-
-MPI_Type_size(c_info->s_data_type,&s_size);
-
-if( c_info-> rank < 0 )
-*time = 0.;
-else
-{
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-*time = MPI_Wtime();
-
-if( sender ) 
-{
-
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-
-/* "Send ", i.e. synchronize window */
-
-       ierr = MPI_Win_fence(0, c_info->WIN);
-       MPI_ERRHAND(ierr);
-
-	}
-}
-else
-{
-
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-
-/* "Receive" */
-       ierr = MPI_Get((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                      r_num, c_info->r_data_type,
-                      dest, i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                      s_num, c_info->s_data_type, c_info->WIN);
-       ierr = MPI_Win_fence(0, c_info->WIN);
-
-       MPI_ERRHAND(ierr);
-
-       DIAGNOSTICS("MPI_Get: ",c_info,c_info->r_buffer,r_num,r_num,i,0);
-
-       CHK_DIFF("MPI_Get",c_info, (void*)((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs),0,
-                 size, size, asize,
-                 get, 0, ITERATIONS->n_sample, i,
-                 dest, &defect);
-	}
-
-}
-
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
-
-}
-}
-
-
-
-
-void IMB_ones_mget(struct comm_info* c_info, int s_num, int dest, 
-                   int r_num, int sender, int size, 
-                   struct iter_schedule* ITERATIONS, double* time)
-/*
-
-                      
-                      Aggregate MPI_Get + MPI_Win_fence
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--s_num                (type int)                      
-                      #buffer entries to put if relevant for calling process 
-                      
-
--dest                 (type int)                      
-                      destination rank
-                      
-
--r_num                (type int)                      
-                      #buffer entries to get if relevant for calling process 
-                      
-
--sender               (type int)                      
-                      logical flag: 1/0 for 'local process puts/gets'
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-                      
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-                      
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-                      
-
-
-*/
-{
-int i, ierr;
-char* recv;
-
-#ifdef CHECK 
-defect=0;
-#endif
-
-if( c_info-> rank < 0 )
-*time = 0.;
-else
-{
-recv = (char*)c_info->r_buffer;
-
-ierr = MPI_Win_fence(0, c_info->WIN);
-MPI_ERRHAND(ierr);
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-*time = MPI_Wtime();
-
-if( !sender )
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-        ierr = MPI_Get((void*)(recv+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 
-                       r_num, c_info->r_data_type,
-                       dest, i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                       s_num, c_info->s_data_type, c_info->WIN);
-	}
-
-ierr = MPI_Win_fence(0, c_info->WIN);
-MPI_ERRHAND(ierr);
-
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
-
-#ifdef CHECK
-if(!sender)
-{
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-       CHK_DIFF("MPI_Get",c_info, (void*)((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs),0,
-                 size, size, asize,
-                 get, 0, ITERATIONS->n_sample, i,
-                 dest, &defect);
-        }
-}
-#endif
-
-}
-}
-
-
-
-
-void IMB_ones_put(struct comm_info* c_info, int s_num, int dest, 
-                  int r_num, int sender, int size, 
-                  struct iter_schedule* ITERATIONS,  double* time)
-/*
-
-                      
-                      Non aggregate MPI_Put + MPI_Win_fence
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--s_num                (type int)                      
-                      #buffer entries to put if relevant for calling process 
-                      
-
--dest                 (type int)                      
-                      destination rank
-                      
-
--r_num                (type int)                      
-                      #buffer entries to get if relevant for calling process 
-                      
-
--sender               (type int)                      
-                      logical flag: 1/0 for 'local process puts/gets'
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-                      
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-                      
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-                      
-
-
-*/
-{
-int i, ierr, r_size;
-char* recv;
-
-#ifdef CHECK 
-defect=0;
-#endif
-
-MPI_Type_size(c_info->r_data_type,&r_size);
-
-recv = (char*)c_info->r_buffer;
-
-if( c_info-> rank < 0 )
-*time = 0.;
-else
-{
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-*time = MPI_Wtime();
-
-if( sender ) 
-{
-
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-
-/* Send */
-
-       ierr = MPI_Put((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                      s_num, c_info->s_data_type,
-                      dest, i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                      r_num, c_info->r_data_type, c_info->WIN);
-
-       ierr = MPI_Win_fence(0, c_info->WIN);
-       MPI_ERRHAND(ierr);
-
-        }
-}
-else
-{
-
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-
-/* "Receive", i.e. synchronize the window */
-       ierr = MPI_Win_fence(0, c_info->WIN);
-       MPI_ERRHAND(ierr);
-
-       DIAGNOSTICS("MPI_Put: ",c_info,c_info->r_buffer,r_num,r_num,i,0);
-
-       CHK_DIFF("MPI_Put",c_info, (void*)(recv+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 0,
-                 size, size, asize,
-                 get, 0, ITERATIONS->n_sample, i,
-                 dest, &defect);
-
-        }
-}
-
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
-
-}
-}
-
-
-
-
-void IMB_ones_mput(struct comm_info* c_info, int s_num, int dest, 
-                   int r_num, int sender, int size, 
-                   struct iter_schedule* ITERATIONS, double* time)
-/*
-
-                      
-                      Aggregate MPI_Put + MPI_Win_fence
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--s_num                (type int)                      
-                      #buffer entries to put if relevant for calling process 
-                      
-
--dest                 (type int)                      
-                      destination rank
-                      
-
--r_num                (type int)                      
-                      #buffer entries to get if relevant for calling process 
-                      
-
--sender               (type int)                      
-                      logical flag: 1/0 for 'local process puts/gets'
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-                      
-
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
-
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-                      
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-                      
-
-
-*/
-{
-int i, ierr;
-char* send, *recv;
-
-#ifdef CHECK 
-defect=0;
-#endif
-
-if( c_info-> rank < 0 )
-*time = 0.;
-else
-{
-send = (char*)c_info->s_buffer;
-recv = (char*)c_info->r_buffer;
-
-ierr = MPI_Win_fence(0, c_info->WIN);
-MPI_ERRHAND(ierr);
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-*time = MPI_Wtime();
-
-if( sender )
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-          ierr = MPI_Put((void*)(send+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs),
-                         s_num, c_info->s_data_type,
-                         dest, i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                         r_num, c_info->r_data_type, c_info->WIN);
-          MPI_ERRHAND(ierr);
-	}
-
-ierr = MPI_Win_fence(0, c_info->WIN);
-MPI_ERRHAND(ierr);
-
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
-
-if(!sender)
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-       CHK_DIFF("MPI_Put",c_info, (void*)(recv+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 0,
-                 size, size, asize,
-                 get, 0, ITERATIONS->n_sample, i,
-                 dest, &defect);
-}
-
-}
-}
diff --git a/src/IMB_output.c b/src/IMB_output.c
deleted file mode 100644
index c77f5391..00000000
--- a/src/IMB_output.c
+++ /dev/null
@@ -1,1486 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_output.c 
-
- Implemented functions: 
-
- IMB_output;
- IMB_display_times;
- IMB_calculate_times;
- IMB_show_selections;
- IMB_show_procids;
- IMB_print_array;
- IMB_print_int_row;
- IMB_print_info;
- IMB_print_header;
- IMB_edit_format;
- IMB_make_line;
-
-New in IMB_3.0:
- IMB_help;
-
- ***************************************************************************/
-
-#include <string.h>
-#include <float.h> // DBL_MAX
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-enum output_format
-{
-    /* print msg size, number of iterations, time and bandwidth */
-    OUT_TIME_AND_BW,
-
-    /* print msg size, number of iterations, bandwidth and msg rate */
-    OUT_BW_AND_MSG_RATE,
-
-    /* print msg size, number of iterations, 
-     * min, max and avrg times (among all ranks) and bandwidth */
-    OUT_TIME_RANGE_AND_BW,     
-    
-    /* print msg size, number of iterations 
-     * min, max and avrg times (among all ranks) */
-    OUT_TIME_RANGE,         
-    
-    /* print pure communication time, total time, computation time and
-       the overlap of computation and communication (in %-s) */
-    OUT_OVERLAP,  
-
-    /* It is used for operations where msg size is not relevant 
-     * (for instance Barrier, Ibarrier). The format may differ for
-     * different benchmarks, and msg size is not printed. */
-    OUT_SYNC               
-};
-
-
-/*****************************************************************/
-
-
-
-/* IMB 3.1 << */
-/*
-Introduce new ITERATIONS object
-*/
-void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE, 
-                int header, int size, struct iter_schedule* ITERATIONS,
-                double *time)
-/* >> IMB 3.1  */
-/*
-
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--Bmark                (type struct Bench*)                      
-                      (For explanation of struct Bench type:
-                      describes all aspects of modes of a benchmark;
-                      see [1] for more information)
-                      
-                      The actual benchmark
-                      
-
--BMODE                (type MODES)                      
-                      The actual benchmark mode (if relevant; only MPI-2 case, see [1])
-                      
-
--header               (type int)                      
-                      1/0 for do/don't print table headers
-                      
-
--size                 (type int)                      
-                      Benchmark message size
-                      
-
--ITERATIONS           (type struct iter_schedule)                      
-                      Benchmark repetition descr. object
-                      
-
--time                 (type double *)                      
-                      Benchmark timing outcome
-                      3 numbers (min/max/average)
-                      
-
-
-*/
-{
-    double scaled_time[MAX_TIME_ID];
-
-    int i,i_gr;
-    int li_len;
-    int out_format;
-
-    const int DO_OUT    = (c_info->w_rank  == 0)   ? 1 : 0;
-    const int GROUP_OUT = (c_info->group_mode > 0) ? 1 : 0;
-
-    ierr = 0;
-
-    if (DO_OUT)
-    {
-        /* Fix IMB_1.0.1: NULL all_times before allocation */
-        IMB_v_free((void**)&all_times);
-
-        all_times = (double*)IMB_v_alloc(c_info->w_num_procs * Bmark->Ntimes * sizeof(double), "Output 1");
-#ifdef CHECK
-      if(!all_defect)
-      {
-          all_defect = (double*)IMB_v_alloc(c_info->w_num_procs * sizeof(double), "Output 1");
-          for(i=0; i<c_info->w_num_procs; i++) all_defect[i]=0.;
-      }
-#endif   
-    } /*if (DO_OUT)*/
-
-    /* Scale the timings */
-    for(i=0; i < Bmark->Ntimes; i++) 
-    {
-        scaled_time[i] = time[i] * SCALE * Bmark->scale_time;
-    }
-
-    /* collect all times  */
-    ierr=MPI_Gather(scaled_time,Bmark->Ntimes,MPI_DOUBLE,all_times,Bmark->Ntimes,MPI_DOUBLE,0,MPI_COMM_WORLD);
-    MPI_ERRHAND(ierr);
-
-#ifdef CHECK      
-    /* collect all defects */     
-    ierr=MPI_Gather(&defect,1,MPI_DOUBLE,all_defect,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
-    MPI_ERRHAND(ierr);
-#endif
-
-    if( DO_OUT ) 
-    {
-        BTYPES type= Bmark->RUN_MODES[0].type;
-        const int n_groups = GROUP_OUT ? c_info->n_groups : 1;
-
-        if ( Bmark->RUN_MODES[0].NONBLOCKING && type != Sync) 
-        {
-            out_format = OUT_OVERLAP;
-        } 
-        else if ( (type == SingleTransfer && c_info->group_mode != 0) || 
-                   type == MultPassiveTransfer || 
-                   (type == SingleElementTransfer && c_info->group_mode != 0) ) 
-        {
-            out_format = OUT_TIME_AND_BW;
-        } 
-        else if ( type == ParallelTransfer || type == SingleTransfer || type == SingleElementTransfer) 
-        {
-            out_format = OUT_TIME_RANGE_AND_BW;
-        } 
-        else if ( type == ParallelTransferMsgRate ) 
-        {
-            out_format = OUT_BW_AND_MSG_RATE;
-        }
-        else if (type == Collective ) 
-        {
-#ifdef MPIIO
-            out_format = OUT_TIME_RANGE_AND_BW;
-#else
-            out_format = OUT_TIME_RANGE;
-#endif
-        } 
-        else 
-        {
-            out_format = OUT_SYNC;
-        }
-
-        if (header)
-        {
-            IMB_print_header (out_format, Bmark, c_info, BMODE);
-        } 
-
-        if( GROUP_OUT )
-        {
-            fprintf(unit,"\n");
-        }
-
-        for(i_gr = 0; i_gr < n_groups; i_gr++)
-        {
-            IMB_display_times(Bmark, all_times, c_info, i_gr, ITERATIONS->n_sample, size, out_format);
-        } 
-    } /*if( DO_OUT )*/
-}
-
-
-/*****************************************************************/
-void IMB_display_times(struct Bench* Bmark, double* tlist, struct comm_info* c_info, 
-        int group, int n_sample, int size, int out_format)
-/*
-
-
-
-   Input variables: 
-
-   -Bmark                (type struct Bench*)                      
-   (For explanation of struct Bench type:
-   describes all aspects of modes of a benchmark;
-   see [1] for more information)
-
-   The actual benchmark
-
-
-   -tlist                (type double*)                      
-   Benchmark timing outcome
-   3 numbers (min/max/average)
-
-
-   -c_info               (type struct comm_info*)                      
-   Collection of all base data for MPI;
-   see [1] for more information
-
-
-   -group                (type int)                      
-   Index of group to be displayed (multi-case only)
-
-
-   -n_sample             (type int)                      
-   Benchmark repetition number
-
-
-   -size                 (type int)                      
-   Benchmark message size
-
-
-   -out_format            (type int)                      
-   Code for table formatting details
-
-
-
-*/
-{
-    int i, offset = 0, peers;
-    static double MEGA = 1.0/1e6;
-
-    double throughput = 0.;
-    double overlap    = 0.;
-    double t_pure     = 0.;
-    double t_ovrlp    = 0.;
-    double t_comp     = 0.; 
-    double msgrate = 0;
-
-    Timing timing[MAX_TIME_ID]; // min, max and avg
-#ifdef CHECK
-    double defect = 0.;
-#endif
-    memset(&timing, 0, MAX_TIME_ID * sizeof(timing[MIN]));
-
-
-    if (c_info->g_sizes[group] <= 0) {
-        return;
-    }
-
-#ifdef CHECK
-    IMB_calculate_times(Bmark->Ntimes, c_info, group, tlist, timing, &defect);
-#else
-    IMB_calculate_times(Bmark->Ntimes, c_info, group, tlist, timing);
-#endif
-
-#ifdef NBC
-    if ( ! strstr(Bmark->name, "_pure")) 
-    {
-        const size_t rank_index = timing[MAX].offset[OVRLP];
-        t_pure  = tlist[rank_index + PURE];
-        t_ovrlp = tlist[rank_index + OVRLP];
-        t_comp  = tlist[rank_index + COMP];
-        overlap = 100. * max(0., min(1., (t_pure + t_comp - t_ovrlp) / max(t_pure, t_comp)));
-    }
-
-#elif defined RMA
-    /* RMA benchmarks which test truly passive synchronisation presence */
-    if (Bmark->RUN_MODES[0].NONBLOCKING) 
-    {
-        /* Time when the target was inside MPI stack */
-        t_pure  = timing[MAX].times[PURE]; 
-
-        /* Time when the target was calculating something outside the MPI stack
-         * for a while and then entered the MPI stack */
-        t_ovrlp = timing[MAX].times[OVRLP];
-    }
-
-#else // NBC || RMA
-    if (Bmark->RUN_MODES[0].NONBLOCKING) 
-    {
-        t_pure  = timing[MAX].times[PURE];
-        t_ovrlp = timing[MAX].times[OVRLP];
-        t_comp  = tCPU;
-        overlap = 100.* max(0, min(1, (t_pure + t_comp - t_ovrlp) / min(t_pure, t_comp)));
-    }
-#endif // NBC || RMA
-
-    if (timing[MAX].times[PURE] > 0.) 
-    {
-        if (Bmark->RUN_MODES[0].type != ParallelTransferMsgRate)
-            throughput = (Bmark->scale_bw * SCALE * MEGA) * size / timing[MAX].times[PURE];
-#ifndef MPIIO
-        else
-        {
-            peers = c_info->num_procs / 2;
-            msgrate = (Bmark->scale_bw * SCALE * MAX_WIN_SIZE * peers) / timing[MAX].times[PURE];
-            throughput = MEGA * msgrate * size;
-        }
-#endif
-    }
-    if (c_info->group_mode > 0) 
-    {
-        IMB_edit_format(1, 0);
-        sprintf(aux_string, format, group);
-        offset=strlen(aux_string);
-    }
-
-    if (Bmark->sample_failure) 
-    {
-        IMB_edit_format(1, 0);
-        sprintf(aux_string + offset, format, size);
-        offset = strlen(aux_string);
-
-        switch (Bmark->sample_failure)
-        {
-          case SAMPLE_FAILED_MEMORY:
-            sprintf(aux_string + offset,
-                    " out-of-mem.; needed X=%8.3f GB; use flag \"-mem X\" or MAX_MEM_USAGE>=X (IMB_mem_info.h)",
-                    (1000. * c_info->used_mem + 1.) / 1000.);
-            break;
-
-          case SAMPLE_FAILED_INT_OVERFLOW:
-            sprintf(aux_string + offset," int-overflow.; The production rank*size caused int overflow for given sample");
-            break;
-          case SAMPLE_FAILED_TIME_OUT:
-            aux_string[offset] = '\0';
-            fprintf(unit, "%s%s", aux_string, " time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use \"-time X\" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.");
-            aux_string[0] = '\0';
-            break;
-        } /*switch*/
-    } 
-    else 
-    {
-        switch (out_format)
-        {
-        case OUT_TIME_AND_BW:
-            IMB_edit_format(2, 2);
-            sprintf(aux_string + offset, format, size, n_sample, timing[MAX].times[PURE], throughput);
-            break;
-        case OUT_BW_AND_MSG_RATE:
-            IMB_edit_format(2, 1);
-            offset += sprintf(aux_string + offset, format, size, n_sample, throughput);
-            sprintf(&(format[0]),"%%%d.0f",ow_format);
-            sprintf(aux_string + offset, format, msgrate);
-            break;
-        case OUT_TIME_RANGE_AND_BW:    
-            IMB_edit_format(2, 4);
-            sprintf(aux_string + offset, format, size, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE], throughput);
-            break;
-        case OUT_TIME_RANGE:    
-            IMB_edit_format(2, 3);
-            sprintf(aux_string + offset, format, size, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE]);
-            break;
-        case OUT_SYNC:    
-#ifdef NBC
-            if (Bmark->RUN_MODES[0].NONBLOCKING && !strstr(Bmark->name, "_pure")) {
-                IMB_edit_format(1, 4);
-                sprintf(aux_string + offset, format, n_sample, t_ovrlp, t_pure, t_comp, overlap);
-            } else
-#endif // NBC
-            {
-                IMB_edit_format(1, 3);
-                sprintf(aux_string + offset, format, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE]);
-            }
-            break;
-        case OUT_OVERLAP:
-#ifdef RMA
-            IMB_edit_format(2, 2);
-            sprintf(aux_string + offset, format, size, n_sample, t_pure, t_ovrlp);
-#else            
-            IMB_edit_format(2, 4);
-            sprintf(aux_string + offset, format, size, n_sample, t_ovrlp, t_pure, t_comp, overlap);
-#endif            
-            break; 
-        }
-
-#ifdef CHECK 
-        if (out_format != OUT_SYNC  && strcmp(Bmark->name,"Window"))
-        {
-            IMB_edit_format(0, 1);
-            offset=strlen(aux_string);
-            sprintf(aux_string + offset, format, defect);
-
-            if(defect > TOL) {
-                Bmark->success = 0;
-            }
-        }
-#endif
-
-    } /*if( Bmark->sample_failure )*/
-
-    fprintf(unit, "%s\n", aux_string);
-    fflush(unit);
-}
-
-void IMB_calculate_times(int ntimes,
-                         struct comm_info* c_info,
-                         int group_id,
-                         double* tlist,
-                         Timing* timing
-#ifdef CHECK
-                       , double* defect
-#endif
-                        )
-{
-    int offset      = 0;
-    int nproc       = 0;
-    int rank        = 0;
-    int times_count = 0;
-    int i           = 0;
-    Time_Id time_id = PURE;
-
-    const int is_group_mode = c_info->group_mode > 0 ? 1 : 0;
-
-
-    int ncount = is_group_mode
-               ? group_id
-               : c_info->n_groups;
-
-#ifdef CHECK
-    *defect = 0;
-#endif
-
-
-    for (i = 0; i < ncount; i++) {
-        nproc += c_info->g_sizes[i];
-    }
-
-    ncount = is_group_mode
-           ? c_info->g_sizes[group_id]
-           : nproc;
-
-    for (time_id = PURE; time_id < ntimes; time_id++) 
-    {
-        times_count = 0;
-        timing[MIN].times[time_id] = DBL_MAX;
-
-        for (i = 0; i < ncount; i++) 
-        {
-            rank   = is_group_mode
-                   ? (nproc + i) * ntimes
-                   : c_info->g_ranks[i] * ntimes;
-
-            offset = rank + time_id;
-            if (tlist[offset] < 0.) {
-                continue;
-            }
-            times_count++;
-
-            if (tlist[offset] < timing[MIN].times[time_id]) {
-                timing[MIN].times[time_id] = tlist[offset];
-                timing[MIN].offset[time_id] = rank;
-            }
-
-            if ((tlist[offset] > timing[MAX].times[time_id])) {
-                timing[MAX].times[time_id] = tlist[offset];
-                timing[MAX].offset[time_id] = rank;
-            }
-
-            timing[AVG].times[time_id] += tlist[offset];
-#ifdef CHECK
-            {
-		const size_t check_index = is_group_mode
-                                     ? c_info->g_ranks[nproc + i]
-                                     : c_info->g_ranks[i];
-            	*defect = max(*defect, all_defect[check_index]);
-	    }
-#endif 
-        }
-        // fixed 'times_count may be 0' issue
-        if (times_count != 0)
-            timing[AVG].times[time_id] /= times_count;
-        else
-            timing[AVG].times[time_id] = 0;
-    }
-}
-
-
-/************************************************************************/
-/* IMB 3.1 << */
-// Re-display calling sequence
-void IMB_show_selections(struct comm_info* c_info, struct Bench* BList, int *argc, char ***argv)
-    /* >> IMB 3.1  */
-    /*
-
-
-       Displays on stdout an overview of the user selections
-
-
-
-       Input variables: 
-
-       -c_info               (type struct comm_info*)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-
-       -BList                (type struct Bench*)                      
-       (For explanation of struct Bench type:
-       describes all aspects of modes of a benchmark;
-       see [1] for more information)
-
-       The requested list of benchmarks
-
-       -argc                 (type int *)                      
-       Number of command line arguments
-
-
-       -argv                 (type char ***)                      
-       List of command line arguments
-
-
-
-
-*/
-{
-    int iarg=0;
-    int i;
-
-    if(c_info->w_rank == 0 )
-    {
-        IMB_general_info();
-        /* IMB 3.1 << */
-
-        // repeat calling sequence
-        fprintf(unit,"\n\n# Calling sequence was: \n\n#");
-
-        while( iarg<*argc )
-        {
-            if (iarg>0 && iarg%6==0)
-            {
-                if( (*argv)[iarg][0]=='-' && iarg+1<*argc ) 
-                {
-                    fprintf(unit," %s %s\n#", (*argv)[iarg], (*argv)[iarg+1]);
-                    iarg++;
-                }
-                else
-                    fprintf(unit," %s\n#", (*argv)[iarg]);
-
-                for(i=0; i<=strlen((*argv)[0]); i++) fprintf(unit," ");
-            }
-            else
-                fprintf(unit," %s", (*argv)[iarg]);
-
-            iarg++;
-        } /*while( iarg<*argc )*/
-
-        fprintf(unit,"\n\n");
-#ifndef MPIIO
-        if( c_info->n_lens>0 )
-        {
-            fprintf(unit,"# Message lengths were user defined\n");
-        }
-        else
-            /* >> IMB 3.1  */
-        {
-            fprintf(unit,"# Minimum message length in bytes:   %d\n",0);
-            fprintf(unit,"# Maximum message length in bytes:   %d\n",1<<c_info->max_msg_log);
-        }
-
-        fprintf(unit,"#\n");
-        fprintf(unit,"# MPI_Datatype                   :   MPI_BYTE \n");
-        fprintf(unit,"# MPI_Datatype for reductions    :   MPI_FLOAT\n");
-        fprintf(unit,"# MPI_Op                         :   MPI_SUM  \n");
-#else
-        if( c_info->n_lens>0 )
-        {
-            fprintf(unit,"# IO lengths were user defined\n");
-        }
-        else
-        {
-            fprintf(unit,"# Minimum io portion in bytes:   %d\n",0);
-            fprintf(unit,"# Maximum io portion in bytes:   %d\n",1<<c_info->max_msg_log);
-        }
-        fprintf(unit,"#\n");
-        IMB_print_info();
-#endif
-        fprintf(unit,"#\n");
-
-#ifdef IMB_OPTIONAL
-        fprintf(unit,"#\n\n");
-        fprintf(unit,"# !! Attention: results have been achieved in\n");
-        fprintf(unit,"# !! IMB_OPTIONAL mode.\n");
-        fprintf(unit,"# !! Results may differ from standard case.\n");
-        fprintf(unit,"#\n");
-#endif
-
-        fprintf(unit,"#\n");
-
-        IMB_print_blist(c_info, BList);
-
-        if( do_nonblocking )
-        {
-            fprintf(unit,"\n\n# For nonblocking benchmarks:\n\n");
-            fprintf(unit,"# Function CPU_Exploit obtains an undisturbed\n");
-            fprintf(unit,"# performance of %7.2f MFlops\n",MFlops);         
-        }
-
-    } /*if(c_info->w_rank == 0 )*/
-
-} /* end of IMB_show_selections*/
-
-/****************************************************************************/
-void IMB_show_procids(struct comm_info* c_info)
-    /*
-
-       Prints to stdout the process ids (of group eventually)
-
-       Input variables: 
-
-       -c_info               (type struct comm_info*)                      
-       Collection of all base data for MPI;
-       see [1] for more information
-
-
-
-*/
-{
-    int ip, py, i, k, idle;
-
-    if( c_info->w_rank == 0 )
-    {
-        if(c_info->n_groups == 1)
-        {
-            if( c_info->px>1 && c_info->py>1 )
-            {
-                fprintf(unit,"\n# #processes = %d; rank order (rowwise): \n",
-                        c_info->num_procs);
-                ip=0;
-
-                for( i=0; i<c_info->px && ip<c_info->NP; i++)
-                {  
-                    py = c_info->w_num_procs/c_info->px;
-                    if( i<c_info->w_num_procs%c_info->px ) py++;
-                    py = min(py,c_info->NP-ip);
-                    IMB_print_array(c_info->g_ranks+ip,1,0,py,"",unit);
-                    fprintf(unit,"\n");
-                    ip = ip+py;
-                }
-            }
-            else
-                fprintf(unit,"\n# #processes = %d \n",c_info->num_procs);
-
-            idle = c_info->w_num_procs-c_info->num_procs;
-        } /*if(c_info->n_groups == 1)*/
-
-        if(c_info->n_groups != 1)
-        {
-            fprintf(unit,"\n# ( %d groups of %d processes each running simultaneous ) \n",
-                    c_info->n_groups,c_info->num_procs); 
-
-            IMB_print_array(c_info->g_ranks,c_info->n_groups,0,
-                    c_info->g_sizes[0],"Group ",unit);
-
-            idle = c_info->w_num_procs - c_info->n_groups*c_info->g_sizes[0];
-        }
-
-        if( idle )
-        {
-            if( idle == 1 )
-                fprintf(unit,"# ( %d additional process waiting in MPI_Barrier)\n",idle);
-            else
-                fprintf(unit,"# ( %d additional processes waiting in MPI_Barrier)\n",idle);
-        }
-    } /*if( c_info->w_rank == 0 )*/
-
-} /* end of IMB_show_procids*/
-
-
-/****************************************************************************/
-void IMB_print_array(int* Array, int N, int disp_N, 
-        int M, char* txt, FILE* unit)
-/*
-
-
-   Formattedly prints to stdout a M by N int array 
-
-
-
-   Input variables: 
-
-   -Array                (type int*)                      
-   Array to be printed
-
-
-   -N                    (type int)                      
-   Number of rows to be printed
-
-
-   -disp_N               (type int)                      
-   Displacement in Array where frist row begins
-
-
-   -M                    (type int)                      
-   Number of columns
-
-
-   -txt                  (type char*)                      
-   Accompanying text
-
-
-   -unit                 (type FILE*)                      
-   Output unit
-
-
-
-*/
-{
-#define MAX_SHOW 1024
-    int i,j;
-
-    char* outtxt;
-    int do_out;
-
-    do_out=0;
-
-    if( txt )
-        if( strcmp(txt,"") )
-        {
-            outtxt = (char*)IMB_v_alloc((strlen(txt)+6)*sizeof(char)," IMB_print_array ");
-            do_out=1;
-        }
-
-    if( N<=1 )
-    {
-        if( M>MAX_SHOW )
-        {
-            fprintf(unit,"#  "); 
-            IMB_print_int_row(unit, Array, MAX_SHOW/2);
-            fprintf(unit," ... "); 
-            IMB_print_int_row(unit, &Array[M-MAX_SHOW/2], MAX_SHOW/2);
-        }
-        else
-        {
-            if( do_out ) fprintf(unit,"# %s",txt); 
-            else         fprintf(unit,"# "); 
-            IMB_print_int_row(unit, Array, M);
-        }
-    }
-    else if ( N<=MAX_SHOW )
-    {
-        int zero=0, one=1;
-        for( i=0; i<N; i++) 
-        {
-            if( do_out )
-                sprintf(outtxt,"%s %d: ",txt,disp_N+i);
-            else    outtxt=(char*)NULL;
-            IMB_print_array(&Array[i*M], one, zero, M, outtxt, unit);
-
-            fprintf(unit,"\n");}
-    } /*for( i=0...*/
-    else
-    {
-        int disp;
-
-        disp=0;
-        IMB_print_array(Array, MAX_SHOW/2, disp, M, txt, unit);
-        fprintf(unit,"#  . \n"); 
-        fprintf(unit,"#  . \n"); 
-        disp=N-MAX_SHOW/2;
-        IMB_print_array(&Array[(N-MAX_SHOW/2)*M], MAX_SHOW/2, disp, M, txt, unit);
-    }
-
-    if( do_out )
-    {
-        IMB_v_free((void**)&outtxt);
-    }
-}
-
-/****************************************************************************/
-void IMB_print_int_row(FILE* unit, int* Array, int M)
-    /*
-
-
-       Formattedly prints to stdout a row of int numbers
-
-
-
-       Input variables: 
-
-       -unit                 (type FILE*)                      
-       Output unit
-
-
-       -Array                (type int*)                      
-       Data to be printed
-
-
-       -M                    (type int)                      
-       Number of data
-
-
-
-*/
-{
-#define X_PER_ROW 16
-    int i,j,i0,irest;
-
-    irest = M%X_PER_ROW;
-    for(j=0; j<(M+X_PER_ROW-1)/X_PER_ROW; j++)
-    {
-        i0=j*X_PER_ROW;
-
-        for(i=0; i<min(M-i0,X_PER_ROW); i++) fprintf(unit," %4d",Array[i0+i]);
-
-        fprintf(unit,"\n# ");
-    }
-
-}
-
-#ifdef MPIIO
-/****************************************************************************/
-void IMB_print_info()
-    /*
-
-
-       Prints MPI_Info selections (MPI-2 only)
-
-
-
-*/
-{
-    int nkeys,ikey,vlen,exists;
-    MPI_Info tmp_info;
-    char key[MPI_MAX_INFO_KEY], *value;
-
-    IMB_user_set_info(&tmp_info);
-
-    /* July 2002 fix V2.2.1: handle NULL case */
-    if( tmp_info!=MPI_INFO_NULL ) 
-    {
-        /* end change */
-
-        MPI_Info_get_nkeys(tmp_info, &nkeys);
-
-        if( nkeys > 0) fprintf(unit,"# Got %d Info-keys:\n\n",nkeys);
-
-        for( ikey=0; ikey<nkeys; ikey++ )
-        {
-            MPI_Info_get_nthkey(tmp_info, ikey, key);
-
-            MPI_Info_get_valuelen(tmp_info, key, &vlen, &exists);
-
-            value = (char*)IMB_v_alloc((vlen+1)* sizeof(char), "Print_Info");
-
-            MPI_Info_get(tmp_info, key, vlen, value, &exists);
-            printf("# %s = \"%s\"\n",key,value);
-
-            IMB_v_free ((void**)&value);
-        }
-
-        MPI_Info_free(&tmp_info);
-
-        /* July 2002 fix V2.2.1: end if */
-    }
-    /* end change */
-
-}
-#endif
-
-
-
-/*****************************************************************/
-void IMB_print_header (int out_format, struct Bench* bmark, 
-        struct comm_info* c_info, MODES bench_mode)
-{
-
-    int line_len = 0;
-    char* help;
-    char* token;
-
-    fprintf(unit,"\n");            /* FOR GNUPLOT: CURVE SEPERATOR  */
-
-    if( c_info->group_mode > 0 ) 
-    {
-        /* several groups output*/ 
-        strcpy(aux_string,"&Group") ; 
-        line_len = 1; 
-    }
-    else
-    { 
-        strcpy(aux_string,"");  
-        line_len = 0; 
-    }
-
-
-    switch (out_format)
-    {
-    case OUT_TIME_AND_BW:
-        line_len += 4;
-        strcat(aux_string,"&#bytes&#repetitions&t[usec]&Mbytes/sec&");
-        break;
-
-    case OUT_BW_AND_MSG_RATE:
-        line_len += 4;
-        strcat(aux_string,"&#bytes&#repetitions&Mbytes/sec&Msg/sec&");
-        break;
-
-    case OUT_TIME_RANGE_AND_BW:
-        line_len += 6;
-        strcat(aux_string,
-                "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&Mbytes/sec&");
-        break;
-
-    case OUT_TIME_RANGE:
-        line_len += 5;
-        strcat(aux_string, "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
-        break;
-
-    case OUT_SYNC:
-        if (bmark->RUN_MODES[0].NONBLOCKING && !strstr(bmark->name, "_pure")) 
-        {
-            line_len += 5;
-            strcat(aux_string,
-                    "&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&");
-        } 
-        else 
-        {
-            line_len += 4;
-            strcat(aux_string,
-                    "&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&");
-        }
-        break;
-
-    case OUT_OVERLAP:
-#ifdef RMA
-        line_len += 4;
-        strcat(aux_string,
-                "&#bytes&#repetitions&t_pure[usec]&t_ovrl[usec]&");
-#else        
-        line_len += 6;
-        strcat(aux_string,
-                "&#bytes&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&");
-#endif        
-        break;
-    } 
-
-#ifdef CHECK
-    if( bmark->RUN_MODES[0].type != Sync && strcmp(bmark->name,"Window") )
-    {
-        line_len+=1;
-        strcat(aux_string, "&defects&");
-    }
-#endif
-
-    IMB_make_line(line_len);
-
-    if( c_info->n_groups > 1)
-    {    
-        fprintf(unit, "# Benchmarking Multi-%s ", bmark->name);
-    }    
-    else
-    {    
-        fprintf(unit, "# Benchmarking %s ", bmark->name);
-    }    
-
-    IMB_show_procids(c_info); 
-
-    IMB_make_line(line_len);
-
-    switch(bench_mode->AGGREGATE)
-    {
-      case 1:
-        fprintf(unit, "#\n#    MODE: AGGREGATE \n#\n");
-        break;
-
-      case 0:
-        fprintf(unit, "#\n#    MODE: NON-AGGREGATE \n#\n");
-        break;
-    }
-
-    help=aux_string;
-    while (token = strtok(help, "&"))
-    {
-        sprintf(format, "%%%ds", ow_format);
-        fprintf(unit, format, token);
-        help = NULL;
-    }
-    fprintf(unit, "\n");
-
-    return;
-}
-
-
-/*****************************************************************/
-void IMB_edit_format(int n_ints , int n_floats)
-    /*
-
-
-       Edits format string for output
-
-
-
-       In/out variables: 
-
-       -n_ints               (type int)                      
-# of int items to be printed
-
-
--n_floats             (type int)                      
-# of float items to be printed
-
-
-
-*/
-{
-    int ip,i;
-
-    ip=0;
-
-    for(i=1 ; i<=n_ints; i++)
-    {    
-        sprintf(&(format[ip]),"%%%dd",ow_format); 
-        ip=strlen(format);
-    }
-
-    for(i=1 ; i<=n_floats ; i++)
-    {
-        sprintf(&(format[ip]),"%%%d.2f",ow_format);
-        ip=strlen(format);
-    }
-}
-
-/***************************************************************************/
-void IMB_make_line(int line_len)
-    /*
-
-
-       Prints an underline
-
-
-
-       Input variables: 
-
-       -line_len               (type int)                      
-       Length of underline
-
-
-
-*/
-{
-    int i;
-    char* char_line = "-";
-    fprintf(unit,"#");
-
-    for( i=1;i<line_len*ow_format; i++ )
-    {
-        fprintf(unit,"%s",char_line);
-    }
-    fprintf(unit,"\n");
-}
-
-/***************************************************************************/
-/* New function for IMB_3.0 */
-void IMB_help()
-{
-    fflush(stderr);
-    fflush(unit);
-
-    fprintf(unit,"\nCalling sequence (command line will be repeated in Output table!):\n\n");
-
-#ifdef MPI1
-    fprintf(unit,"\nIMB-MPI1       [-h{elp}]\n");
-#elif defined(EXT)
-    fprintf(unit,"\nIMB-EXT        [-h{elp}]\n");
-#elif defined (MPIIO)
-    fprintf(unit,"\nIMB-IO         [-h{elp}]\n");
-#endif
-
-    /* IMB 3.1 << */
-    /* Update calling sequence */
-    /* >> IMB 3.1  */
-    fprintf(unit,
-            "[-npmin        <NPmin>]\n"
-            "[-multi        <outflag>]\n"
-            "[-off_cache    <cache_size[,cache_line_size]>\n"
-            "[-iter         <msgspersample[,overall_vol[,msgs_nonaggr[,iter_policy]]]>\n"
-            "[-iter_policy  <iter_policy>]\n"
-            "[-time         <max_runtime per sample>]\n"
-            "[-mem          <max. per process memory for overall message buffers>]\n"
-            "[-msglen       <Lengths_file>]\n"
-            "[-map          <PxQ>]\n"
-            "[-input        <filename>]\n"
-            "[benchmark1    [benchmark2 [...]]]\n"
-            "[-include      [benchmark1 [benchmark2 [...]]]\n"
-            "[-exclude      [benchmark1 [benchmark2 [...]]]\n"
-            "[-msglog       <[min_msglog]:max_msglog>]\n"
-#if (defined MPI1 || defined NBC)
-            "[-root_shift   <on or off>]\n"
-            "[-sync         <on or off>]\n"
-#endif            
-            "[-imb_barrier  <on or off>]\n"
-            "\n"
-            "where \n"
-            "\n"
-            "- h ( or help) just provides basic help \n"
-            "(if active, all other arguments are ignored)\n"
-            "\n"
-            "- npmin\n\n"
-            "the argumaent after npmin is NPmin, \n"
-            "the minimum number of processes to run on\n"
-            "(then if IMB is started on NP processes, the process numbers \n"
-            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
-            ">>>\n"
-            "to run on just NP processes, run IMB on NP and select -npmin NP\n"
-            "<<<\n"
-            "default: \n"
-            "NPmin=2\n"
-            "\n");
-#ifdef MPIIO
-    fprintf(unit,
-            "-off_cache \n\n"
-            "no effect for IMB-IO (only IMB-MPI1, IMB-EXT) \n"
-            "\n");
-#else
-    fprintf(unit,
-            "- off_cache \n"
-            "\n"
-            "the argument after off_cache can be either 1 single number (cache_size),  \n"
-            "or 2 comma separated numbers (cache_size,cache_line_size), or just -1 \n"
-            "\n"
-            "By default, without this flag, the communications buffer is  \n"
-            "the same within all repetitions of one message size sample;   \n"
-            "most likely, cache reusage is yielded and thus throughput results   \n"
-            "that might be non realistic.    \n"
-            "\n"
-            "With -off_cache, it is attempted to avoid cache reusage.    \n"
-            "cache_size is a float for an upper bound of the size of the last level cache in MBytes \n"
-            "cache_line_size is assumed to be the size (Bytes) of a last level cache line  \n"
-            "(can be an upper estimate).  \n"
-            "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );  \n"
-            "when repetitively using messages of a particular size, their addresses are advanced within those  \n"
-            "buffers so that a single message is at least 2 cache lines after the end of the previous message.  \n"
-            "Only when those buffers have been marched through (eventually), they will re-used from the beginning.  \n"
-            "\n"
-            "A cache_size and a cache_line_size are assumed as statically defined    \n"
-            "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered  \n"
-            "\n"
-            "remark: -off_cache is effective for IMB-MPI1, IMB-EXT, but not IMB-IO  \n"
-            "\n"
-            "examples  \n"
-            "-off_cache -1 (use defaults of IMB_mem_info.h);  \n"
-            "-off_cache 2.5 (2.5 MB last level cache, default line size);  \n"
-            "-off_cache 16,128 (16 MB last level cache, line size 128);  \n"
-            "\n"
-            "NOTE: the off_cache mode might also be influenced by eventual internal  \n"
-            "caching with the MPI library. This could make the interpretation \n"
-            "intricate.  \n"
-            "\n"
-            "default: \n"
-            "no cache control, data likely to come out of cache most of the time \n"
-            "\n");
-#endif
-    fprintf(unit,
-            "- iter \n\n"
-            "the argument after -iter can contain from 1 to 4 comma separated values\n"
-            "3 integer numbers override the defaults \n"
-            "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of =>IMB_settings.h\n"
-            "and 1 string value overrides the default ITER_POLICY of => IMB_settings.h\n"
-            "examples \n"
-            "-iter 2000        (override MSGSPERSAMPLE by value 2000) \n"
-            "-iter 1000,100    (override OVERALL_VOL by 100) \n"
-            "-iter 1000,40,150 (override MSGS_NONAGGR by 150) \n"
-            "\n"
-            "\n"
-            "default: \n"
-            "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR,ITER_POLICY => IMB_settings.h \n"
-            "\n"
-            "NOTE: !! New in versions from IMB 3.2 on !!  \n"
-            "the iter selection is overridden by a dynamic selection that is a new default in \n"
-            "IMB 3.2: when a maximum run time (per sample) is expected to be exceeded, the \n"
-            "iteration number will be cut down; see -time flag  \n"
-            "\n"
-            "- iter_policy\n"
-            "\n"
-            "the argument after -iter_policy is a one from possible strings,\n"
-            "specifying that policy will be used for auto iteration control:\n"
-            "dynamic,multiple_np,auto,off\n"
-            "example \n"
-            "-iter_policy auto\n"
-            "default:\n"
-            "iteration control through parameter ITER_POLICY => IMB_settings.h \n"
-            "\n"
-            "- time\n"
-            "\n"
-            "the argument after -time is a float, specifying that \n"
-            "a benchmark will run at most that many seconds per message size \n"
-            "the combination with the -iter flag or its defaults is so that always \n"
-            "the maximum number of repetitions is chosen that fulfills all restrictions \n"
-            "example \n"
-            "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, iff\n"
-            "the default (or -iter selected) number of repetitions would take longer than that) \n"
-            "\n"
-            "remark: per sample, the rough number of repetitions to fulfill the -time request \n"
-            "is estimated in preparatory runs that use ~ 1 second overhead \n"
-            "\n"
-            "default: \n"
-            "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10  \n"
-            "(new default in IMB_3.2) \n"
-            "\n"
-            "- mem\n\n"
-            "the argument after -mem is a float, specifying that \n"
-            "at most that many GBytes are allocated per process for the message buffers \n"
-            "if the size is exceeded, a warning will be output, stating how much memory \n"
-            "would have been necessary, but the overall run is not interrupted \n"
-            "example \n"
-            "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process) \n"
-            "\n"
-            "default: \n"
-            "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h \n"
-            "\n"
-            "- map\n\n"
-            "the argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
-            "enter PxQ with the 2 numbers separated by letter \"x\" and no blancs\n"
-            "the basic communicator is set up as P by Q process grid\n"
-            "\n"
-            "if, e.g., one runs on N nodes of X processors each, and inserts\n"
-            "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
-            "running PingPong with P=X, Q=2 would measure inter-node performance\n"
-            "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
-            "first priority) \n"
-            "\n"
-            "default: \n"
-            "Q=1\n"
-            "\n"
-            "- multi\n\n"
-            "the argument after -multi is outflag (0 or 1)\n"
-            "\n"
-            "if -multi is selected, running the N process version of a benchmark\n"
-            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
-            "\n"
-            "outflag only controls default (0) or extensive (1) output charts.\n"
-            "0: only lowest performance groups is output\n"
-            "1: all groups are output\n"
-            "\n"
-            "default: \n"
-            "multi off\n"
-            "\n"
-            "- msglen\n\n"
-            "the argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
-            "message lengths, 1 per line\n"
-            "\n"
-            "default: \n"
-            "no lengths_file, lengths defined by settings.h, settings_io.h\n"
-            "\n"
-            "- input\n\n"
-            "the argument after -input is a filename is any text file containing, line by line, benchmark names\n"
-            "facilitates running particular benchmarks as compared to using the\n"
-            "command line.\n"
-            "default: \n"
-            "no input file exists\n"
-            "\n"
-            "- include\n\n"
-            "the argument after -include  is one or more benchmark names separated by spaces\n"
-            "\n"
-            "- exclude\n\n"
-            "the argument after -exclude  is one or more benchmark names separated by spaces\n"
-            "\n"
-            "\n"
-            "-msglog\n\n"
-            "the argument after -msglog min:max, min and max are positive integer numbers, min<max\n"
-            "where min is power of 2 so that second smallest data transfer size is max(unit,2^min)\n"
-            "(the smallest always being 0), where unit = sizeof(float) for reductions, unit = 1 else\n"
-            "max is power of 2 so that 2^max is largest messages size, max must be less than 31"
-            "\n\n"
-#if (defined MPI1 || defined NBC)
-            "-root_shift\n\n"
-            "controls root change at each iteration step for certain collective benchmarks,\n"
-            "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
-            "default:\n"
-            "off\n"
-            "\n"
-            "-sync\n\n"
-            "controls whether all processes are syncronized at each iteration step in collective benchmarks,\n"
-            "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
-            "default:\n"
-            "on\n"
-            "\n"
-            "\n"
-#endif            
-            "-imb_barrier\n\n"
-            "use internal MPI-independent barrier syncronization implementation,\n"
-            "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
-            "default:\n"
-            "off\n"
-            "\n"
-            "- benchmarkX is (in arbitrary lower/upper case spelling)\n"
-            "\n");
-#ifdef MPI1
-    fprintf(unit,
-            "PingPongSpecificSource\n"
-            "PingPongAnySource\n"
-            "PingPingSpecificSource\n"
-            "PingPingAnySource\n"
-            "PingPing\n"
-            "PingPong\n"
-            "Sendrecv\n"
-            "Exchange\n"
-            "Bcast\n"
-            "Allgather\n"
-            "Allgatherv\n"
-            "Gather\n"
-            "Gatherv\n"
-            "Scatter\n"
-            "Scatterv\n"
-            "Alltoall\n"
-            "Alltoallv\n"
-            "Reduce\n"
-            "Reduce_scatter\n"
-            "Allreduce\n"
-            "Barrier\n"
-            "Uniband\n"
-            "Biband\n"
-            "\n");
-
-#elif defined(EXT)
-    fprintf(unit,
-            "Window\n"
-            "Unidir_Put\n"
-            "Unidir_Get\n"
-            "Bidir_Get\n"
-            "Bidir_Put\n"
-            "Accumulate\n"
-            "\n");
-
-#elif defined(MPIIO)
-    fprintf(unit,
-            "S_Write_indv\n"
-            "S_Read_indv\n"
-            "S_Write_expl\n"
-            "S_Read_expl\n"
-            "P_Write_indv\n"
-            "P_Read_indv\n"
-            "P_Write_expl\n"
-            "P_Read_expl\n"
-            "P_Write_shared\n"
-            "P_Read_shared\n"
-            "P_Write_priv\n"
-            "P_Read_priv\n"
-            "C_Write_indv\n"
-            "C_Read_indv\n"
-            "C_Write_expl\n"
-            "C_Read_expl\n"
-            "C_Write_shared\n"
-            "C_Read_shared\n"
-            "\n");
-
-#elif defined(NBC) 
-    fprintf(unit,
-            "Ibcast\n"
-            "Ibcast_pure\n"
-            "Iallgather\n"
-            "Iallgather_pure\n"
-            "Iallgatherv\n"
-            "Iallgatherv_pure\n"
-            "Igather\n"
-            "Igather_pure\n"
-            "Igatherv\n"
-            "Igatherv_pure\n"
-            "Iscatter\n"
-            "Iscatter_pure\n"
-            "Iscatterv\n"
-            "Iscatterv_pure\n"
-            "Ialltoall\n"
-            "Ialltoall_pure\n"
-            "Ialltoallv\n"
-            "Ialltoallv_pure\n"
-            "Ireduce\n"
-            "Ireduce_pure\n"
-            "Ireduce_scatter\n"
-            "Ireduce_scatter_pure\n"
-            "Iallreduce\n"
-            "Iallreduce_pure\n"
-            "Ibarrier\n"
-            "Ibarrier_pure\n"
-            "\n");
-     
-#elif defined(RMA)
-    fprintf(unit,
-            "Unidir_put\n"
-            "Unidir_get\n"
-            "Bidir_put\n"
-            "Bidir_get\n"
-            "One_put_all\n"
-            "One_get_all\n"
-            "All_put_all\n"
-            "All_get_all\n"
-            "Put_local\n"
-            "Get_local\n"      
-            "Put_all_local\n"
-            "Get_all_local\n"
-            "Exchange_put\n"
-            "Exchange_get\n"
-            "Accumulate\n"
-            "Get_accumulate\n"
-            "Fetch_and_op\n"
-            "Compare_and_swap\n"
-            "Truly_passive_put\n"
-            "\n");
-#endif
-
-}
diff --git a/src/IMB_parse_name_mpi1.c b/src/IMB_parse_name_mpi1.c
deleted file mode 100644
index d2c28a50..00000000
--- a/src/IMB_parse_name_mpi1.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_parse_name_mpi1.c 
-
- Implemented functions: 
-
- IMB_get_def_cases;
- IMB_set_bmark;
-
- ***************************************************************************/
-
-
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_bnames_mpi1.h"
-#include "IMB_comments.h"
-
-#include "IMB_prototypes.h"
-
- 
-int IMB_get_def_cases(char*** defc, char*** Gcmt)
-/*
-
-                      
-                      Initializes default benchmark names (defc) and accompanying
-                      comments (Gcmt)
-                      
-
-
-In/out variables: 
-
--defc                 (type char***)                      
-                      List of benchkark names (strings)
-                      
-
--Gcmt                 (type char***)                      
-                      List of general comments (strings)
-                      
-
-
-*/
-{
-    *defc = &DEFC[0];
-    *Gcmt = &Gral_cmt[0];
-    return (int) (sizeof(DEFC)/sizeof(char*));
-}
-
-int IMB_get_all_cases(char*** allc)
-/*
-
-                      
-                      Initializes default benchmark names (defc) and accompanying
-                      comments (Gcmt)
-                      
-
-
-In/out variables: 
-
--defc                 (type char***)                      
-                      List of benchkark names (strings)
-                      
-
--Gcmt                 (type char***)                      
-                      List of general comments (strings)
-                      
-
-
-*/
-{
-    *allc = &ALLC[0];
-    return (int) (sizeof(ALLC)/sizeof(char*));
-}
-
-
-void IMB_set_bmark(struct Bench* Bmark)
-/*
-
-
-
-In/out variables: 
-
--Bmark                (type struct Bench*)                      
-                      (For explanation of struct Bench type:
-                      describes all aspects of modes of a benchmark;
-                      see [1] for more information)
-                      
-                      On input, only the name of the benchmark is set.
-                      On output, all necessary run modes are set accordingly
-                      
-
-
-*/
-{
-    BTYPES 	type;
-
-#if 0
-    int 	index;
-
-    IMB_get_def_index(&index, Bmark->name);
-
-    if( index < 0 )
-    {
-	Bmark->RUN_MODES[0].type=BTYPE_INVALID;
-	Bmark->RUN_MODES[1].type=BTYPE_INVALID;
-	return;
-    }
-#endif /* 0 */
-
-    Bmark->N_Modes = 1;
-    Bmark->RUN_MODES[0].AGGREGATE   =-1;
-    Bmark->RUN_MODES[0].NONBLOCKING = 0;
-
-    Bmark->reduction	 = 0;
-    Bmark->Ntimes	 = 1;
-    Bmark->select_source = 0;
-
-    if (!strcmp(Bmark->name,"pingpong"))
-    { 
-          strcpy(Bmark->name,"PingPong");
-          Bmark->Benchmark	= IMB_pingpong;
-          Bmark->select_source	= 1;
-          Bmark->bench_comments = &PingPong_cmt[0];
-          type 			= SingleTransfer;
-          Bmark->scale_time 	= 0.5;
-    }
-    else if (!strcmp(Bmark->name,"pingping"))
-    { 
-	strcpy(Bmark->name,"PingPing");
-	Bmark->Benchmark 	= IMB_pingping;
-	Bmark->select_source 	= 1;
-	Bmark->bench_comments 	= &PingPing_cmt[0];
-	type 			= SingleTransfer;
-    }
-    else if (!strcmp(Bmark->name,"pingponganysource"))
-    {
-	strcpy(Bmark->name,"PingPongAnySource");
-	Bmark->Benchmark 	= IMB_pingpong;
-	Bmark->bench_comments 	= &PingPong_cmt[0];
-	type 			= SingleTransfer;
-	Bmark->scale_time 	= 0.5;
-    }
-    else if (!strcmp(Bmark->name,"pingpongspecificsource"))
-    {
-	strcpy(Bmark->name,"PingPongSpecificSource");
-	Bmark->Benchmark 	= IMB_pingpong;
-	Bmark->select_source 	= 1;
-	Bmark->bench_comments 	= &PingPong_cmt[0];
-	type 			= SingleTransfer;
-	Bmark->scale_time 	= 0.5;
-    }
-    else if (!strcmp(Bmark->name,"pingpinganysource"))
-    {
-	strcpy(Bmark->name,"PingPingAnySource");
-	Bmark->Benchmark 	= IMB_pingping;
-	Bmark->bench_comments 	= &PingPing_cmt[0];
-	type 			= SingleTransfer;
-    }
-    else if (!strcmp(Bmark->name,"pingpingspecificsource"))
-    {
-	strcpy(Bmark->name,"PingPingSpecificSource");
-	Bmark->Benchmark 	= IMB_pingping;
-	Bmark->select_source 	= 1;
-	Bmark->bench_comments 	= &PingPing_cmt[0];
-	type 			= SingleTransfer;
-    }
-    else if (!strcmp(Bmark->name,"sendrecv"))
-    { 
-	strcpy(Bmark->name,"Sendrecv");
-	Bmark->Benchmark = IMB_sendrecv;
-	Bmark->bench_comments = &Sendrecv_cmt[0];
-	type = ParallelTransfer;
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 2.0;
-    }
-    else if (!strcmp(Bmark->name,"exchange") )
-    { 
-	strcpy(Bmark->name,"Exchange");
-	Bmark->Benchmark = IMB_exchange;
-	Bmark->bench_comments = &Exchange_cmt[0];
-	type = ParallelTransfer;
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 4.0;
-    }
-    else if (!strcmp(Bmark->name,"allreduce"))
-    { 
-	strcpy(Bmark->name,"Allreduce");
-	Bmark->Benchmark = IMB_allreduce;
-	type = Collective;
-	Bmark->bench_comments = &Allreduce_cmt[0];
-	Bmark->reduction = 1;
-    }
-    else if (!strcmp(Bmark->name,"reduce"))
-    { 
-	strcpy(Bmark->name,"Reduce");
-	Bmark->Benchmark = IMB_reduce;
-	type = Collective;
-	Bmark->bench_comments = &Reduce_cmt[0];
-	Bmark->reduction = 1;
-    }
-    else if (!strcmp(Bmark->name,"reduce_scatter"))
-    { 
-	strcpy(Bmark->name,"Reduce_scatter");
-	Bmark->Benchmark = IMB_reduce_scatter;
-	type = Collective;
-	Bmark->bench_comments = &Reduce_scatter_cmt[0];
-	Bmark->reduction = 1;
-    }
-    else if (!strcmp(Bmark->name,"bcast"))
-    { 
-	strcpy(Bmark->name,"Bcast");
-	Bmark->Benchmark = IMB_bcast;
-	type = Collective;
-	Bmark->bench_comments = &Bcast_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"barrier"))
-    { 
-	strcpy(Bmark->name,"Barrier");
-	Bmark->Benchmark = IMB_barrier;
-	type = Sync;
-	Bmark->bench_comments = &Barrier_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"allgather"))
-    { 
-	strcpy(Bmark->name,"Allgather");
-	Bmark->Benchmark = IMB_allgather;
-	type = Collective;
-	Bmark->bench_comments = &Allgather_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"allgatherv"))
-    { 
-	strcpy(Bmark->name,"Allgatherv");
-	Bmark->Benchmark = IMB_allgatherv;
-	type = Collective;
-	Bmark->bench_comments = &Allgatherv_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"gather"))
-    { 
-	strcpy(Bmark->name,"Gather");
-	Bmark->Benchmark = IMB_gather;
-	type = Collective;
-	Bmark->bench_comments = &Gather_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"gatherv"))
-    { 
-	strcpy(Bmark->name,"Gatherv");
-	Bmark->Benchmark = IMB_gatherv;
-	type = Collective;
-	Bmark->bench_comments = &Gatherv_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"scatter"))
-    { 
-	strcpy(Bmark->name,"Scatter");
-	Bmark->Benchmark = IMB_scatter;
-	type = Collective;
-	Bmark->bench_comments = &Scatter_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"scatterv"))
-    { 
-	strcpy(Bmark->name,"Scatterv");
-	Bmark->Benchmark = IMB_scatterv;
-	type = Collective;
-	Bmark->bench_comments = &Scatterv_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"alltoall"))
-    { 
-	strcpy(Bmark->name,"Alltoall");
-	Bmark->Benchmark = IMB_alltoall;
-	type = Collective;
-	Bmark->bench_comments = &Alltoall_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"alltoallv"))
-    { 
-	strcpy(Bmark->name,"Alltoallv");
-	Bmark->Benchmark = IMB_alltoallv;
-	type = Collective;
-	Bmark->bench_comments = &Alltoallv_cmt[0];
-    }
-    else if (!strcmp(Bmark->name,"uniband"))
-    { 
-	strcpy(Bmark->name,"Uniband");
-	Bmark->Benchmark = IMB_uni_bandwidth;
-	Bmark->bench_comments = &Uni_bandwidth_cmt[0];
-	type = ParallelTransferMsgRate;
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 1.0;
-    }
-    else if (!strcmp(Bmark->name,"biband"))
-    { 
-	strcpy(Bmark->name,"Biband");
-	Bmark->Benchmark = IMB_bi_bandwidth;
-	Bmark->bench_comments = &Bi_bandwidth_cmt[0];
-	type = ParallelTransferMsgRate;
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 2.0;
-    }
-    else 
-    {
-	type = BTYPE_INVALID;
-    }
-
-    Bmark->RUN_MODES[0].type=type;
-}
-
diff --git a/src/IMB_read.c b/src/IMB_read.c
deleted file mode 100644
index c9c4f124..00000000
--- a/src/IMB_read.c
+++ /dev/null
@@ -1,736 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-    
- File: IMB_read.c 
-
- Implemented functions: 
-
- IMB_read_shared;
- IMB_read_indv;
- IMB_read_expl;
- IMB_read_ij;
- IMB_iread_ij;
-
- ***************************************************************************/
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
- 
-/*************************************************************************/
-
-/* ===================================================================== */
-/* 
-IMB 3.1 changes
-July 2007
-Hans-Joachim Plum, Intel GmbH
-
-- replace "int n_sample" by iteration scheduling object "ITERATIONS"
-  (see => IMB_benchmark.h)
-
-*/
-/* ===================================================================== */
-
-
-void IMB_read_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                     MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for read benchmarks with shared file pointers
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_read_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_read_ij(c_info, size, shared, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iread_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iread_ij(c_info, size, shared, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-}
-
-}
-}
- 
-/*************************************************************************/
-
-
-void IMB_read_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for read benchmarks with individual file pointers
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_read_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_read_ij(c_info, size, indv_block, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iread_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iread_ij(c_info, size, indv_block, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-
-}
-}
-
-}
-
-
-
-void IMB_read_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for read benchmarks with explicit offsets
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_read_ij(c_info, size, explicit, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_read_ij(c_info, size, explicit, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iread_ij(c_info, size, explicit, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iread_ij(c_info, size, explicit, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-}
-}
-}
-
-
-
-
-void IMB_read_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                 BTYPES type, int i_sample, int j_sample, 
-                 int time_inner, double* time)
-/*
-
-                      
-                      Calls the proper read functions, blocking case
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      portion size in bytes
-                      
-
--pos                  (type POSITIONING)                      
-                      (see IMB_benchmark.h for definition of this enum type)
-                      descriptor for the file positioning
-                      
-
--type                 (type BTYPES)                      
-                      (see IMB_benchmark.h for definition of this enum type)
-                      descriptor for the file access synchronism
-                      
-
--i_sample,j_sample    (type int)                      
-                      aggregate case:     
-                      i_sample=1, j_sample=n_sample (set by driving function)
-                      non aggregate case: 
-                      i_sample=n_sample, j_sample=1 (set by driving function)
-                      Benchmark logistics then:
-                        for( i=0 .. i_sample-1 )
-                           for( j=0 .. j_sample-1 )
-                                   input ...
-                                   Synchronize (!)
-                      
-
--time_inner           (type int)                      
-                      logical flag: should timing be issued for the innermost loop 
-                      (and then averaged by outermost count), or for outermost loop
-                      
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-                      
-
-
-*/
-{
-    int i, j;
-    int Locsize, Totalsize;
-    MPI_Status stat;
-    MPI_Offset Offset;
-
-    int (* GEN_File_read)(MPI_File fh, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-    int (* GEN_File_read_shared)
-                      (MPI_File fh, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-    int (* GEN_File_read_at) 
-                      (MPI_File fh, MPI_Offset offset, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-
-#ifdef CHECK
-    defect = 0.;
-#endif
-    ierr = 0;
-
-    *time = 0.;
-
-    if( c_info->File_rank >= 0 )
-    {
-
-	if (type == Collective )
-	{
-	    GEN_File_read = MPI_File_read_all;
-	    GEN_File_read_shared = MPI_File_read_ordered;
-	    GEN_File_read_at = MPI_File_read_at_all;
-#ifdef DEBUG
-	    fprintf(unit,"Collective input\n");
-#endif
-	}
-	else
-	{
-	    GEN_File_read = MPI_File_read;
-	    GEN_File_read_shared = MPI_File_read_shared;
-	    GEN_File_read_at = MPI_File_read_at;
-#ifdef DEBUG
-	    fprintf(unit,"Non collective input\n");
-#endif
-	}
-
-
-	Locsize = c_info->split.Locsize;
-	Totalsize = c_info->split.Totalsize;
-
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-BLOCKING COLLECTIVE/NON COLLECTIVE INPUT CASES COMBINED
-(function pointer GEN_File_read_XXX
-either standard or collective MPI_File_read_XXX
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-	if( !time_inner ) *time = MPI_Wtime();
-
-	for ( i=0; i<i_sample; i++ )
-	{
-
-		if( time_inner ) 
-		{
-
-
-		    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-		    *time = MPI_Wtime();
-		}
-
-		if( pos == indv_block )
-		{
-
-		    for( j=0; j<j_sample; j++ )
-		    {
-
-			ierr=GEN_File_read(c_info->fh,c_info->r_buffer,Locsize,c_info->etype,&stat);
-			MPI_ERRHAND(ierr);
-
-			DIAGNOSTICS("Read standard ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-			CHK_DIFF("Read_indv",c_info, c_info->r_buffer, 0,
-				 Locsize, Totalsize, asize, 
-				 get, pos, i_sample*j_sample, j,
-				 -2, &defect);
-
-		    }
-		} /*if( pos == indv_block )*/
-		else if( pos == explicit )
-		{
-
-		    for( j=0; j<j_sample; j++ )
-		    {
-			Offset = c_info->split.Offset+(MPI_Offset)(j*Totalsize);
-
-			ierr=GEN_File_read_at
-			    (c_info->fh, Offset, c_info->r_buffer,Locsize,c_info->etype,&stat);
-
-			MPI_ERRHAND(ierr);
-
-			DIAGNOSTICS("Read explicit ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-
-			CHK_DIFF("Read_expl",c_info, c_info->r_buffer, 0,
-				 Locsize, Totalsize, asize, 
-				 get, pos, i_sample*j_sample, j,
-				 -2, &defect);
-
-		    } /*for( j=0; j<j_sample; j++ )*/
-		}
-		else if( pos == shared )
-		{
-		    for( j=0; j<j_sample; j++ )
-		    {
-
-			ierr=GEN_File_read_shared
-			    (c_info->fh,c_info->r_buffer,Locsize,c_info->etype,&stat);
-			MPI_ERRHAND(ierr);
-
-			DIAGNOSTICS("Read shared ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-
-#ifdef CHECK
-			IMB_chk_diff("Read_shared",c_info, c_info->r_buffer, 0,
-				     Locsize, Totalsize, asize, 
-				     get, pos, i_sample*j_sample, j,
-				     -3, &defect);
-
-			MPI_Barrier(c_info->File_comm);
-#endif
-
-		    } /*for*/
-
-		}
-
-		if( time_inner ) *time = (MPI_Wtime()-*time)/(i_sample*j_sample);
-
-	} /*for ( i=0; i<i_sample; i++ )*/
-
-	if( !time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-    } /* end if File_rank >= 0 */
-
-}
-
-
-void IMB_iread_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                  BTYPES type, int i_sample, int j_sample, 
-                  int time_inner, int do_ovrlp, double* time)
-/*
-
-                      
-                      Calls the proper read functions, non blocking case
-                      
-                      (See IMB_read_ij for documentation of calling sequence)
-                      
-
-
-*/
-{
-int i, j;
-int Locsize, Totalsize;
-MPI_Status *STAT, stat;
-MPI_Request *REQUESTS;
-MPI_Offset Offset;
-
-#ifdef CHECK
-defect = 0.;
-#endif
-ierr = 0;
-
-*time = 0.;
-
-if( c_info->File_rank >= 0 )
-{
-
-Locsize = c_info->split.Locsize;
-Totalsize = c_info->split.Totalsize;
-
-
-if( type == Collective )
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-NON BLOCKING COLLECTIVE INPUT CASES
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-{
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-*time = MPI_Wtime();
-
-if( pos == indv_block )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-ierr=MPI_File_read_all_begin
-  (c_info->fh,c_info->r_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_read_all_end
-  (c_info->fh,c_info->r_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IRead coll. ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-CHK_DIFF("Coll. IRead_indv",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize,
-         get, pos, i_sample*j_sample, j,
-         -2, &defect);
-
-}
-
-else if ( pos == explicit )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-Offset = c_info->split.Offset+(MPI_Offset)(j*Totalsize);
-
-ierr=MPI_File_read_at_all_begin
-  (c_info->fh,Offset,c_info->r_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_read_at_all_end
-  (c_info->fh,c_info->r_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IRead expl coll. ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-CHK_DIFF("Coll. IRead_expl",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize,
-         get, pos, i_sample*j_sample, j,
-         -2, &defect);
-
-
-}
-
-else if ( pos == shared )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-ierr=MPI_File_read_ordered_begin
-  (c_info->fh,c_info->r_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_read_ordered_end
-  (c_info->fh,c_info->r_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IRead shared coll. ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-CHK_DIFF("Coll. IRead_shared",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize,
-         get, pos, i_sample*j_sample, j,
-         -3, &defect);
-
-}
-
-
-*time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-}
-
-else  /* type non-Collective */
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-NON BLOCKING NON COLLECTIVE INPUT CASES
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-
-{
-
-REQUESTS = (MPI_Request*)IMB_v_alloc(j_sample*sizeof(MPI_Request), "IRead_ij");
-STAT     = (MPI_Status *)IMB_v_alloc(j_sample*sizeof(MPI_Status ), "IRead_ij");
-
-for( j=0; j<j_sample; j++ ) REQUESTS[j]=MPI_REQUEST_NULL;
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-if( !time_inner ) *time = MPI_Wtime();
-
-for ( i=0; i<i_sample; i++ )
-{
-
-if( time_inner ) {
-
-MPI_Barrier(c_info->File_comm);
-
-*time = MPI_Wtime();
-}
-
-if( pos == indv_block )
-{
-
-for( j=0; j<j_sample; j++ )
-{
-
-ierr=MPI_File_iread
-   (c_info->fh,c_info->r_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-
-
-#if (defined CHECK || defined DEBUG)
-MPI_Wait(REQUESTS+j,STAT);
-DIAGNOSTICS("IRead standard ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-CHK_DIFF("IRead_indv",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         get, pos, i_sample*j_sample, j,
-         -2, &defect);
-#endif
-
-}
-
-}
-
-else if( pos == explicit )
-{
-
-for( j=0; j<j_sample; j++ )
-{
-Offset = c_info->split.Offset+(MPI_Offset)(j*Totalsize);
-
-ierr=MPI_File_iread_at
-  (c_info->fh,Offset,c_info->r_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-
-
-
-#if (defined CHECK || defined DEBUG)
-MPI_Wait(REQUESTS+j,STAT);
-DIAGNOSTICS("IRead expl ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-CHK_DIFF("IRead_expl",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         get, pos, i_sample*j_sample, j,
-         -2, &defect);
-#endif
-
-}
-}
-
-else if( pos == shared )
-{
-for( j=0; j<j_sample; j++ )
-{
-
-ierr=MPI_File_iread_shared
-  (c_info->fh,c_info->r_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-
-
-#if (defined CHECK || defined DEBUG)
-MPI_Wait(REQUESTS+j,STAT);
-DIAGNOSTICS("IRead shared ",c_info,c_info->r_buffer,Locsize,Totalsize,j,pos);
-
-CHK_DIFF("IRead_shared",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         get, pos, i_sample*j_sample, j,
-         -3, &defect);
-
-MPI_Barrier(c_info->File_comm);
-#endif
-
-}
-
-}
-
-if( do_ovrlp )
-for ( j=0; j<j_sample; j++ )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-if( j_sample == 1 )
-MPI_Wait(REQUESTS,STAT);
-else                 
-MPI_Waitall(j_sample,REQUESTS,STAT);
-
-
-if( time_inner ) *time = (MPI_Wtime()-*time)/(i_sample*j_sample);
-
-}
-
-if( !time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-IMB_v_free ((void**)&REQUESTS);
-IMB_v_free ((void**)&STAT);    
-
-} /* end if type */
-
-} /* end if File_rank >= 0 */
-
-}
diff --git a/src/IMB_rma_atomic.c b/src/IMB_rma_atomic.c
deleted file mode 100644
index a5d55c6a..00000000
--- a/src/IMB_rma_atomic.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- File: IMB_rma_atomic.c 
-
- Implemented functions: 
-
- IMB_rma_accumulate;
- IMB_rma_get_accumulate;
- IMB_rma_fetch_and_op;
- IMB_rma_compare_and_swap;
- 
- ***************************************************************************/
-
-/*-----------------------------------------------------------*/
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-#include "IMB_prototypes.h"
-
-void IMB_rma_accumulate (struct comm_info* c_info, int size,  
-                         struct iter_schedule* iterations,
-                         MODES run_mode, double* time)
-{
-    double res_time = -1.;
-    Type_Size s_size,r_size;
-    int s_num, r_num;
-    /* IMB 3.1 << */
-    int r_off;
-    int i;
-    int root = c_info->pair1;
-    ierr = 0;
-     
-    if (c_info->rank < 0)
-    {
-        *time = res_time;
-        return;
-    }    
-    
-    MPI_Type_size(c_info->red_data_type,&s_size);
-    s_num=size/s_size;
-    r_size=s_size;
-    r_num=s_num;
-    r_off=iterations->r_offs/r_size;
-
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-    if (c_info->rank == c_info->pair0)
-    {
-        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Accumulate(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->red_data_type, root, 
-                        i%iterations->r_cache_iter*r_off, r_num, 
-                        c_info->red_data_type, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-            }
-            ierr = MPI_Win_flush(root, c_info->WIN);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Accumulate(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->red_data_type, root, 
-                        i%iterations->r_cache_iter*r_off, r_num, 
-                        c_info->red_data_type, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-
-                ierr = MPI_Win_flush(root, c_info->WIN);
-                MPI_ERRHAND(ierr);
-            }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }
-        MPI_Win_unlock(root, c_info->WIN);
-    }
-    MPI_Barrier(c_info->communicator);
-
-    *time = res_time; 
-    return;
-}    
-
-
-void IMB_rma_get_accumulate (struct comm_info* c_info, int size,  
-                             struct iter_schedule* iterations,
-                             MODES run_mode, double* time)
-{
-    double res_time = -1.;
-    Type_Size s_size,r_size;
-    int s_num, r_num;
-    int r_off;
-    int i;
-    int root = c_info->pair1;
-    ierr = 0;
-     
-    if (c_info->rank < 0)
-    {
-        *time = res_time;
-        return;
-    }    
-    
-    MPI_Type_size(c_info->red_data_type,&s_size);
-    s_num=size/s_size;
-    r_size=s_size;
-    r_num=s_num;
-    r_off=iterations->r_offs/r_size;
-
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-    if (c_info->rank == c_info->pair0)
-    {
-        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Get_accumulate(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->red_data_type, 
-                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
-                        r_num, c_info->red_data_type, 
-                        root, i%iterations->r_cache_iter*r_off, r_num, 
-                        c_info->red_data_type, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-            }
-            ierr = MPI_Win_flush(root, c_info->WIN);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Get_accumulate(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->red_data_type, 
-                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
-                        r_num, c_info->red_data_type, 
-                        root, i%iterations->r_cache_iter*r_off, r_num, 
-                        c_info->red_data_type, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-
-                ierr = MPI_Win_flush(root, c_info->WIN);
-                MPI_ERRHAND(ierr);
-            }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }
-        MPI_Win_unlock(root, c_info->WIN);
-    }
-    MPI_Barrier(c_info->communicator);
-
-    *time = res_time; 
-    return;
-}    
-
-void IMB_rma_fetch_and_op (struct comm_info* c_info, int size,  
-                           struct iter_schedule* iterations,
-                           MODES run_mode, double* time)
-{
-    double res_time = -1.;
-    Type_Size r_size;
-    int r_off;
-    int i;
-    int root = c_info->pair1;
-    ierr = 0;
-     
-    if (c_info->rank < 0)
-    {
-        *time = res_time;
-        return;
-    }    
-    
-    MPI_Type_size(c_info->red_data_type,&r_size);
-    r_off=iterations->r_offs/r_size;
-
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-    if (c_info->rank == c_info->pair0)
-    {
-        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Fetch_and_op(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
-                        c_info->red_data_type, root, 
-                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-            }
-            ierr = MPI_Win_flush(root, c_info->WIN);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Fetch_and_op(
-                        (char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                        (char*)c_info->r_buffer+i%iterations->r_cache_iter*iterations->r_offs,
-                        c_info->red_data_type, root, 
-                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN );
-                MPI_ERRHAND(ierr);
-
-                ierr = MPI_Win_flush(root, c_info->WIN);
-                MPI_ERRHAND(ierr);
-            }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }
-        MPI_Win_unlock(root, c_info->WIN);
-    }
-    MPI_Barrier(c_info->communicator);
-
-    *time = res_time; 
-    return;
-}
-
-void IMB_rma_compare_and_swap (struct comm_info* c_info, int size,  
-                               struct iter_schedule* iterations,
-                               MODES run_mode, double* time)
-{
-    double res_time = -1.;
-    int root = c_info->pair1;
-    int s_size;
-    int i;
-    void *comp_b, *orig_b, *res_b; 
-    MPI_Datatype data_type = MPI_INT;
-    ierr = 0;
-          
-    if (c_info->rank < 0)
-    {
-        *time = res_time;
-        return;
-    }    
-    
-    MPI_Type_size(data_type,&s_size);
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-
-    if (c_info->rank == c_info->pair0)
-    {
-        /* use r_buffer for all buffers required by compare_and_swap, because 
-         * on all ranks r_buffer is zero-initialized in IMB_set_buf function */
-        orig_b = (char*)c_info->r_buffer + s_size*2;
-        comp_b = (char*)c_info->r_buffer + s_size;
-        res_b  = c_info->r_buffer;
- 
-        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Compare_and_swap(
-                        (char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs,
-                        (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs,
-                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
-                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs, 
-                        c_info->WIN );
-                MPI_ERRHAND(ierr);
-            }
-            ierr = MPI_Win_flush(root, c_info->WIN);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
-            res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Compare_and_swap(
-                        (char*)orig_b + i%iterations->s_cache_iter*iterations->s_offs,
-                        (char*)comp_b + i%iterations->s_cache_iter*iterations->s_offs,
-                        (char*)res_b  + i%iterations->r_cache_iter*iterations->r_offs,
-                        data_type, root, i%iterations->r_cache_iter*iterations->r_offs,
-                        c_info->WIN );
-                MPI_ERRHAND(ierr);
-
-                ierr = MPI_Win_flush(root, c_info->WIN);
-                MPI_ERRHAND(ierr);
-            }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }
-        MPI_Win_unlock(root, c_info->WIN);
-    }
-    MPI_Barrier(c_info->communicator);
-
-    *time = res_time; 
-    return;
-}
-
diff --git a/src/IMB_write.c b/src/IMB_write.c
deleted file mode 100644
index 86a9dba3..00000000
--- a/src/IMB_write.c
+++ /dev/null
@@ -1,647 +0,0 @@
-/*****************************************************************************
- *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
- *                                                                           *
- *****************************************************************************
-
-This code is covered by the Community Source License (CPL), version
-1.0 as published by IBM and reproduced in the file "license.txt" in the
-"license" subdirectory. Redistribution in source and binary form, with
-or without modification, is permitted ONLY within the regulations
-contained in above mentioned license.
-
-Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
-within the regulations of the "License for Use of "Intel(R) MPI
-Benchmarks" Name and Trademark" as reproduced in the file
-"use-of-trademark-license.txt" in the "license" subdirectory. 
-
-THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-solely responsible for determining the appropriateness of using and
-distributing the Program and assumes all risks associated with its
-exercise of rights under this Agreement, including but not limited to
-the risks and costs of program errors, compliance with applicable
-laws, damage to or loss of data, programs or equipment, and
-unavailability or interruption of operations.
-
-EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
-ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
-WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
-DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
-YOUR JURISDICTION. It is licensee's responsibility to comply with any
-export regulations applicable in licensee's jurisdiction. Under
-CURRENT U.S. export regulations this software is eligible for export
-from the U.S. and can be downloaded by or otherwise exported or
-reexported worldwide EXCEPT to U.S. embargoed destinations which
-include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
-Afghanistan and any other country to which the U.S. has embargoed
-goods and services.
-
- ***************************************************************************
-
-For more documentation than found here, see
-
-[1] doc/ReadMe_IMB.txt 
-
-[2] Intel (R) MPI Benchmarks
-    Users Guide and Methodology Description
-    In 
-    doc/IMB_Users_Guide.pdf
-
- File: IMB_write.c 
-
- Implemented functions: 
-
- IMB_write_shared;
- IMB_write_indv;
- IMB_write_expl;
- IMB_write_ij;
- IMB_iwrite_ij;
-
- ***************************************************************************/
-
-
-
-
-#include "IMB_declare.h"
-#include "IMB_benchmark.h"
-
-#include "IMB_prototypes.h"
-
-
-/*************************************************************************/
-
-
-/*************************************************************************/
-
-/* ===================================================================== */
-/* 
-IMB 3.1 changes
-July 2007
-Hans-Joachim Plum, Intel GmbH
-
-- replace "int n_sample" by iteration scheduling object "ITERATIONS"
-  (see => IMB_benchmark.h)
-
-*/
-/* ===================================================================== */
-
-void IMB_write_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                      MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for write benchmarks with shared file pointers
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_write_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_write_ij(c_info, size, shared, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iwrite_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iwrite_ij(c_info, size, shared, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-}
-}
-}
-
-/*************************************************************************/
-
-
-
-void IMB_write_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                    MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for write benchmarks with individual file pointers
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_write_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_write_ij(c_info, size, indv_block, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iwrite_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iwrite_ij(c_info, size, indv_block, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-}
-}
-}
-
-/*************************************************************************/
-
-
-
-void IMB_write_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                    MODES RUN_MODE, double* time)
-/*
-
-                      
-                      MPI-IO benchmark kernel
-                      Driver for write benchmarks with explicit offsets
-                      
-
-
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
-
-*/
-{
-if( c_info->File_rank>=0 )
-{
-if( RUN_MODE->AGGREGATE )
-IMB_write_ij(c_info, size, explicit, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
-else
-IMB_write_ij(c_info, size, explicit, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, time);
-
-if( RUN_MODE->NONBLOCKING )
-{
-MPI_File_close(&c_info->fh);
-IMB_open_file(c_info);
-
-if( RUN_MODE->AGGREGATE )
-IMB_iwrite_ij(c_info, size, explicit, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time+1);
-else
-IMB_iwrite_ij(c_info, size, explicit, RUN_MODE->type,  ITERATIONS->n_sample, 1, 0, 1, time+1);
-}
-}
-}
-
-
-void IMB_write_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                  BTYPES type, int i_sample, int j_sample, 
-                  int time_inner, double* time)
-/*
-
-                      
-                      Calls the proper write functions, blocking case
-                      
-                      (See IMB_read_ij for documentation of calling sequence)
-                      
-
-
-*/
-{
-int i, j;
-int Locsize,Totalsize,Ioffs;
-MPI_Status stat;
-MPI_Offset Offset;
-
-ierr = 0;
-
-*time=0.;
-if( c_info->File_rank >= 0 )
-{
-
-int (* GEN_File_write)(MPI_File fh, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-int (* GEN_File_write_shared)
-                      (MPI_File fh, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-int (* GEN_File_write_at) 
-                      (MPI_File fh, MPI_Offset offset, void* buf, int count, 
-                       MPI_Datatype datatype, MPI_Status *status);
-
-#ifdef CHECK
-int chk_mode;
-
-if( pos == shared && type != Collective) chk_mode = -3;
-else                                     chk_mode = -2;
-
-defect = 0.;
-#endif
-
-if (type == Collective )
-  {
-    GEN_File_write = MPI_File_write_all;
-    GEN_File_write_shared = MPI_File_write_ordered;
-    GEN_File_write_at = MPI_File_write_at_all;
-#ifdef DEBUG
-fprintf(unit,"Collective output\n");
-#endif
-  }
-else
-  {
-    GEN_File_write = MPI_File_write;
-    GEN_File_write_shared = MPI_File_write_shared;
-    GEN_File_write_at = MPI_File_write_at;
-#ifdef DEBUG
-fprintf(unit,"Non collective output\n");
-#endif
-  }
-
-Locsize = c_info->split.Locsize;
-Totalsize = c_info->split.Totalsize;
-Offset = (MPI_Offset)c_info->split.Offset;
-
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-BLOCKING COLLECTIVE/NON COLLECTIVE OUTPUT CASES COMBINED
-(function pointer GEN_File_write_XXX
-either standard or collective MPI_File_write_XXX
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-if( !time_inner ) *time = MPI_Wtime();
-
-for( i=0; i<i_sample; i++ )
-{
-
-
-if( time_inner ) *time = MPI_Wtime();
-
-if( pos == indv_block )
-for ( j=0; j<j_sample; j++ )
-{
-
-ierr=GEN_File_write(c_info->fh,c_info->s_buffer,Locsize,c_info->etype,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("Write standard ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-}
-
-else if ( pos == explicit )
-for ( j=0; j<j_sample; j++ )
-{
-
-Offset = c_info->split.Offset+(MPI_Offset)((i+j)*Totalsize);
-
-ierr=GEN_File_write_at
-     (c_info->fh, Offset, c_info->s_buffer,Locsize,c_info->etype,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("Write explicit ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-}
-
-else if ( pos == shared )
-for ( j=0; j<j_sample; j++ )
-{
-
-ierr=GEN_File_write_shared
-     (c_info->fh,c_info->s_buffer,Locsize,c_info->etype,&stat);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("Write shared ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-}
-
-// IMB_3.1 fix: use the following triple operation to assure write completion
-MPI_File_sync(c_info->fh);
-MPI_Barrier(c_info->File_comm);
-MPI_File_sync(c_info->fh);
-
-if( time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-CHK_DIFF("Write_xxx",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         put, pos, j_sample, time_inner ? -1 : i,
-         chk_mode, &defect);
-CHK_STOP;
-
-}
-
-if( !time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-}  /* end if (File_comm ) */
-}
-
-
-
-void IMB_iwrite_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                   BTYPES type, int i_sample, int j_sample, 
-                   int time_inner, int do_ovrlp, double* time)
-/*
-
-                      
-                      Calls the proper write functions, non blocking case
-                      
-                      (See IMB_read_ij for documentation of calling sequence)
-                      
-
-
-*/
-{
-int i, j;
-int Locsize,Totalsize,Ioffs;
-MPI_Offset Offset;
-
-MPI_Status*  STAT, stat;
-MPI_Request* REQUESTS;
-
-ierr = 0;
-
-*time=0;
-
-if( c_info->File_rank >= 0 )
-{
-#ifdef CHECK
-int chk_mode;
-
-if( pos == shared && type != Collective) chk_mode = -3;
-else                                     chk_mode = -2;
-
-defect = 0.;
-#endif
-
-
-Locsize = c_info->split.Locsize;
-Totalsize = c_info->split.Totalsize;
-Offset = (MPI_Offset)c_info->split.Offset;
-
-
-if(type == Collective )
-
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-NON BLOCKING COLLECTIVE OUTPUT CASES
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-{
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-*time = MPI_Wtime();
-
-if( pos == indv_block )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-ierr=MPI_File_write_all_begin
-  (c_info->fh,c_info->s_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-DIAGNOSTICS("IWrite coll. ",c_info,c_info->s_buffer,Locsize,Totalsize,j,pos);
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_write_all_end
-  (c_info->fh,c_info->s_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-}
-
-else if ( pos == explicit )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-Offset = c_info->split.Offset+(MPI_Offset)(j*Totalsize);
-
-ierr=MPI_File_write_at_all_begin
-  (c_info->fh,Offset,c_info->s_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IWrite expl coll. ",c_info,c_info->s_buffer,Locsize,Totalsize,j,pos);
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_write_at_all_end
-  (c_info->fh,c_info->s_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-
-}
-
-else if ( pos == shared )
-
-for ( j=0; j<i_sample*j_sample; j++ )
-{
-
-ierr=MPI_File_write_ordered_begin
-  (c_info->fh,c_info->s_buffer,Locsize,c_info->etype);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IWrite shared coll. ",c_info,c_info->s_buffer,Locsize,Totalsize,j,pos);
-
-if( do_ovrlp )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-ierr=MPI_File_write_ordered_end
-  (c_info->fh,c_info->s_buffer,&stat);
-MPI_ERRHAND(ierr);
-
-}
-
-// IMB_3.1 fix: use the following triple operation to assure write completion
-MPI_File_sync(c_info->fh);
-MPI_Barrier(c_info->File_comm);
-MPI_File_sync(c_info->fh);
-
-
-*time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-CHK_DIFF("Coll. IWrite_xxx",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         put, pos, i_sample*j_sample, -1,
-         chk_mode, &defect);
-CHK_STOP;
-
-}
-
-else  /* type non Collective */
-
-/*
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-NON BLOCKING NON COLLECTIVE OUTPUT CASES
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-*/
-
-{
-
-REQUESTS = (MPI_Request*)IMB_v_alloc(j_sample*sizeof(MPI_Request), "IWrite_ij");
-STAT     = (MPI_Status *)IMB_v_alloc(j_sample*sizeof(MPI_Status ), "IWrite_ij");
-
-for( j=0; j<j_sample; j++ ) REQUESTS[j]=MPI_REQUEST_NULL;
-
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->File_comm);
-
-if( !time_inner ) *time = MPI_Wtime();
-
-for( i=0; i<i_sample; i++ )
-{
-
-
-if( time_inner ) *time = MPI_Wtime();
-
-if( pos == indv_block )
-for ( j=0; j<j_sample; j++ )
-{
-
-ierr=MPI_File_iwrite(c_info->fh,c_info->s_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-DIAGNOSTICS("IWrite standard ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-}
-
-else if ( pos == explicit )
-for ( j=0; j<j_sample; j++ )
-{
-
-Offset = c_info->split.Offset+(MPI_Offset)((i+j)*Totalsize);
-
-ierr=MPI_File_iwrite_at
-  (c_info->fh,Offset,c_info->s_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IWrite expl ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-
-}
-
-else if ( pos == shared )
-for ( j=0; j<j_sample; j++ )
-{
-
-ierr=MPI_File_iwrite_shared
-  (c_info->fh,c_info->s_buffer,Locsize,c_info->etype,&REQUESTS[j]);
-MPI_ERRHAND(ierr);
-
-DIAGNOSTICS("IWrite shared ",c_info,c_info->s_buffer,Locsize,Totalsize,i+j,pos);
-
-}
-
-
-if( do_ovrlp )
-for ( j=0; j<j_sample; j++ )
-IMB_cpu_exploit(TARGET_CPU_SECS,0);
-
-if( j_sample == 1 )
-MPI_Wait(REQUESTS,STAT);
-else                 
-MPI_Waitall(j_sample,REQUESTS,STAT);
-
-
-// IMB_3.1 fix: use the following triple operation to assure write completion
-MPI_File_sync(c_info->fh);
-MPI_Barrier(c_info->File_comm);
-MPI_File_sync(c_info->fh);
-
-if( time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-CHK_DIFF("IWrite_xxx",c_info, c_info->r_buffer, 0,
-         Locsize, Totalsize, asize, 
-         put, pos, j_sample, time_inner ? -1 : i,
-         chk_mode, &defect);
-CHK_STOP;
-
-}
-if( !time_inner ) *time = (MPI_Wtime() - *time)/(i_sample*j_sample);
-
-IMB_v_free ((void**)&REQUESTS);
-IMB_v_free ((void**)&STAT);    
-
-}
-
-
-}  /* end if (File_comm ) */
-}
diff --git a/src/Makefile b/src/Makefile
deleted file mode 100755
index da9f799d..00000000
--- a/src/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-feedback:
-	@echo " "
-	@echo "As from version 3.1 IMB does not have a default Makefile any more."
-	@echo "This Makefile can be used to"
-	@echo " "
-	@echo "nmake clean"
-	@echo " "
-	@echo "For installing, please use:"
-	@echo " "
-	@echo "nmake -f make_ict_win"
-	@echo " "
-	@echo "to install the Intel(R) Parallel Studio version."
-	@echo " "
-
-clean:
-	del /f /q *.obj *~ PI* core IMB-IO.exe IMB-EXT.exe IMB-MPI1.exe exe_io exe_ext exe_mpi1 IMB_out IMB_out_*
diff --git a/src/Makefile.base b/src/Makefile.base
deleted file mode 100755
index f84cdafb..00000000
--- a/src/Makefile.base
+++ /dev/null
@@ -1,164 +0,0 @@
-.SUFFIXES: 
-.SUFFIXES: .c .o
-
-BUILDDIR=build_$(CPP)
-
-TARGETS=MPI1 EXT MPIIO NBC RMA
-
-SHELL       = /bin/sh
-PROFLIB     =
-
-SRC1 = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c IMB_mem_manager.c IMB_parse_name_mpi1.c  IMB_benchlist.c IMB_strgs.c \
-IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
-IMB_pingpong.c IMB_pingping.c IMB_allreduce.c IMB_reduce_scatter.c IMB_reduce.c IMB_exchange.c IMB_bcast.c IMB_barrier.c IMB_allgather.c IMB_allgatherv.c IMB_gather.c IMB_gatherv.c IMB_scatter.c IMB_scatterv.c IMB_alltoall.c IMB_alltoallv.c IMB_sendrecv.c IMB_init_transfer.c IMB_chk_diff.c IMB_cpu_exploit.c IMB_bandwidth.c
-
-OBJ1=$(SRC1:%.c=$(BUILDDIR)/%.o)
-
-SRCEXT = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_benchlist.c IMB_parse_name_ext.c IMB_strgs.c \
-IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
-IMB_window.c IMB_ones_unidir.c IMB_ones_bidir.c IMB_ones_accu.c IMB_init_transfer.c IMB_user_set_info.c IMB_chk_diff.c 
-
-OBJEXT=$(SRCEXT:%.c=$(BUILDDIR)/%.o)
-
-SRCIO=IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_init_file.c IMB_user_set_info.c\
-IMB_benchlist.c IMB_parse_name_io.c IMB_strgs.c \
-IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c IMB_cpu_exploit.c\
-IMB_open_close.c IMB_write.c IMB_read.c IMB_init_transfer.c IMB_chk_diff.c  
-
-OBJIO=$(SRCIO:%.c=$(BUILDDIR)/%.o)
-
-SRCNBC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c IMB_mem_manager.c IMB_parse_name_nbc.c  IMB_benchlist.c IMB_strgs.c \
-IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
-IMB_allreduce.c IMB_reduce_scatter.c IMB_reduce.c IMB_bcast.c IMB_barrier.c IMB_allgather.c IMB_allgatherv.c IMB_gather.c IMB_gatherv.c IMB_scatter.c IMB_scatterv.c IMB_alltoall.c IMB_alltoallv.c IMB_sendrecv.c IMB_init_transfer.c IMB_chk_diff.c IMB_cpu_exploit.c
-
-OBJNBC=$(SRCNBC:%.c=$(BUILDDIR)/%.o)
-
-SRCRMA = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_benchlist.c IMB_parse_name_rma.c IMB_strgs.c \
-IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c  IMB_init_transfer.c IMB_user_set_info.c IMB_chk_diff.c \
-IMB_rma_put.c IMB_cpu_exploit.c IMB_rma_get.c IMB_rma_atomic.c
-
-OBJRMA=$(SRCRMA:%.c=$(BUILDDIR)/%.o)
-
-
-default: all
-all: IMB-IO  IMB-EXT IMB-MPI1 IMB-NBC IMB-RMA
-
-IMB-MPI1: $(SRC1) IMB_declare.h exe_mpi1
-	$(MAKE) -f Makefile.base MPI1 CPP=MPI1
-IMB-EXT:$(SRCEXT) IMB_declare.h  exe_ext
-	$(MAKE) -f Makefile.base EXT CPP=EXT
-IMB-IO: $(SRCIO) IMB_declare.h exe_io
-	$(MAKE) -f Makefile.base IO CPP=MPIIO
-IMB-NBC: $(SRCNBC) IMB_declare.h exe_nbc
-	$(MAKE) -f Makefile.base NBC CPP=NBC
-IMB-RMA: $(SRCRMA) IMB_declare.h exe_rma
-	$(MAKE) -f Makefile.base RMA CPP=RMA
-
-exe_io:
-	touch $@ *.c; rm -rf exe_ext exe_mpi1 exe_nbc exe_rma
-exe_ext:
-	touch $@ *.c; rm -rf exe_io exe_mpi1  exe_nbc exe_rma
-exe_mpi1:
-	touch $@ *.c; rm -rf exe_io exe_ext  exe_nbc exe_rma
-exe_nbc:
-	touch $@ *.c; rm -rf exe_io exe_ext exe_mpi1 exe_rma
-exe_rma:
-	touch $@ *.c; rm -rf exe_io exe_ext exe_mpi1 exe_nbc
-
-# Make sure that we remove executables for specific architectures
-clean:
-	/bin/rm -f *.o *~ PI* core IMB-IO IMB-EXT IMB-MPI1 IMB-NBC IMB-RMA IMB-IO.mic IMB-EXT.mic IMB-MPI1.mic IMB-NBC.mic IMB-RMA.mic exe_io exe_ext exe_mpi1 exe_nbc exe_rma MPIIO MPI1 EXT NBC RMA
-
-.c.o:
-	$(CC) $(MPI_INCLUDE) $(CPPFLAGS) -D$(CPP) $(OPTFLAGS) -c $*.c
-
-ifneq ($(filter $(CPP),$(TARGETS)),)
-MPI1: $(BUILDDIR) $(OBJ1) 
-	$(CLINKER) $(LDFLAGS) -o $(IMB-MPI1) $(OBJ1)  $(LIB_PATH) $(LIBS)
-
-EXT : $(BUILDDIR) $(OBJEXT) 
-	$(CLINKER) $(LDFLAGS) -o $(IMB-EXT) $(OBJEXT)  $(LIB_PATH) $(LIBS)
-
-IO: $(BUILDDIR) $(OBJIO) 
-	$(CLINKER) $(LDFLAGS) -o $(IMB-IO) $(OBJIO)  $(LIB_PATH) $(LIBS)
-
-NBC: $(BUILDDIR) $(OBJNBC) 
-	$(CLINKER) $(LDFLAGS) -o $(IMB-NBC) $(OBJNBC)  $(LIB_PATH) $(LIBS)
-
-RMA: $(BUILDDIR) $(OBJRMA) 
-	$(CLINKER) $(LDFLAGS) -o $(IMB-RMA) $(OBJRMA)  $(LIB_PATH) $(LIBS)
-
-$(BUILDDIR)/%.o: %.c
-	$(CC) $(MPI_INCLUDE) $(CPPFLAGS) -D$(CPP) $(OPTFLAGS) -c $*.c -o $(BUILDDIR)/$*.o
-
-$(BUILDDIR):
-	-mkdir $@
-else
-MPI1: $(SRC1) IMB_declare.h exe_mpi1
-	$(MAKE) -f Makefile.base MPI1 CPP=MPI1
-EXT:$(SRCEXT) IMB_declare.h  exe_ext
-	$(MAKE) -f Makefile.base EXT CPP=EXT
-IO: $(SRCIO) IMB_declare.h exe_io
-	$(MAKE) -f Makefile.base IO CPP=MPIIO
-NBC: $(SRCNBC) IMB_declare.h exe_nbc
-	$(MAKE) -f Makefile.base NBC CPP=NBC
-RMA: $(SRCRMA) IMB_declare.h exe_rma
-	$(MAKE) -f Makefile.base RMA CPP=RMA
-endif
-
-# DEPENDENCIES
-IMB_declare.h:	IMB_settings.h IMB_comm_info.h IMB_settings_io.h IMB_bnames_mpi1.h \
-                IMB_bnames_ext.h IMB_bnames_io.h IMB_err_check.h IMB_appl_errors.h IMB_mem_info.h
-	touch IMB_declare.h
-
-IMB.o:              IMB_declare.h IMB_benchmark.h
-IMB_init.o:	    IMB_declare.h IMB_benchmark.h
-IMB_mem_manager.o:	    IMB_declare.h IMB_benchmark.h
-IMB_benchlist.o:        IMB_declare.h IMB_benchmark.h 
-IMB_output.o:	    IMB_declare.h IMB_benchmark.h
-IMB_warm_up.o:          IMB_declare.h IMB_benchmark.h
-IMB_chk_diff.o:         IMB_declare.h IMB_chk_diff.c
-IMB_declare.o:	    IMB_declare.h
-IMB_g_info.o: 	    IMB_declare.h IMB_benchmark.h
-IMB_err_handler.o:	    IMB_declare.h IMB_appl_errors.h
-IMB_init_transfer.o:    IMB_declare.h IMB_benchmark.h IMB_comments.h
-
-# IMB-MPI1:
-IMB_parse_name_mpi1.o:  IMB_declare.h IMB_benchmark.h IMB_comments.h
-IMB_pingping.o:	    IMB_declare.h
-IMB_pingpong.o:	    IMB_declare.h
-IMB_sendrecv.o:	    IMB_declare.h
-IMB_exchange.o:	    IMB_declare.h
-IMB_reduce.o:	    IMB_declare.h
-IMB_reduce_scatter.o:   IMB_declare.h
-IMB_allreduce.o:	    IMB_declare.h
-IMB_bcast.o:	    IMB_declare.h
-IMB_allgather.o:        IMB_declare.h
-IMB_allgatherv.o:       IMB_declare.h
-IMB_alltoall.o:        IMB_declare.h
-IMB_alltoallv.o:       IMB_declare.h
-IMB_barrier.o:	    IMB_declare.h
-IMB_bandwidth.o:    IMB_declare.h
-
-# IMB-NBC:
-IMB_parse_name_nbc.o:  IMB_declare.h IMB_benchmark.h IMB_comments.h
-
-# IMB-EXT:
-IMB_parse_name_ext.o:   IMB_declare.h IMB_benchmark.h IMB_comments.h
-IMB_window.o:           IMB_declare.h
-IMB_ones_unidir.o:      IMB_declare.h
-IMB_ones_bidir.o:       IMB_declare.h
-IMB_ones_accu.o:        IMB_declare.h
-
-# IMB-IO:
-IMB_parse_name_io.o:     IMB_declare.h IMB_benchmark.h IMB_comments.h
-IMB_init_file.o:         IMB_declare.h IMB_benchmark.h IMB_comments.h
-IMB_open_close.o:        IMB_declare.h IMB_benchmark.h 
-IMB_write.o:             IMB_declare.h IMB_benchmark.h 
-IMB_read.o:              IMB_declare.h IMB_benchmark.h
-
-#IMB-RMA:
-IMB_parse_name_rma.o:     IMB_declare.h IMB_benchmark.h IMB_comments.h
-IMB_rma_put.o:            IMB_declare.h IMB_benchmark.h 
-IMB_rma_get.o:            IMB_declare.h IMB_benchmark.h 
-IMB_rma_atomic.o:         IMB_declare.h IMB_benchmark.h 
diff --git a/src/make_ict b/src/make_ict
deleted file mode 100755
index dadab84e..00000000
--- a/src/make_ict
+++ /dev/null
@@ -1,19 +0,0 @@
-LIB_PATH    =
-LIBS        =
-CC          = mpiicc 
-ifeq (,$(shell which ${CC}))
-$(error ${CC} is not defined through the PATH environment variable setting. Please try sourcing an Intel(R) Cluster Tools script file such as "mpivars.[c]sh" or "ictvars.[c]sh")
-endif
-OPTFLAGS    = 
-CLINKER     = ${CC} 
-LDFLAGS     =  
-CPPFLAGS    =
-
-IMB-MPI1 = IMB-MPI1
-IMB-IO   = IMB-IO
-IMB-EXT  = IMB-EXT
-IMB-NBC  = IMB-NBC
-IMB-RMA  = IMB-RMA
-
-export CC LIB_PATH LIBS OPTFLAGS CLINKER LDFLAGS CPPFLAGS IMB-MPI1 IMB-IO IMB-EXT IMB-NBC IMB-RMA
-include Makefile.base
diff --git a/src/make_ict_mic b/src/make_ict_mic
deleted file mode 100755
index 3eb85fed..00000000
--- a/src/make_ict_mic
+++ /dev/null
@@ -1,19 +0,0 @@
-LIB_PATH    =
-LIBS        =
-CC          = mpiicc
-ifeq (,$(shell which ${CC}))
-$(error ${CC} is not defined through the PATH environment variable setting. Please try sourcing an Intel(R) Cluster Tools script file such as "mpivars.[c]sh" or "ictvars.[c]sh")
-endif
-OPTFLAGS    = 
-CLINKER     = ${CC} 
-LDFLAGS     = -mmic
-CPPFLAGS    = -mmic
-
-IMB-MPI1 = IMB-MPI1.mic
-IMB-IO   = IMB-IO.mic
-IMB-EXT  = IMB-EXT.mic
-IMB-NBC  = IMB-NBC.mic
-IMB-RMA  = IMB-RMA.mic
-
-export CC LIB_PATH LIBS OPTFLAGS CLINKER LDFLAGS CPPFLAGS IMB-MPI1 IMB-IO IMB-EXT IMB-NBC IMB-RMA
-include Makefile.base
diff --git a/src/make_mpich b/src/make_mpich
deleted file mode 100755
index 035b24e1..00000000
--- a/src/make_mpich
+++ /dev/null
@@ -1,27 +0,0 @@
-# Enter root directory of mpich install
-MPI_HOME=
-
-MPICC=$(shell find ${MPI_HOME} -name mpicc -print)
-
-NULL_STRING :=
-ifneq (,$(findstring /bin/mpicc,${MPICC}))
-MPI_INCLUDE := -I${MPI_HOME}/include
-else
-$(error Variable MPI_HOME="${MPI_HOME}" does not seem to contain a valid mpicc)
-endif
-LIB_PATH    =
-LIBS        = 
-CC          = ${MPI_HOME}/bin/mpicc 
-OPTFLAGS    = -O3
-CLINKER     = ${CC}
-LDFLAGS     =
-CPPFLAGS    = 
-
-IMB-MPI1 = IMB-MPI1
-IMB-EXT  = IMB-EXT
-IMB-IO   = IMB-IO
-IMB-NBC  = IMB-NBC
-IMB-RMA  = IMB-RMA
-
-export MPI_INCLUDE CC LIB_PATH LIBS OPTFLAGS CLINKER LDFLAGS CPPFLAGS IMB-MPI1 IMB-EXT IMB-IO IMB-NBC IMB-RMA
-include Makefile.base
diff --git a/src_c/IMB.c b/src_c/IMB.c
new file mode 100644
index 00000000..21ded360
--- /dev/null
+++ b/src_c/IMB.c
@@ -0,0 +1,412 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB.c 
+
+ Implemented functions: 
+
+ main;
+
+ ***************************************************************************/
+
+#include "mpi.h"
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+extern int num_alloc, num_free;
+
+
+/**********************************************************************/
+
+
+
+int main(int argc, char **argv) {
+/*
+Benchmark driver: initializes MPI, reads the command line, then runs each
+benchmark in BList over process counts, run modes and message lengths.
+
+Input variables:
+
+-argc                 (type int)
+                      Number of command line arguments
+
+
+-argv                 (type char **)
+                      List of command line arguments
+
+
+
+Return value          (type int)
+                      0 always
+                      (also on the early help exit when IMB_basic_input() < 0)
+
+
+*/
+    struct comm_info C_INFO;   /* BASIC SETTINGS STRUCTURE    */
+    struct Bench* BList;       /* List of Benchmarks          */
+
+    struct Bench* Bmark;
+
+#ifdef CHECK
+    int NFAIL, NSUCCESS;
+#endif
+
+    char   *p;
+    int    i, j, iter, imod, ierr;
+    int    NP, NP_min;
+    int    do_it;
+
+    int    header;
+    int    size;
+    int    MAXMSG;
+    int    x_sample, n_sample;
+    int    ci_np; /* number of procs adjusted in case of ParallelTransferMsgRate benchmark */
+    /* IMB 3.1 << */
+    struct iter_schedule ITERATIONS;
+    int    mem_ok;
+    /* >> IMB 3.1  */
+    MODES  BMODE;
+    double time[MAX_TIME_ID];
+
+    Type_Size unit_size;
+
+#ifdef USE_MPI_INIT_THREAD
+
+    IMB_chk_arg_level_of_threading(&argv, &argc);
+
+    if ((ierr = MPI_Init_thread(&argc, &argv, mpi_thread_desired, &mpi_thread_environment)) != MPI_SUCCESS)
+        IMB_err_hand(1, ierr);
+    if (mpi_thread_environment != mpi_thread_desired) {
+        fprintf(unit, "ERROR: MPI_Init_thread was not able to set up desired threading level\n");
+        IMB_err_hand(1, ierr);
+    }
+#else
+    if ((ierr = MPI_Init(&argc, &argv)) != MPI_SUCCESS)
+        IMB_err_hand(1, ierr);
+#endif /*USE_MPI_INIT_THREAD*/
+
+    IMB_set_default(&C_INFO);
+
+    IMB_init_pointers(&C_INFO);
+
+    /* IMB 3.1 << */
+    if (IMB_basic_input(&C_INFO, &BList, &ITERATIONS, &argc, &argv, &NP_min) < 0) {
+    /* >> IMB 3.1  */
+        /* IMB_3.0: help mode */
+        if(C_INFO.w_rank == 0)
+            IMB_help();
+
+        MPI_Barrier(MPI_COMM_WORLD);
+        IMB_free_all(&C_INFO, &BList, &ITERATIONS);
+        MPI_Finalize();
+        return 0;
+    }
+
+    /* IMB 3.1 << */
+    IMB_show_selections(&C_INFO, BList, &argc, &argv);
+    /* >> IMB 3.1  */
+
+    /* LOOP OVER INDIVIDUAL BENCHMARKS */
+    j = 0;
+    while (p = BList[j].name) {
+        Bmark = BList+j;
+
+        ci_np = C_INFO.w_num_procs;
+        if (Bmark->RUN_MODES[0].type == ParallelTransferMsgRate) {
+            ci_np -= ci_np % 2;
+            NP_min += NP_min % 2;
+        }
+        if (Bmark->RUN_MODES[0].type != BTYPE_INVALID) {
+            NP = max(1, min(ci_np, NP_min));
+
+            if (Bmark->RUN_MODES[0].type == SingleTransfer || Bmark->RUN_MODES[0].type == SingleElementTransfer) {
+#ifndef MPIIO
+                NP = min(2, ci_np);
+#else
+                NP = 1;
+#endif
+            }
+
+            do_it = 1;
+/* LOOP OVER PROCESS NUMBERS */
+            while (do_it) {
+                if (IMB_valid(&C_INFO, Bmark, NP)) {
+                    if (IMB_init_communicator(&C_INFO, NP) != 0)
+                        IMB_err_hand(0, -1);
+#ifdef MPIIO
+                    if (IMB_init_file(&C_INFO, Bmark, &ITERATIONS, NP) !=0)
+                        IMB_err_hand(0, -1);
+#endif
+
+#ifdef RMA
+                    /* Now when communicator/s is created, update scaling of reported timing values.
+                     * Some of RMA benchmarks accesses memory of all other processes, so their
+                     * bandwidth results should be update accordingly */
+                    IMB_adjust_timings_scale(&C_INFO, Bmark);
+#endif
+                    /* MINIMAL OUTPUT IF UNIT IS GIVEN */
+                    if (C_INFO.w_rank == 0)
+                        if (unit != stdout)
+                            printf("# Running %s; see file \"%s\" for results\n", p, OUTPUT_FILENAME);
+                    header = 1;
+
+#ifdef EXT
+                    MPI_Type_size(C_INFO.red_data_type, &unit_size);
+#else
+                    if (Bmark->reduction || Bmark->RUN_MODES[0].type == SingleElementTransfer)
+                        MPI_Type_size(C_INFO.red_data_type, &unit_size);
+                    else
+                        MPI_Type_size(C_INFO.s_data_type, &unit_size);
+#endif
+
+                    MAXMSG = (1 << C_INFO.max_msg_log) / unit_size * unit_size;
+
+                    for (imod = 0; imod < Bmark->N_Modes; imod++) {
+                        double sample_time  = MPI_Wtime();
+                        int    time_limit[] = {0, 0};
+
+                        BMODE = &Bmark->RUN_MODES[imod];
+
+                        /* IMB 3.1 << */
+                        // x_sample calc => IMB_init_buffers_iter
+                        /* >> IMB 3.1  */
+
+                        header = header | (imod > 0);
+
+                        iter = 0;
+                        size = 0;
+
+                        Bmark->sample_failure = 0;
+                        /* LOOP OVER MESSAGE LENGTHS */
+                        while ( ((C_INFO.n_lens == 0 && size < MAXMSG ) || (C_INFO.n_lens > 0  && iter < C_INFO.n_lens)) &&
+                                (Bmark->sample_failure != SAMPLE_FAILED_TIME_OUT) ) {
+                            if (Bmark->RUN_MODES[0].type == SingleElementTransfer) {
+                                /* just one size needs to be tested (the size of one element) */
+                                MPI_Type_size(C_INFO.red_data_type, &size);
+                            } else {
+                                if (C_INFO.n_lens > 0)
+                                    size = C_INFO.msglen[iter];
+                                else {
+                                    if (iter == 0)
+                                        size = 0;
+                                    else {
+                                        if (iter == 1) {
+                                            size = ((1 << C_INFO.min_msg_log) + unit_size - 1) / unit_size * unit_size;
+#ifdef EXT
+                                            size = min(size, asize);
+#endif
+                                        } else
+                                            size = min(MAXMSG, size + size);
+                                    }
+                                }
+                            }
+
+                            if (size > MAXMSG) {
+                                if (C_INFO.w_rank == 0)
+                                    fprintf(unit,"Attention, msg size %d truncated to %d\n", size, MAXMSG);
+                                size = MAXMSG;
+                            }
+
+                            size = (size + unit_size - 1) / unit_size * unit_size;
+
+                            if (Bmark->RUN_MODES[0].type == Sync) {
+                                size = MAXMSG;
+                                iter = C_INFO.n_lens - 1;
+                            }
+
+                            /* IMB 3.1 << */
+                            // put some initialization stuff into:
+                            IMB_init_buffers_iter(&C_INFO, &ITERATIONS, Bmark, BMODE, iter, size);
+
+                            if (!Bmark->sample_failure) {
+                                time_limit[1] = 0;
+                                if (C_INFO.rank >= 0)
+                                    time_limit[1] = (MPI_Wtime() - sample_time < max(max(C_INFO.n_lens, C_INFO.max_msg_log - C_INFO.min_msg_log) - 1, 1) * ITERATIONS.secs) ? 0 : 1;
+                            }
+
+                            MPI_Allreduce(&time_limit[1], &time_limit[0], 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+
+                            if (time_limit[0])
+                                Bmark->sample_failure = SAMPLE_FAILED_TIME_OUT;
+
+                            if( !Bmark->sample_failure ) {
+                                IMB_warm_up(Bmark, &C_INFO, size, &ITERATIONS, iter);
+#ifdef MPI1
+                                C_INFO.select_source = Bmark->select_source;
+#endif
+                                Bmark->Benchmark(&C_INFO, size, &ITERATIONS, BMODE, time);
+                            }
+                            /* >> IMB 3.1  */
+
+                            /* Synchronization, in particular for idle processes
+                            which have to wait in a well defined manner */
+                            MPI_Barrier(MPI_COMM_WORLD);
+
+                            /* IMB 3.1 << */
+                            IMB_output(&C_INFO, Bmark, BMODE, header, size, &ITERATIONS, time);
+                            /* >> IMB 3.1  */
+
+                            IMB_close_transfer(&C_INFO, Bmark, size);
+
+                            CHK_BRK;
+
+                            header = 0;
+                            if (Bmark->RUN_MODES[0].type == SingleElementTransfer)
+                                break;
+                            iter++;
+                        } /*while ( (C_INFO.n_lens...*/
+
+                        if (!Bmark->success && C_INFO.w_rank == 0)
+                            fprintf(unit,"\n\n!!! Benchmark unsuccessful !!!\n\n");
+
+                        CHK_BRK;
+                    } /*for (imod=0*/
+
+                    CHK_BRK;
+                } /*if (IMB_valid(&C_INFO, Bmark, NP))*/
+
+                /* CALCULATE THE NUMBER OF PROCESSES FOR NEXT STEP */
+                if (NP >= ci_np)
+                    do_it = 0;
+                else
+                    NP = min(NP + NP, ci_np);
+
+#ifdef MPIIO
+                if (Bmark->RUN_MODES[0].type == SingleTransfer) do_it = 0;
+#endif
+
+                CHK_BRK;
+            } /*while (do_it)*/
+        } /*if (Bmark->RUN_MODES[0].type != BTYPE_INVALID)*/
+
+        CHK_BRK;
+        j++;
+    } /*while (p = BList[j].name)*/
+
+#ifdef CHECK
+    if (C_INFO.w_rank == 0) {
+        j        = 0;
+        NFAIL    = 0;
+        NSUCCESS = 0;
+
+        while (p = BList[j].name) {
+            Bmark = BList + j;
+
+            if (Bmark->RUN_MODES[0].type != BTYPE_INVALID) {
+                Bmark = BList + j;
+                if (!Bmark->success) NFAIL++;
+                else                 NSUCCESS++;
+            }
+            j++;
+        }
+
+        if (NFAIL == 0 && NSUCCESS > 0)
+            fprintf(unit,"\n\n!!!!  ALL BENCHMARKS SUCCESSFUL !!!! \n\n");
+        else if (NSUCCESS > 0)
+        {
+            if(NFAIL == 1)
+                fprintf(unit,"\n\n!!!!  %d  BENCHMARK FAILED     !!!! \n\n", NFAIL);
+            else
+                fprintf(unit,"\n\n!!!!  %d  BENCHMARKS FAILED     !!!! \n\n", NFAIL);
+
+            j = 0;
+
+            while (p = BList[j].name) {
+                Bmark = BList + j;
+                if (Bmark->RUN_MODES[0].type != BTYPE_INVALID) {
+                    if (Bmark->success)
+                        fprintf(unit, "%s    : Successful\n", p);
+                    else
+                        fprintf(unit,"%s    : FAILED !! \n",p);
+                }
+                j++;
+            }
+        } /*else if (NSUCCESS > 0)*/
+    } /*if (C_INFO.w_rank == 0)*/
+#endif /*#ifdef CHECK*/
+
+
+    /* IMB 3.1 << */
+    IMB_free_all(&C_INFO, &BList, &ITERATIONS);
+
+#ifdef CHECK
+    if (num_alloc == num_free)
+        ierr=0;
+    else {
+        fprintf(stderr, "pr %d: calls to IMB_v_alloc %d / IMB_v_free %d (doesn't seem ok, are unequal!)\n", C_INFO.w_rank,num_alloc,num_free);
+        ierr = 1;
+    }
+
+    MPI_Allreduce(&ierr, &mem_ok, 1, MPI_INT,MPI_MAX, MPI_COMM_WORLD);
+
+    if (C_INFO.w_rank == 0)
+        if( mem_ok == 0 )
+            fprintf(stderr, "# of calls to IMB_v_alloc / IMB_v_free match on all processes\n");
+
+#endif /*#ifdef CHECK*/
+    MPI_Barrier(MPI_COMM_WORLD);
+    IMB_end_msg(&C_INFO);
+
+    /* >> IMB 3.1  */
+    MPI_Finalize();
+
+    return 0;
+} /* end of main*/
diff --git a/src/IMB_allgather.c b/src_c/IMB_allgather.c
similarity index 79%
rename from src/IMB_allgather.c
rename to src_c/IMB_allgather.c
index 2000df4f..e47a1730 100644
--- a/src/IMB_allgather.c
+++ b/src_c/IMB_allgather.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -75,7 +74,7 @@ For more documentation than found here, see
 /*******************************************************************************/
 
 /* ===================================================================== */
-/* 
+/*
 IMB 3.1 changes
 July 2007
 Hans-Joachim Plum, Intel GmbH
@@ -90,85 +89,80 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_allgather(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
+                   MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Allgather
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
-  double t1, t2;
-  int    i;
+    double t1, t2;
+    int    i;
+
+    Type_Size s_size,r_size;
+    int s_num, r_num;
 
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
-  
-  *time =0.;
-  if(c_info->rank!=-1)
-  {
-      IMB_do_n_barriers (c_info->communicator, N_BARR);
-
-      for(i=0;i< ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Allgather((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                               s_num,c_info->s_data_type,
-			       (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                               r_num,c_info->r_data_type,
-			       c_info->communicator);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-
-          MPI_ERRHAND(ierr);
-
-          CHK_DIFF("Allgather",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                   0, (size_t) c_info->num_procs* (size_t) size, 1, 
-                   put, 0, ITERATIONS->n_sample, i,
-                   -2, &defect);
-          IMB_do_n_barriers (c_info->communicator, c_info->sync);
-      }
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    size *= c_info->size_scale;
+
+    *time =0.;
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers (c_info->communicator, N_BARR);
+
+        for(i=0; i < ITERATIONS->n_sample; i++) {
+            t1 = MPI_Wtime();
+            ierr = MPI_Allgather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                 s_num, c_info->s_data_type,
+                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                 r_num, c_info->r_data_type,
+                                 c_info->communicator);
+            t2 = MPI_Wtime();
+            *time += (t2 - t1);
+
+            MPI_ERRHAND(ierr);
+
+            CHK_DIFF("Allgather",c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs, 0,
+                     0, (size_t)c_info->num_procs * (size_t)size, 1,
+                     put, 0, ITERATIONS->n_sample, i,
+                     -2, &defect);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -179,7 +173,7 @@ void IMB_iallgather(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
-                    double* time)
+                    double* time) {
 /*
 
 
@@ -211,7 +205,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -224,19 +217,19 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    if ((s_size!=0) && (r_size!=0)) {
-        s_num=size/s_size;
-        r_num=size/r_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         IMB_iallgather_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
         /* INITIALIZATION CALL */
@@ -244,8 +237,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Iallgather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   s_num,
@@ -269,7 +261,7 @@ Output variables:
                      0, 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
 
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
         t_comp  /= ITERATIONS->n_sample;
@@ -286,7 +278,7 @@ void IMB_iallgather_pure(struct comm_info* c_info,
                          int size,
                          struct iter_schedule* ITERATIONS,
                          MODES RUN_MODE,
-                         double* time)
+                         double* time) {
 /*
 
 
@@ -319,7 +311,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -330,25 +321,22 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->s_data_type, &r_size);
-    if ((s_size != 0) && (r_size != 0)) 
-    {
+    if ((s_size != 0) && (r_size != 0)) {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) 
-    {
+    if (c_info->rank != -1) {
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Iallgather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   s_num,
@@ -366,12 +354,11 @@ Output variables:
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      0, 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-            
+
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_pure /= ITERATIONS->n_sample;
     }
-
     time[0] = t_pure;
 }
 
diff --git a/src/IMB_allgatherv.c b/src_c/IMB_allgatherv.c
similarity index 80%
rename from src/IMB_allgatherv.c
rename to src_c/IMB_allgatherv.c
index 6200b537..629228ff 100644
--- a/src/IMB_allgatherv.c
+++ b/src_c/IMB_allgatherv.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -79,7 +78,7 @@ For more documentation than found here, see
 /*******************************************************************************/
 
 /* ===================================================================== */
-/* 
+/*
 IMB 3.1 changes
 July 2007
 Hans-Joachim Plum, Intel GmbH
@@ -94,95 +93,88 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_allgatherv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                    MODES RUN_MODE, double* time)
+                    MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Allgatherv
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
-
 */
-{
-  double t1, t2;
-  int    i;
-  Type_Size s_size,r_size;
-  int s_num, r_num;
+    double t1, t2;
+    int    i;
+    Type_Size s_size, r_size;
+    int s_num = 0, r_num = 0; /* counts stay 0 when a datatype reports size 0 */
 
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
-
-  /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
-
-  for (i=0;i<c_info->num_procs ;i++)
-  {
-      c_info->rdispl[i] = r_num*i;
-      c_info->reccnt[i] = r_num;
-  }
-
-  *time = 0.;
-  if(c_info->rank!=-1)
-  {
-      IMB_do_n_barriers(c_info->communicator, N_BARR);
-
-      for(i=0;i<ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Allgatherv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                                s_num,c_info->s_data_type,
-                                (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                                c_info->reccnt,c_info->rdispl,
-                                c_info->r_data_type,
-                                c_info->communicator);
-          MPI_ERRHAND(ierr);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-
-          CHK_DIFF("Allgatherv",c_info, 
-                   (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                   0, (size_t) c_info->num_procs* (size_t) size, 1, 
-                   put, 0, ITERATIONS->n_sample, i,
-                   -2, &defect);
-          
-          IMB_do_n_barriers(c_info->communicator, c_info->sync);
-      }
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    size *= c_info->size_scale; /* NOTE(review): counts above use the unscaled size; only CHK_DIFF sees the scaled value - confirm intended */
+
+    /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
+
+    for (i = 0; i < c_info->num_procs; i++) {
+        c_info->rdispl[i] = r_num * i;
+        c_info->reccnt[i] = r_num;
+    }
+
+    *time = 0.;
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            t1 = MPI_Wtime();
+            ierr = MPI_Allgatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                  s_num, c_info->s_data_type,
+                                  (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                  c_info->reccnt, c_info->rdispl,
+                                  c_info->r_data_type,
+                                  c_info->communicator);
+            MPI_ERRHAND(ierr);
+            t2 = MPI_Wtime();
+            *time += (t2 - t1);
+
+            CHK_DIFF("Allgatherv", c_info,
+                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs, 0,
+                     0, (size_t)c_info->num_procs * (size_t)size, 1,
+                     put, 0, ITERATIONS->n_sample, i,
+                     -2, &defect);
+
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -193,7 +185,7 @@ void IMB_iallgatherv(struct comm_info* c_info,
                      int size,
                      struct iter_schedule* ITERATIONS,
                      MODES RUN_MODE,
-                     double* time)
+                     double* time) {
 /*
 
 
@@ -226,7 +218,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -239,7 +230,7 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -251,7 +242,7 @@ Output variables:
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
         IMB_iallgatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
@@ -260,7 +251,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
+        for(i = 0; i < ITERATIONS->n_sample; i++)
         {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Iallgatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
@@ -277,15 +268,15 @@ Output variables:
             t_comp -= MPI_Wtime();
             IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
-            
+
             MPI_Wait(&request, &status);
             t_ovrlp += MPI_Wtime();
-            
+
             CHK_DIFF("Iallgatherv", c_info,
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      0, 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-            
+
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
@@ -303,7 +294,7 @@ void IMB_iallgatherv_pure(struct comm_info* c_info,
                           int size,
                           struct iter_schedule* ITERATIONS,
                           MODES RUN_MODE,
-                          double* time)
+                          double* time) {
 /*
 
 
@@ -336,7 +327,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -347,7 +337,7 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -359,7 +349,7 @@ Output variables:
         r_num = size / r_size;
     }
 
-    for (i=0 ; i < c_info->num_procs; ++i) {
+    for (i = 0 ; i < c_info->num_procs; ++i) {
         c_info->rdispl[i] = r_num * i;
         c_info->reccnt[i] = r_num;
     }
diff --git a/src/IMB_allreduce.c b/src_c/IMB_allreduce.c
similarity index 78%
rename from src/IMB_allreduce.c
rename to src_c/IMB_allreduce.c
index 7da53f3e..a352b817 100644
--- a/src/IMB_allreduce.c
+++ b/src_c/IMB_allreduce.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -53,7 +52,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -82,7 +81,7 @@ For more documentation than found here, see
 
 
 /* ===================================================================== */
-/* 
+/*
 IMB 3.1 changes
 July 2007
 Hans-Joachim Plum, Intel GmbH
@@ -96,84 +95,80 @@ Hans-Joachim Plum, Intel GmbH
 /* ===================================================================== */
 
 void IMB_allreduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                   MODES RUN_MODE, double* time)
+                   MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Allreduce
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
-  double t1, t2;
-  int    i;
+    double t1, t2; /* NOTE(review): appear unused now that timing accumulates directly into *time - confirm and drop */
+    int    i;
+
+    Type_Size s_size;
+    int s_num = 0; /* count stays 0 when the datatype reports size 0 */
 
-  Type_Size s_size;
-  int s_num;
-  
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-
-  *time = 0.;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->red_data_type,&s_size);
-  if (s_size!=0) s_num=size/s_size;
-  
-  if(c_info->rank!=-1)
-  {
-      IMB_do_n_barriers (c_info->communicator, N_BARR);
-
-      for(i=0;i< ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Allreduce((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                               (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                               s_num,
-                               c_info->red_data_type,c_info->op_type,
-                               c_info->communicator);
-          MPI_ERRHAND(ierr);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-
-          CHK_DIFF("Allreduce",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                   size, size, asize, 
-                   put, 0, ITERATIONS->n_sample, i,
-                   -1, &defect);
-          
-          IMB_do_n_barriers (c_info->communicator, c_info->sync);
-
-      }
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+
+    *time = 0.;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->red_data_type, &s_size);
+    if (s_size != 0)
+        s_num = size / s_size;
+
+    size *= c_info->size_scale; /* NOTE(review): s_num above uses the unscaled size; only CHK_DIFF sees the scaled value - confirm intended */
+
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            *time -= MPI_Wtime();
+            ierr = MPI_Allreduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                 (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                 s_num,
+                                 c_info->red_data_type, c_info->op_type,
+                                 c_info->communicator);
+            MPI_ERRHAND(ierr);
+            *time += MPI_Wtime();
+
+            CHK_DIFF("Allreduce", c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs, 0,
+                     size, size, asize,
+                     put, 0, ITERATIONS->n_sample, i,
+                     -1, &defect);
+
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -184,8 +179,7 @@ void IMB_iallreduce(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
-                    double* time)
-{
+                    double* time) {
     int         i = 0;
     Type_Size   s_size;
     int         s_num = 0;
@@ -196,15 +190,14 @@ void IMB_iallreduce(struct comm_info* c_info,
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->red_data_type, &s_size);
-    if (s_size != 0) {
+    if (s_size != 0)
         s_num = size / s_size;
-    }
 
     if(c_info->rank != -1) {
         IMB_iallreduce_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
@@ -212,7 +205,7 @@ void IMB_iallreduce(struct comm_info* c_info,
         /* INITIALIZATION CALL */
         IMB_cpu_exploit(t_pure, 1);
 
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
         for(i = 0; i < ITERATIONS->n_sample; i++)
         {
@@ -237,7 +230,7 @@ void IMB_iallreduce(struct comm_info* c_info,
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
             
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
         t_comp  /= ITERATIONS->n_sample;
@@ -254,8 +247,7 @@ void IMB_iallreduce_pure(struct comm_info* c_info,
                          int size,
                          struct iter_schedule* ITERATIONS,
                          MODES RUN_MODE,
-                         double* time)
-{
+                         double* time) {
     int         i = 0;
     Type_Size   s_size;
     int         s_num = 0;
@@ -264,22 +256,19 @@ void IMB_iallreduce_pure(struct comm_info* c_info,
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->red_data_type, &s_size);
-    if (s_size != 0) 
-    {
+    if (s_size != 0)
         s_num = size / s_size;
-    }
 
-    if(c_info->rank != -1) 
-    {
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
+        for (i = 0; i < ITERATIONS->n_sample; i++)
         {
             t_pure -= MPI_Wtime();
             ierr = MPI_Iallreduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
diff --git a/src/IMB_alltoall.c b/src_c/IMB_alltoall.c
similarity index 78%
rename from src/IMB_alltoall.c
rename to src_c/IMB_alltoall.c
index b2daac78..2828daf3 100644
--- a/src/IMB_alltoall.c
+++ b/src_c/IMB_alltoall.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -80,7 +79,7 @@ For more documentation than found here, see
 
 
 /* ===================================================================== */
-/* 
+/*
 IMB 3.1 changes
 July 2007
 Hans-Joachim Plum, Intel GmbH
@@ -94,85 +93,82 @@ Hans-Joachim Plum, Intel GmbH
 /* ===================================================================== */
 
 void IMB_alltoall(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                  MODES RUN_MODE, double* time)
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Alltoall
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
-  double t1, t2;
-  int    i;
+    double t1, t2;
+    int    i;
+
+    Type_Size s_size, r_size;
+    int s_num = 0, r_num = 0; /* counts stay 0 when a datatype reports size 0 */
 
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
- 
-  *time = 0.; 
-  if(c_info->rank!=-1)
-  {
-      IMB_do_n_barriers (c_info->communicator, N_BARR);
-
-      for(i=0;i< ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Alltoall((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              s_num,c_info->s_data_type,
-                              (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                              r_num,c_info->r_data_type, c_info->communicator);
-          MPI_ERRHAND(ierr);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-
-          CHK_DIFF("Alltoall",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                   (size_t) c_info->rank* (size_t) size, 0, (size_t) c_info->num_procs* (size_t) size, 
-                   1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-
-          IMB_do_n_barriers (c_info->communicator, c_info->sync);
-      }
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    size *= c_info->size_scale; /* NOTE(review): counts above use the unscaled size; only CHK_DIFF sees the scaled value - confirm intended */
+
+    *time = 0.;
+    if (c_info->rank != -1)
+    {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++)
+        {
+            t1 = MPI_Wtime();
+            ierr = MPI_Alltoall((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                s_num, c_info->s_data_type,
+                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                r_num, c_info->r_data_type, c_info->communicator);
+            MPI_ERRHAND(ierr);
+            t2 = MPI_Wtime();
+            *time += (t2 - t1);
+
+            CHK_DIFF("Alltoall", c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                     (size_t)c_info->rank * (size_t)size, 0, (size_t)c_info->num_procs * (size_t)size,
+                     1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
+
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -183,7 +179,7 @@ void IMB_ialltoall(struct comm_info* c_info,
                    int size,
                    struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE,
-                   double* time)
+                   double* time) {
 /*
 
 
@@ -215,7 +211,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -228,22 +223,21 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->r_data_type, &r_size);
-    
-    if ((s_size != 0) && (r_size != 0)) 
+
+    if ((s_size != 0) && (r_size != 0))
     {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) 
-    {
+    if (c_info->rank != -1) {
         IMB_ialltoall_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
         /* INITIALIZATION CALL */
@@ -263,11 +257,11 @@ Output variables:
                                  c_info->communicator,
                                  &request);
             MPI_ERRHAND(ierr);
-            
+
             t_comp -= MPI_Wtime();
             IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
-            
+
             MPI_Wait(&request, &status);
             t_ovrlp += MPI_Wtime();
 
@@ -275,7 +269,7 @@ Output variables:
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      ((size_t)c_info->rank * (size_t) size), 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
         t_comp  /= ITERATIONS->n_sample;
@@ -292,7 +286,7 @@ void IMB_ialltoall_pure(struct comm_info* c_info,
                         int size,
                         struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE,
-                        double* time)
+                        double* time) {
 /*
 
 
@@ -325,7 +319,6 @@ Output variables:
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -336,26 +329,23 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->s_data_type, &r_size);
-    
-    if ((s_size != 0) && (r_size != 0)) 
-    {
+
+    if ((s_size != 0) && (r_size != 0)) {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) 
-    {
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+    if(c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for(i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Ialltoall((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  s_num,
@@ -371,10 +361,10 @@ Output variables:
 
             CHK_DIFF("Ialltoall_pure", c_info,
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
-                     ((size_t)c_info->rank * (size_t) size), 0, ((size_t)c_info->num_procs * (size_t)size),
+                     ((size_t)c_info->rank * (size_t)size), 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
             
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_pure /= ITERATIONS->n_sample;
     }
diff --git a/src/IMB_alltoallv.c b/src_c/IMB_alltoallv.c
similarity index 82%
rename from src/IMB_alltoallv.c
rename to src_c/IMB_alltoallv.c
index c0249e77..a64cacfd 100644
--- a/src/IMB_alltoallv.c
+++ b/src_c/IMB_alltoallv.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -97,92 +96,84 @@ void IMB_alltoallv(struct comm_info* c_info, int size, struct iter_schedule* ITE
                     MODES RUN_MODE, double* time)
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Alltoallv
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
-
 */
 {
     double t1, t2;
     int    i;
-    Type_Size s_size,r_size;
+    Type_Size s_size, r_size;
     int    s_num, r_num;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
-    /*  GET SIZE OF DATA TYPE */  
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
 
-    if ((s_size!=0) && (r_size!=0))
-    {
-        s_num=size/s_size;
-        r_num=size/r_size;
-    } 
+    size *= c_info->size_scale;
 
     /* INITIALIZATION OF DISPLACEMENT and SEND/RECEIVE COUNTS */
-    for (i=0;i<c_info->num_procs ;i++)
-    {
-        c_info->sdispl[i] = s_num*i;
+    for (i = 0; i < c_info->num_procs; i++) {
+        c_info->sdispl[i] = s_num * i;
         c_info->sndcnt[i] = s_num;
-        c_info->rdispl[i] = r_num*i;
+        c_info->rdispl[i] = r_num * i;
         c_info->reccnt[i] = r_num;
     }
 
-    *time = 0.; 
-    if(c_info->rank!=-1)
-    {
-
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+    *time = 0.;
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t1 = MPI_Wtime();
-            ierr = MPI_Alltoallv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                                c_info->sndcnt,c_info->sdispl,
+            ierr = MPI_Alltoallv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                c_info->sndcnt, c_info->sdispl,
                                 c_info->s_data_type,
-                                (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                                c_info->reccnt,c_info->rdispl,
+                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                c_info->reccnt, c_info->rdispl,
                                 c_info->r_data_type,
                                 c_info->communicator);
             MPI_ERRHAND(ierr);
             t2 = MPI_Wtime();
             *time += (t2 - t1);
-    
-            CHK_DIFF("Alltoallv",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                    (size_t) c_info->rank* (size_t) size, 0, 
-                    (size_t) c_info->num_procs* (size_t) size, 1, 
+
+            CHK_DIFF("Alltoallv",c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                    (size_t)c_info->rank * (size_t)size, 0,
+                    (size_t)c_info->num_procs * (size_t)size, 1,
                     put, 0, ITERATIONS->n_sample, i,
                     -2, &defect);
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         *time /= ITERATIONS->n_sample;
     }
@@ -241,31 +232,28 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->r_data_type, &r_size);
-    if ((s_size != 0) && (r_size != 0)) 
-    {
+    if ((s_size != 0) && (r_size != 0)) {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) 
-    {
+    if (c_info->rank != -1) {
         /* GET PURE TIME. DISPLACEMENTS AND RECEIVE COUNTS WILL BE INITIALIZED HERE */
         IMB_ialltoallv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
         /* INITIALIZATION CALL */
         IMB_cpu_exploit(t_pure, 1);
 
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Ialltoallv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   c_info->sndcnt,
@@ -278,11 +266,11 @@ Output variables:
                                   c_info->communicator,
                                   &request);
             MPI_ERRHAND(ierr);
-            
+
             t_comp -= MPI_Wtime();
             IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
-            
+
             MPI_Wait(&request, &status);
             t_ovrlp += MPI_Wtime();
 
@@ -290,7 +278,7 @@ Output variables:
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      ((size_t)c_info->rank * (size_t)size), 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
         t_comp  /= ITERATIONS->n_sample;
@@ -351,34 +339,30 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->s_data_type, &r_size);
-    if ((s_size != 0) && (r_size != 0)) 
-    {
+    if ((s_size != 0) && (r_size != 0)) {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
     /* INITIALIZATION OF DISPLACEMENT and SEND/RECEIVE COUNTS */
-    for (i = 0; i < c_info->num_procs; i++) 
-    {
+    for (i = 0; i < c_info->num_procs; i++) {
         c_info->sdispl[i] = s_num * i;
         c_info->sndcnt[i] = s_num;
         c_info->rdispl[i] = r_num * i;
         c_info->reccnt[i] = r_num;
     }
 
-    if(c_info->rank != -1) 
-    {
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+    if(c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for(i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Ialltoallv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   c_info->sndcnt,
@@ -397,7 +381,7 @@ Output variables:
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      ((size_t)c_info->rank * (size_t)size), 0, ((size_t)c_info->num_procs * (size_t)size),
                      1, put, 0, ITERATIONS->n_sample, i, -2, &defect);
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_pure /= ITERATIONS->n_sample;
     }
diff --git a/src/IMB_appl_errors.h b/src_c/IMB_appl_errors.h
similarity index 93%
rename from src/IMB_appl_errors.h
rename to src_c/IMB_appl_errors.h
index 355a1f42..3a1221b8 100644
--- a/src/IMB_appl_errors.h
+++ b/src_c/IMB_appl_errors.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,9 +80,9 @@ void Errors_mpi(MPI_Comm *, int*,...);
 #define MAX_ERR_LINES 32
 struct ERR_HEADER
 {
-int n_header;
-char* Lines[MAX_ERR_LINES];
-int err_flag;
+    int n_header;
+    char* Lines[MAX_ERR_LINES];
+    int err_flag;
 };
 
 #endif
diff --git a/src/IMB_bandwidth.c b/src_c/IMB_bandwidth.c
similarity index 73%
rename from src/IMB_bandwidth.c
rename to src_c/IMB_bandwidth.c
index 01983511..d9f256a1 100644
--- a/src/IMB_bandwidth.c
+++ b/src_c/IMB_bandwidth.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ goods and services.
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -72,48 +71,42 @@ goods and services.
 
 /*************************************************************************/
 
-
-
 void IMB_uni_bandwidth(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-              MODES RUN_MODE, double* time)
+              MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       multiple processes unidirectional exchange
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
     double t1,t2;
     int i;
 
     Type_Size s_size, r_size;
-    int s_num,r_num;
+    int s_num, r_num;
     int s_tag, r_tag;
     int dest, source;
     MPI_Status stat;
@@ -123,40 +116,36 @@ Output variables:
     char ack;
     ierr = 0;
 
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    if ((s_size!=0) && (r_size!=0))
-    {
-        s_num=size/s_size;
-        r_num=size/r_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
     else
-    {
-	return;
-    }
+        return;
+
     s_tag = 1;
     r_tag = s_tag;
 
-    if(c_info->rank!=-1)
+    if (c_info->rank != -1)
         peers = c_info->num_procs / 2;
-    else
-    {
+    else {
         *time = 0.;
         return;
     }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
 
     t1 = MPI_Wtime();
-    if (c_info->rank < peers)
-    {
+    if (c_info->rank < peers) {
         dest = (c_info->rank + peers);
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for(i = 0; i < ITERATIONS->n_sample; i++) {
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Isend((char*)c_info->s_buffer+ws%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                ierr = MPI_Isend((char*)c_info->s_buffer + ws % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
-                                c_info->s_data_type, 
+                                c_info->s_data_type,
                                 dest,
                                 s_tag,
                                 c_info->communicator,
@@ -166,15 +155,13 @@ Output variables:
             MPI_Recv(&ack, 1, MPI_CHAR, dest, r_tag, c_info->communicator, &stat);
         }
     }
-    else
-    {
+    else {
         source = (c_info->rank - peers);
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Irecv((char*)c_info->r_buffer+ws%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                ierr = MPI_Irecv((char*)c_info->r_buffer + ws % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                     r_num,
-                                    c_info->r_data_type, 
+                                    c_info->r_data_type,
                                     source,
                                     r_tag,
                                     c_info->communicator,
@@ -185,12 +172,11 @@ Output variables:
         }
     }
     t2 = MPI_Wtime();
-    *time=(t2 - t1)/ITERATIONS->n_sample;
-
+    *time = (t2 - t1) / ITERATIONS->n_sample;
 }
 
 void IMB_bi_bandwidth(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-             MODES RUN_MODE, double* time)
+                      MODES RUN_MODE, double* time) {
 /*
 
                       
@@ -223,8 +209,7 @@ Output variables:
 
 
 */
-{
-    double t1,t2;
+    double t1, t2;
     int i;
 
     Type_Size s_size, r_size;
@@ -232,92 +217,86 @@ Output variables:
     int s_tag, r_tag;
     int dest, source;
     MPI_Status stat;
-    const int max_win_size2 = 2*MAX_WIN_SIZE;
-    MPI_Request requests[2*MAX_WIN_SIZE];
+    const int max_win_size2 = 2 * MAX_WIN_SIZE;
+    MPI_Request requests[2 * MAX_WIN_SIZE];
 
     int ws, peers;
     char ack;
     ierr = 0;
 
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    if ((s_size!=0) && (r_size!=0))
-    {
-        s_num=size/s_size;
-        r_num=size/r_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
     else
-    {
-	return;
-    }
+        return;
+
     s_tag = 1;
     r_tag = s_tag;
 
     if (c_info->rank!=-1)
         peers = c_info->num_procs / 2;
-    else
-    {
+    else {
         *time = 0.;
         return;
     }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
 
     t1 = MPI_Wtime();
-    if (c_info->rank < peers)
-    {
+    if (c_info->rank < peers) {
         dest = (c_info->rank + peers);
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for(i = 0; i < ITERATIONS->n_sample; i++) {
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Irecv((char*)c_info->r_buffer+ws%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                ierr = MPI_Irecv((char*)c_info->r_buffer + ws % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                 r_num,
-                                c_info->r_data_type, 
+                                c_info->r_data_type,
                                 dest,
                                 r_tag,
                                 c_info->communicator,
                                 &requests[ws]);
 
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Isend((char*)c_info->s_buffer+ws%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                ierr = MPI_Isend((char*)c_info->s_buffer + ws % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
-                                c_info->s_data_type, 
+                                c_info->s_data_type,
                                 dest,
                                 s_tag,
                                 c_info->communicator,
-                                &requests[ws+MAX_WIN_SIZE]);
+                                &requests[ws + MAX_WIN_SIZE]);
 
             MPI_Waitall(max_win_size2, &requests[0], MPI_STATUSES_IGNORE);
             MPI_Recv(&ack, 1, MPI_CHAR, dest, r_tag, c_info->communicator, &stat);
         }
     }
-    else
-    {
+    else {
         source = (c_info->rank - peers);
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Irecv((char*)c_info->r_buffer+ws%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                ierr = MPI_Irecv((char*)c_info->r_buffer + ws % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                         r_num,
-                        c_info->r_data_type, 
+                        c_info->r_data_type,
                         source,
                         r_tag,
                         c_info->communicator,
                         &requests[ws]);
             for (ws = 0; ws < MAX_WIN_SIZE; ws++)
-                ierr= MPI_Isend((char*)c_info->s_buffer+ws%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                ierr = MPI_Isend((char*)c_info->s_buffer + ws % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                         s_num,
-                        c_info->s_data_type, 
+                        c_info->s_data_type,
                         source,
                         s_tag,
                         c_info->communicator,
-                        &requests[ws+MAX_WIN_SIZE]);
+                        &requests[ws + MAX_WIN_SIZE]);
 
             MPI_Waitall(max_win_size2, &requests[0], MPI_STATUSES_IGNORE);
             MPI_Send(&ack, 1, MPI_CHAR, source, s_tag, c_info->communicator);
         }
     }
     t2 = MPI_Wtime();
-    *time=(t2 - t1)/ITERATIONS->n_sample;
+    *time = (t2 - t1) / ITERATIONS->n_sample;
 
 }
diff --git a/src/IMB_barrier.c b/src_c/IMB_barrier.c
similarity index 83%
rename from src/IMB_barrier.c
rename to src_c/IMB_barrier.c
index 55b8295f..fbf7893c 100644
--- a/src/IMB_barrier.c
+++ b/src_c/IMB_barrier.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -80,7 +79,7 @@ For more documentation than found here, see
 /*************************************************************************/
 
 /* ===================================================================== */
-/* 
+/*
 IMB 3.1 changes
 July 2007
 Hans-Joachim Plum, Intel GmbH
@@ -93,62 +92,54 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_barrier(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-                 MODES RUN_MODE, double* time)
+                 MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Barrier
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
-  double t1, t2;
-  int    i;
-
-  ierr = 0;
-
-  if(c_info->rank!=-1)
-  {
-      IMB_do_n_barriers (c_info->communicator, N_BARR);
-
-      t1 = MPI_Wtime();
-      for(i=0;i< ITERATIONS->n_sample;i++)
-      {
-          ierr= MPI_Barrier(c_info->communicator);
-          MPI_ERRHAND(ierr);
-      }
-      t2 = MPI_Wtime();
-      *time=(t2 - t1)/(ITERATIONS->n_sample);
-  }
-  else
-  { 
-      *time = 0.; 
-  }
+    double t1, t2;
+    int    i;
+
+    ierr = 0;
+
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        t1 = MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Barrier(c_info->communicator);
+            MPI_ERRHAND(ierr);
+        }
+        t2 = MPI_Wtime();
+        *time = (t2 - t1) / ITERATIONS->n_sample;
+    }
+    else
+        *time = 0.;
 }
 
 #elif defined NBC // MPI1
@@ -159,8 +150,7 @@ void IMB_ibarrier(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
-                  double* time)
-{
+                  double* time) {
     int         i = 0;
     MPI_Request request;
     MPI_Status  status;
@@ -182,7 +172,7 @@ void IMB_ibarrier(struct comm_info* c_info,
         IMB_do_n_barriers (c_info->communicator, N_BARR);
 
         t_ovrlp = MPI_Wtime();
-        for(i=0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             ierr = MPI_Ibarrier(c_info->communicator, &request);
             MPI_ERRHAND(ierr);
 
@@ -207,23 +197,22 @@ void IMB_ibarrier_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
-                       double* time)
-{
+                       double* time) {
     int         i = 0;
     MPI_Request request;
     MPI_Status  status;
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         IMB_do_n_barriers (c_info->communicator, N_BARR);
 
         t_pure = MPI_Wtime();
-        for(i = 0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             ierr = MPI_Ibarrier(c_info->communicator, &request);
             MPI_ERRHAND(ierr);
             MPI_Wait(&request, &status);
diff --git a/src/IMB_bcast.c b/src_c/IMB_bcast.c
similarity index 82%
rename from src/IMB_bcast.c
rename to src_c/IMB_bcast.c
index 39046a07..4be73bc6 100644
--- a/src/IMB_bcast.c
+++ b/src_c/IMB_bcast.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -96,40 +95,37 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_bcast(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-               MODES RUN_MODE, double* time)
+               MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Bcast
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
                       
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
     double t1, t2;
     int    i;
     Type_Size s_size;
@@ -137,30 +133,30 @@ Output variables:
     void* bc_buf;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /*  GET SIZE OF DATA TYPE */
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    if (s_size!=0) s_num=size/s_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    if (s_size != 0) s_num = size / s_size;
 
     *time = 0.;
 
-    if(c_info->rank!=-1)
-    {
+    size *= c_info->size_scale;
+
+    if (c_info->rank != -1) {
         int root = 0;
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             /* Provide that s_buffer is not overwritten */
             bc_buf = (root == c_info->rank) ? c_info->s_buffer : c_info->r_buffer;
 
             t1 = MPI_Wtime();
-            ierr= MPI_Bcast((char*)bc_buf+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                            s_num,c_info->s_data_type,
-                            root,c_info->communicator);
+            ierr = MPI_Bcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                             s_num, c_info->s_data_type,
+                             root, c_info->communicator);
             t2 = MPI_Wtime();
             *time += (t2 - t1);
 
@@ -187,40 +183,36 @@ void IMB_ibcast(struct comm_info* c_info,
                 int size,
                 struct iter_schedule* ITERATIONS,
                 MODES RUN_MODE,
-                double* time)
+                double* time) {
 /*
 
-                      
                       MPI-NBC benchmark kernel
                       Benchmarks MPI_Ibcast
-                      
 
 
-Input variables: 
+Input variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
 
-Output variables: 
+Output variables:
 
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
     int         i = 0,
                 root = 0;
     Type_Size   s_size;
@@ -230,11 +222,11 @@ Output variables:
     MPI_Status  status;
     double      t_pure = 0.,
                 t_comp = 0.,
-                t_ovrlp = 0.; 
+                t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
-#endif  
+    defect = 0.;
+#endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
@@ -252,8 +244,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             bc_buf = (root == c_info->rank)
                    ? c_info->s_buffer
                    : c_info->r_buffer;
@@ -297,7 +288,7 @@ void IMB_ibcast_pure(struct comm_info* c_info,
                      int size,
                      struct iter_schedule* ITERATIONS,
                      MODES RUN_MODE,
-                     double* time)
+                     double* time) {
 /*
 
 
@@ -330,8 +321,7 @@ Output variables:
 
 
 */
-{
-    int         i = 0,
+    int         i    = 0,
                 root = 0;
     Type_Size   s_size;
     int         s_num = 0;
@@ -356,27 +346,26 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for(i = 0; i < ITERATIONS->n_sample; i++) {
             bc_buf = (root == c_info->rank)
                    ? c_info->s_buffer
                    : c_info->r_buffer;
 
             t_pure -= MPI_Wtime();
-                ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
-                                  s_num,
-                                  c_info->s_data_type,
-                                  root,
-                                  c_info->communicator,
-                                  &request);
-                MPI_ERRHAND(ierr);
-                MPI_Wait(&request, &status);
+            ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                              s_num,
+                              c_info->s_data_type,
+                              root,
+                              c_info->communicator,
+                              &request);
+            MPI_ERRHAND(ierr);
+            MPI_Wait(&request, &status);
             t_pure += MPI_Wtime();
 
             CHK_DIFF("Ibcast_pure", c_info,
                      (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                      0, size, size, 1, put, 0, ITERATIONS->n_sample, i, root, &defect);
-            
+
             root = (root + c_info->root_shift) % c_info->num_procs;
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
diff --git a/src/IMB_benchlist.c b/src_c/IMB_benchlist.c
similarity index 53%
rename from src/IMB_benchlist.c
rename to src_c/IMB_benchlist.c
index f5c929ba..1e420059 100644
--- a/src/IMB_benchlist.c
+++ b/src_c/IMB_benchlist.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,143 +80,124 @@ For more documentation than found here, see
 
 #include "IMB_prototypes.h"
 
-char * NIL_COMMENT[] ={NULL};
+char * NIL_COMMENT[] = { NULL };
 
 
 
 #if 0
-void IMB_list_names(char* Bname, int** List)
+void IMB_list_names(char* Bname, int** List) {
 /*
 
 
 
-Input variables: 
+Input variables:
 
--Bname                (type char*)                      
+-Bname                (type char*)
                       Input benchmark name (or "all" for all available benchmarks)
-                      
 
 
-In/out variables: 
+In/out variables:
 
--List                 (type int**)                      
+-List                 (type int**)
                       Auxiliary list of internal numbering for input benchmark(s)
-                      
 
 
 */
-{
     char** def_cases, **General_cmt;
 
-    IMB_get_def_cases(&def_cases,&General_cmt);
+    IMB_get_def_cases(&def_cases, &General_cmt);
 
-    if( !strcmp(Bname,"all") )
-    {
-	int Ndeflt=-1, n;
+    if (!strcmp(Bname, "all")) {
+        int Ndeflt = -1, n;
 
-	while ( def_cases[++Ndeflt] );
+        while (def_cases[++Ndeflt]);
 
-	IMB_i_alloc(int, *List, Ndeflt+1,"List_Names");
+        IMB_i_alloc(int, *List, Ndeflt + 1, "List_Names");
 
-	for ( n=0; n<Ndeflt; n++ )
-	    (*List)[n] = n;
-/* IMB_3.0
-      (*List)[Ndeflt] = -1;
-*/
-	(*List)[Ndeflt] = LIST_END;
-    }
-    else
-    {
-	IMB_i_alloc(int, *List, 2, "List_Names");
-	IMB_get_def_index(*List, Bname);
-/* IMB_3.0
-      (*List)[1]=-1;
-*/
+        for (n = 0; n < Ndeflt; n++)
+            (*List)[n] = n;
+        /* IMB_3.0
+              (*List)[Ndeflt] = -1;
+              */
+        (*List)[Ndeflt] = LIST_END;
+    } else {
+        IMB_i_alloc(int, *List, 2, "List_Names");
+        IMB_get_def_index(*List, Bname);
+        /* IMB_3.0
+              (*List)[1]=-1;
+              */
 
-	(*List)[1]=LIST_END;
+        (*List)[1] = LIST_END;
     }
 }
 #endif
 
-int IMB_get_bmark_index(char* name)
+int IMB_get_bmark_index(char* name) {
 /*
 
+Input variables:
 
-
-Input variables: 
-
--name                 (type char*)                      
+-name                 (type char*)
                       Input benchmark name
-                      
-
-
-                      
-
 
 */
-{
     char** all_cases;
     int ncases, index;
 
     ncases = IMB_get_all_cases(&all_cases);
 
-    for( index=0; index<ncases; index++)
+    for (index = 0; index < ncases; index++)
     {
-	char* TMP1 = IMB_str(all_cases[index]);
-	char *TMP2 = IMB_str(name);
-	int  iret  = IMB_strcasecmp(TMP1,TMP2);
+        char* TMP1 = IMB_str(all_cases[index]);
+        char *TMP2 = IMB_str(name);
+        int  iret = IMB_strcasecmp(TMP1, TMP2);
 
-	IMB_v_free((void**)&TMP1); 
-	IMB_v_free((void**)&TMP2); 
+        IMB_v_free((void**)&TMP1);
+        IMB_v_free((void**)&TMP2);
 
-	if(iret == 0) break;
+        if (iret == 0) break;
 
     } /* for */
 
-    return ( index < ncases )? index : LIST_INVALID;
+    return (index < ncases) ? index : LIST_INVALID;
 }
-      
 
-void IMB_construct_blist_default(struct Bench** P_BList)
-{
+void IMB_construct_blist_default(struct Bench** P_BList) {
     struct Bench* Bmark;
     char** def_cases, **General_cmt;
     int i;
     int NumBench = IMB_get_def_cases(&def_cases, &General_cmt);
 
-    *P_BList = (struct Bench*)	IMB_v_alloc((1+NumBench)*sizeof(struct Bench), "Construct_Blist 1");
+    *P_BList = (struct Bench*)IMB_v_alloc((1 + NumBench) * sizeof(struct Bench), "Construct_Blist 1");
 
-    for( i=0; i<NumBench; i++)
+    for (i = 0; i < NumBench; i++)
     {
-	Bmark = &(*P_BList)[i];
-	Bmark->name = IMB_str(def_cases[i]);
+        Bmark = &(*P_BList)[i];
+        Bmark->name = IMB_str(def_cases[i]);
 
-	IMB_lwr(Bmark->name);
+        IMB_lwr(Bmark->name);
 
-	Bmark->bench_comments = &NIL_COMMENT[0];
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 1.0;
-	Bmark->success    = 1;
-	Bmark->sample_failure = 0;
-	IMB_set_bmark(Bmark);
+        Bmark->bench_comments = &NIL_COMMENT[0];
+        Bmark->scale_time     = 1.0;
+        Bmark->scale_bw       = 1.0;
+        Bmark->success        = 1;
+        Bmark->sample_failure = 0;
+        IMB_set_bmark(Bmark);
 
     }
 
-    (*P_BList)[NumBench].name=NULL;
+    (*P_BList)[NumBench].name = NULL;
 }
 
 
-void IMB_construct_blist(struct Bench* Bmark, const char* bname)
-{
-
-
-    Bmark->name = IMB_str((char *) bname);
+void IMB_construct_blist(struct Bench* Bmark, const char* bname) {
+    Bmark->name = IMB_str((char *)bname);
     IMB_lwr(Bmark->name);
 
     Bmark->bench_comments = &NIL_COMMENT[0];
-    Bmark->scale_time = 1.0;
-    Bmark->scale_bw   = 1.0;
-    Bmark->success    = 1;
+    Bmark->scale_time     = 1.0;
+    Bmark->scale_bw       = 1.0;
+    Bmark->success        = 1;
     Bmark->sample_failure = 0;
     IMB_set_bmark(Bmark);
 }
@@ -226,39 +206,32 @@ void IMB_construct_blist(struct Bench* Bmark, const char* bname)
 void IMB_construct_blist(struct Bench** P_BList, int n_args, char* name)
 /*
 
-                      
-                      Sets up the list of requested benchmarks 
+                      Sets up the list of requested benchmarks
                       (represented as list of struct Bench structures).
                       In one call, 1 benchmark is included.
-                      
-
 
-Input variables: 
+Input variables:
 
--n_args               (type int)                      
+-n_args               (type int)
                       Overall number of benchmarks to be run (0 means "all")
-                      
 
--name                 (type char*)                      
+-name                 (type char*)
                       Name of benchmark to be included in list
-                      
 
 
-Output variables: 
+Output variables:
 
--P_BList              (type struct Bench**)                      
+-P_BList              (type struct Bench**)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
-                      Updated benchmark list
-                      
 
+                      Updated benchmark list
 
 */
 {
 
-    static int Ndeflt = -1 ;
+    static int Ndeflt = -1;
     static int n_cases;
 
     struct Bench* Bmark;
@@ -268,199 +241,173 @@ Output variables:
 
     int* List;
     int plc;
- 
+
     IMB_get_def_cases(&def_cases, &General_cmt);
- 
-    if( Ndeflt < 0 )
-    {
-	Ndeflt=-1;
-	while( def_cases[++Ndeflt] ) ;
 
-	*P_BList = (struct Bench*)IMB_v_alloc((1+n_args+Ndeflt)*sizeof(struct Bench), "Construct_Blist 1");
- 
-	n_cases=0;
+    if (Ndeflt < 0) {
+        Ndeflt = -1;
+        while (def_cases[++Ndeflt]);
+
+        *P_BList = (struct Bench*)IMB_v_alloc((1 + n_args + Ndeflt) * sizeof(struct Bench), "Construct_Blist 1");
+
+        n_cases = 0;
     }
 
     Bname = IMB_str(name);
     IMB_lwr(Bname);
- 
+
     IMB_list_names(Bname, &List);
 
-/* IMB_3.0
- for( plc=0; List[plc]>=0 ; plc++ )
-*/
-    for( plc=0; List[plc]!=LIST_END ; plc++ )
-    {
-	Bmark = (*P_BList)+n_cases;
-
-/* IMB_3.0 */
-	if (  List[plc]>=0 ) 
-	{
-	    Bmark->name = IMB_str(def_cases[List[plc]]);
-	}
-	else 
-	{
-	    Bmark->name = IMB_str(Bname);
-	}
-	IMB_lwr(Bmark->name);
-
-	Bmark->bench_comments = &NIL_COMMENT[0];
-	Bmark->scale_time = 1.0;
-	Bmark->scale_bw   = 1.0;
-	Bmark->success    = 1;
-/* IMB 3.1 << */
-	Bmark->sample_failure = 0;
-/* >> IMB 3.1  */
- 
-	IMB_set_bmark(Bmark);
-
-/* IMB_3.0
-	 if( Bmark->RUN_MODES[0].type == BTYPE_INVALID ) strcpy(Bmark->name,name);
-*/
-	n_cases++;
+    /* IMB_3.0
+     for( plc=0; List[plc]>=0 ; plc++ )
+     */
+    for (plc = 0; List[plc] != LIST_END; plc++) {
+        Bmark = (*P_BList) + n_cases;
+
+        /* IMB_3.0 */
+        if (List[plc] >= 0) {
+            Bmark->name = IMB_str(def_cases[List[plc]]);
+        }
+        else {
+            Bmark->name = IMB_str(Bname);
+        }
+        IMB_lwr(Bmark->name);
+
+        Bmark->bench_comments = &NIL_COMMENT[0];
+        Bmark->scale_time     = 1.0;
+        Bmark->scale_bw       = 1.0;
+        Bmark->success        = 1;
+        /* IMB 3.1 << */
+        Bmark->sample_failure = 0;
+        /* >> IMB 3.1  */
+
+        IMB_set_bmark(Bmark);
+
+        /* IMB_3.0
+             if( Bmark->RUN_MODES[0].type == BTYPE_INVALID ) strcpy(Bmark->name,name);
+             */
+        n_cases++;
     } /* for*/
 
-    (*P_BList)[n_cases].name=NULL;
+    (*P_BList)[n_cases].name = NULL;
 
-     IMB_v_free((void**)&Bname);
-     IMB_v_free ((void**)&List);
+    IMB_v_free((void**)&Bname);
+    IMB_v_free((void**)&List);
 }
 #endif
 
 
 
-void IMB_destruct_blist(struct Bench ** P_BList)
+void IMB_destruct_blist(struct Bench ** P_BList) {
 /*
 
-                      
                       Completely destructs benchmark list
-                      
-
 
-In/out variables: 
+In/out variables:
 
--P_BList              (type struct Bench **)                      
+-P_BList              (type struct Bench **)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       All substructures plus list itself are free-d
                       and NULL initialized
-                      
-
 
 */
-{
 /****************************************************************
 Freeing of the Benchmark list
 *****************************************************************/
 /* IMB_3.0: take care of empty BList */
-    if( *P_BList != (struct Bench*)NULL )
-    {
-	int i;
-	i=0;
-
-	while( (*P_BList)[i].name )
-	{
-	    IMB_v_free ((void**)&((*P_BList)[i++].name));
-	}
-	IMB_v_free((void**)P_BList);
+    if (*P_BList != (struct Bench*)NULL) {
+        int i;
+        i = 0;
+
+        while ((*P_BList)[i].name) {
+            IMB_v_free((void**)&((*P_BList)[i++].name));
+        }
+        IMB_v_free((void**)P_BList);
     }
 }
 
-
-
-
-void IMB_print_blist(struct comm_info * c_info, struct Bench *BList)
+void IMB_print_blist(struct comm_info * c_info, struct Bench *BList) {
 /*
 
-                      
                       Displays requested benchmark scenario on stdout
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info *)                      
+-c_info               (type struct comm_info *)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--BList                (type struct Bench *)                      
+-BList                (type struct Bench *)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
-
 
 */
-{
     int j, ninvalid;
-    char*nn,*cmt;
+    char*nn, *cmt;
     char** def_cases, **General_cmt;
 
     IMB_get_def_cases(&def_cases, &General_cmt);
 
-    if( General_cmt[0] != NULL )
-    {
-	fprintf(unit,"# Remarks on the current Version:\n\n");
-	j=0;
+    if (General_cmt[0] != NULL) {
+        fprintf(unit, "# Remarks on the current Version:\n\n");
+        j = 0;
 
-	while ( (nn=General_cmt[j++]) )
-		fprintf(unit,"# %s\n",nn);
+        while ((nn = General_cmt[j++]))
+            fprintf(unit, "# %s\n", nn);
     }
 
-    j=0; ninvalid=0;
+    j = 0;
+    ninvalid = 0;
 
-    while( BList[j].name )
-    {
-	if( BList[j].RUN_MODES[0].type == BTYPE_INVALID )
-	{
-	    ninvalid++;
-
-	    if( ninvalid==1 )
-		fprintf(unit,"\n# Attention, invalid benchmark name(s):\n");
-     
-	    fprintf(unit,"# %s\n",BList[j].name);
-	    IMB_v_free ((void**)&(BList[j].name));
-	    BList[j].name = IMB_str("");
-	}
-	j++;
+    while (BList[j].name) {
+        if (BList[j].RUN_MODES[0].type == BTYPE_INVALID) {
+            ninvalid++;
+
+            if (ninvalid == 1)
+                fprintf(unit, "\n# Attention, invalid benchmark name(s):\n");
+
+            fprintf(unit, "# %s\n", BList[j].name);
+            IMB_v_free((void**)&(BList[j].name));
+            BList[j].name = IMB_str("");
+        }
+        j++;
     }
 
     /* IMB_3.0 */
-    if( ninvalid>0 )
-    {
-	/* IMB 3.1 << */
-	int i=0;
-	fprintf(unit,"\n# List of valid benchmarks:\n#\n");
-	/* >> IMB 3.1  */
-	while( def_cases[i] ){fprintf(unit,"# %s\n",def_cases[i++]);}
+    if (ninvalid > 0) {
+        /* IMB 3.1 << */
+        int i = 0;
+        fprintf(unit, "\n# List of valid benchmarks:\n#\n");
+        /* >> IMB 3.1  */
+        while (def_cases[i]) {
+            fprintf(unit, "# %s\n", def_cases[i++]);
+        }
     }
 
-    if( ninvalid < j)
-    {
+    if (ninvalid < j) {
+
+        fprintf(unit, "\n# List of Benchmarks to run:\n\n");
+
+        j = 0;
+        while ((nn = BList[j].name)) {
+            if (BList[j].RUN_MODES[0].type != BTYPE_INVALID) {
+                if (c_info->group_mode >= 0)
+                    fprintf(unit, "# (Multi-)%s\n", nn);
+                else
+                    fprintf(unit, "# %s\n", nn);
+
+                if (*(BList[j].bench_comments))
+                    fprintf(unit, "#     Comments on this Benchmark:\n");
 
-	fprintf(unit,"\n# List of Benchmarks to run:\n\n");
-
-	j=0;
-	while((nn=BList[j].name))
-	{
-	    if( BList[j].RUN_MODES[0].type != BTYPE_INVALID )
-	    {
-		if( c_info->group_mode >= 0 )
-		    fprintf(unit,"# (Multi-)%s\n",nn);
-		else
-		    fprintf(unit,"# %s\n",nn);
-
-		if ( *(BList[j].bench_comments) )
-		    fprintf(unit,"#     Comments on this Benchmark:\n");
-
-		while ( (cmt = *(BList[j].bench_comments++)) )
-		    fprintf(unit,"#     %s\n",cmt);
-	    }
-	    j++;
-	}
+                while ((cmt = *(BList[j].bench_comments++)))
+                    fprintf(unit, "#     %s\n", cmt);
+            }
+            j++;
+        }
     }
 
 }
diff --git a/src/IMB_benchmark.h b/src_c/IMB_benchmark.h
similarity index 83%
rename from src/IMB_benchmark.h
rename to src_c/IMB_benchmark.h
index 96298a8a..b019c79e 100644
--- a/src/IMB_benchmark.h
+++ b/src_c/IMB_benchmark.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -69,8 +68,7 @@ For more documentation than found here, see
 
 /* Classification of benchmarks */
 
-typedef enum 
-{ 
+typedef enum {
     BTYPE_INVALID=-1,
     SingleTransfer,
     ParallelTransfer,
@@ -82,16 +80,14 @@ typedef enum
 } BTYPES;
 
 
-typedef struct cmode 
-{
+typedef struct cmode {
     int AGGREGATE ;   /* -1/0/1 for default/NON AGGREGATE/AGGREGATE */
     int NONBLOCKING  ;   /* 0/1 for no/yes */
     int BIDIR;           /* 0/1 for no/yes */
-    BTYPES type;      
+    BTYPES type;
 } *MODES;
 
-typedef enum
-{
+typedef enum {
     imode_off         = 0,
     imode_dynamic     = 1,
     imode_multiple_np = 2,
@@ -100,13 +96,12 @@ typedef enum
 } IMODE;
 
 /* IMB 3.1 << */
-struct iter_schedule
-{
+struct iter_schedule {
     int     msgspersample, msgs_nonaggr, overall_vol;
     /* evtl override for default parameters MSGSPERSAMPLE, MSGS_NONAGGR, OVERALL_VOL */
 
     int     n_sample, n_sample_prev;
-    
+
     /* dynamic adaptation eventually */
     IMODE   iter_policy;   /* enum for request */
     int*    numiters;      /* #iterations in case of -msglen request */
@@ -131,14 +126,13 @@ typedef enum { put, get, no } DIRECTION;
 
 #ifdef MPIIO
 
-typedef enum 
-{ 
-    nothing=-1,
-    private, 
-    explicit, 
-    indv_block, 
-    indv_cyclic, 
-    shared 
+typedef enum {
+    nothing = -1,
+    priv,
+    explic,
+    indv_block,
+    indv_cyclic,
+    shared
 } POSITIONING;
 #else
 
@@ -154,34 +148,32 @@ typedef int POSITIONING;
 #define SAMPLE_FAILED_TIME_OUT      (-111113)
 /* >> IMB 3.1  */
 
-struct Bench
-{
+struct Bench {
     char*  name;
-    char** bench_comments;       				/* Comments from header => Comments.h */
+    char** bench_comments;                                  /* Comments from header => Comments.h */
 
     DIRECTION access;
 
-    int reduction;               				/* reduction-type y/n */
+    int reduction;                                          /* reduction-type y/n */
 
     int N_Modes;
     struct cmode RUN_MODES[X_MODES];
 
-    void (*Benchmark)(struct comm_info* c_info,int size,	/* Pointer to function runnning the benchmark */
+    void (*Benchmark)(struct comm_info* c_info,int size,    /* Pointer to function running the benchmark */
 /* IMB 3.1 << */
-		      struct iter_schedule* ITERATIONS,MODES RUN_MODE,double* time);
+                      struct iter_schedule* ITERATIONS,MODES RUN_MODE,double* time);
 /* >> IMB 3.1  */
-                              
 
-    double 	scale_time, scale_bw;				/* Scaling of timings and bandwidth */
-    int		Ntimes;
+    double  scale_time, scale_bw;                           /* Scaling of timings and bandwidth */
+    int     Ntimes;
 /* IMB 3.1 << */
-    int		sample_failure;
-// only for -DCHECK purposes: 
-    int 	success;
+    int     sample_failure;
+// only for -DCHECK purposes:
+    int     success;
 /* >> IMB 3.1  */
 
 #ifdef MPI1
-    int		select_source;	/* to distinguish PingPong/PingPing with and without MPI_ANY_SOURCE*/
+    int     select_source;                                  /* to distinguish PingPong/PingPing with and without MPI_ANY_SOURCE*/
 #endif
 
 #ifdef MPIIO
@@ -190,7 +182,7 @@ struct Bench
 };
 /* IMB_3.0 */
 
-#define LIST_END 	-2
-#define LIST_INVALID 	-1
+#define LIST_END        -2
+#define LIST_INVALID    -1
 
 #endif
diff --git a/src/IMB_bnames_ext.h b/src_c/IMB_bnames_ext.h
similarity index 89%
rename from src/IMB_bnames_ext.h
rename to src_c/IMB_bnames_ext.h
index 3fcfabfa..2bafcdf5 100644
--- a/src/IMB_bnames_ext.h
+++ b/src_c/IMB_bnames_ext.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -66,22 +65,22 @@ For more documentation than found here, see
 
 /* NAMES OF BENCHMARKS (DEFAULT CASE)*/
 char *DEFC[] = {
-   "Window",
-   "Unidir_Get",
-   "Unidir_Put",
-   "Bidir_Get",
-   "Bidir_Put",
-   "Accumulate"
+    "Window",
+    "Unidir_Get",
+    "Unidir_Put",
+    "Bidir_Get",
+    "Bidir_Put",
+    "Accumulate"
 };
 
 
 /* NAMES OF BENCHMARKS (ALL CASE)*/
 char *ALLC[] = {
-   "Window",
-   "Unidir_Get",
-   "Unidir_Put",
-   "Bidir_Get",
-   "Bidir_Put",
-   "Accumulate"
+    "Window",
+    "Unidir_Get",
+    "Unidir_Put",
+    "Bidir_Get",
+    "Bidir_Put",
+    "Accumulate"
 };
 
diff --git a/src/IMB_bnames_io.h b/src_c/IMB_bnames_io.h
similarity index 66%
rename from src/IMB_bnames_io.h
rename to src_c/IMB_bnames_io.h
index d43b6bbe..d199e53e 100644
--- a/src/IMB_bnames_io.h
+++ b/src_c/IMB_bnames_io.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -66,82 +65,82 @@ For more documentation than found here, see
 
 /* NAMES OF BENCHMARKS (RUNNING BY DEFAULT)*/
 char *DEFC[] = {
-   "S_Write_Indv",
-   "S_IWrite_Indv",
-   "S_Write_Expl",
-   "S_IWrite_Expl",
-   "P_Write_Indv",
-   "P_IWrite_Indv",
-   "P_Write_Shared",
-   "P_IWrite_Shared",
-   "P_Write_Priv",
-   "P_IWrite_Priv",
-   "P_Write_Expl",
-   "P_IWrite_Expl",
-   "C_Write_Indv",
-   "C_IWrite_Indv",
-   "C_Write_Shared",
-   "C_IWrite_Shared",
-   "C_Write_Expl",
-   "C_IWrite_Expl",
-   "S_Read_Indv",
-   "S_IRead_Indv",
-   "S_Read_Expl",
-   "S_IRead_Expl",
-   "P_Read_Indv",
-   "P_IRead_Indv",
-   "P_Read_Shared",
-   "P_IRead_Shared",
-   "P_Read_Priv",
-   "P_IRead_Priv",
-   "P_Read_Expl",
-   "P_IRead_Expl",
-   "C_Read_Indv",
-   "C_IRead_Indv",
-   "C_Read_Shared",
-   "C_IRead_Shared",
-   "C_Read_Expl",
-   "C_IRead_Expl",
-   "Open_Close"
+    "S_Write_Indv",
+    "S_IWrite_Indv",
+    "S_Write_Expl",
+    "S_IWrite_Expl",
+    "P_Write_Indv",
+    "P_IWrite_Indv",
+    "P_Write_Shared",
+    "P_IWrite_Shared",
+    "P_Write_Priv",
+    "P_IWrite_Priv",
+    "P_Write_Expl",
+    "P_IWrite_Expl",
+    "C_Write_Indv",
+    "C_IWrite_Indv",
+    "C_Write_Shared",
+    "C_IWrite_Shared",
+    "C_Write_Expl",
+    "C_IWrite_Expl",
+    "S_Read_Indv",
+    "S_IRead_Indv",
+    "S_Read_Expl",
+    "S_IRead_Expl",
+    "P_Read_Indv",
+    "P_IRead_Indv",
+    "P_Read_Shared",
+    "P_IRead_Shared",
+    "P_Read_Priv",
+    "P_IRead_Priv",
+    "P_Read_Expl",
+    "P_IRead_Expl",
+    "C_Read_Indv",
+    "C_IRead_Indv",
+    "C_Read_Shared",
+    "C_IRead_Shared",
+    "C_Read_Expl",
+    "C_IRead_Expl",
+    "Open_Close"
 };
 
 /* NAMES OF BENCHMARKS (ALL POSSIBLE CASES)*/
 char *ALLC[] = {
-   "S_Write_Indv",
-   "S_IWrite_Indv",
-   "S_Write_Expl",
-   "S_IWrite_Expl",
-   "P_Write_Indv",
-   "P_IWrite_Indv",
-   "P_Write_Shared",
-   "P_IWrite_Shared",
-   "P_Write_Priv",
-   "P_IWrite_Priv",
-   "P_Write_Expl",
-   "P_IWrite_Expl",
-   "C_Write_Indv",
-   "C_IWrite_Indv",
-   "C_Write_Shared",
-   "C_IWrite_Shared",
-   "C_Write_Expl",
-   "C_IWrite_Expl",
-   "S_Read_Indv",
-   "S_IRead_Indv",
-   "S_Read_Expl",
-   "S_IRead_Expl",
-   "P_Read_Indv",
-   "P_IRead_Indv",
-   "P_Read_Shared",
-   "P_IRead_Shared",
-   "P_Read_Priv",
-   "P_IRead_Priv",
-   "P_Read_Expl",
-   "P_IRead_Expl",
-   "C_Read_Indv",
-   "C_IRead_Indv",
-   "C_Read_Shared",
-   "C_IRead_Shared",
-   "C_Read_Expl",
-   "C_IRead_Expl",
-   "Open_Close"
+    "S_Write_Indv",
+    "S_IWrite_Indv",
+    "S_Write_Expl",
+    "S_IWrite_Expl",
+    "P_Write_Indv",
+    "P_IWrite_Indv",
+    "P_Write_Shared",
+    "P_IWrite_Shared",
+    "P_Write_Priv",
+    "P_IWrite_Priv",
+    "P_Write_Expl",
+    "P_IWrite_Expl",
+    "C_Write_Indv",
+    "C_IWrite_Indv",
+    "C_Write_Shared",
+    "C_IWrite_Shared",
+    "C_Write_Expl",
+    "C_IWrite_Expl",
+    "S_Read_Indv",
+    "S_IRead_Indv",
+    "S_Read_Expl",
+    "S_IRead_Expl",
+    "P_Read_Indv",
+    "P_IRead_Indv",
+    "P_Read_Shared",
+    "P_IRead_Shared",
+    "P_Read_Priv",
+    "P_IRead_Priv",
+    "P_Read_Expl",
+    "P_IRead_Expl",
+    "C_Read_Indv",
+    "C_IRead_Indv",
+    "C_Read_Shared",
+    "C_IRead_Shared",
+    "C_Read_Expl",
+    "C_IRead_Expl",
+    "Open_Close"
 };
diff --git a/src/IMB_bnames_mpi1.h b/src_c/IMB_bnames_mpi1.h
similarity index 79%
rename from src/IMB_bnames_mpi1.h
rename to src_c/IMB_bnames_mpi1.h
index 1d920976..deb62401 100644
--- a/src/IMB_bnames_mpi1.h
+++ b/src_c/IMB_bnames_mpi1.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -66,49 +65,49 @@ For more documentation than found here, see
 
 /* NAMES OF BENCHMARKS (DEFAULT CASE)*/
 char *DEFC[] = {
-   "PingPong" 
-  ,"PingPing" 
-  ,"Sendrecv"
-  ,"Exchange" 
-  ,"Allreduce" 
-  ,"Reduce" 
-  ,"Reduce_scatter" 
-  ,"Allgather" 
-  ,"Allgatherv" 
-  ,"Gather" 
-  ,"Gatherv" 
-  ,"Scatter" 
-  ,"Scatterv" 
-  ,"Alltoall"
-  ,"Alltoallv"
-  ,"Bcast" 
-  ,"Barrier" 
+    "PingPong",
+    "PingPing",
+    "Sendrecv",
+    "Exchange",
+    "Allreduce",
+    "Reduce",
+    "Reduce_scatter",
+    "Allgather",
+    "Allgatherv",
+    "Gather",
+    "Gatherv",
+    "Scatter",
+    "Scatterv",
+    "Alltoall",
+    "Alltoallv",
+    "Bcast",
+    "Barrier"
 };
 
 /* NAMES OF BENCHMARKS (ALL CASE)*/
 char *ALLC[] = {
-   "PingPongSpecificSource"
-  ,"PingPongAnySource"
-  ,"PingPingSpecificSource"
-  ,"PingPingAnySource"
-  ,"PingPong" 
-  ,"PingPing" 
-  ,"Sendrecv"
-  ,"Exchange" 
-  ,"Allreduce" 
-  ,"Reduce" 
-  ,"Reduce_scatter" 
-  ,"Allgather" 
-  ,"Allgatherv" 
-  ,"Gather" 
-  ,"Gatherv" 
-  ,"Scatter" 
-  ,"Scatterv" 
-  ,"Alltoall"
-  ,"Alltoallv"
-  ,"Bcast" 
-  ,"Barrier" 
-  ,"Uniband" 
-  ,"Biband" 
+    "PingPongSpecificSource",
+    "PingPongAnySource",
+    "PingPingSpecificSource",
+    "PingPingAnySource",
+    "PingPong",
+    "PingPing",
+    "Sendrecv",
+    "Exchange",
+    "Allreduce",
+    "Reduce",
+    "Reduce_scatter",
+    "Allgather",
+    "Allgatherv",
+    "Gather",
+    "Gatherv",
+    "Scatter",
+    "Scatterv",
+    "Alltoall",
+    "Alltoallv",
+    "Bcast",
+    "Barrier",
+    "Uniband",
+    "Biband"
 };
 
diff --git a/src/IMB_bnames_nbc.h b/src_c/IMB_bnames_nbc.h
similarity index 79%
rename from src/IMB_bnames_nbc.h
rename to src_c/IMB_bnames_nbc.h
index 40b3d66c..2bbfe207 100644
--- a/src/IMB_bnames_nbc.h
+++ b/src_c/IMB_bnames_nbc.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -66,48 +65,48 @@ For more documentation than found here, see
 
 /* NAMES OF BENCHMARKS (DEFAULT CASE)*/
 char *DEFC[] = {
-   "Ibcast"
-  ,"Iallgather"
-  ,"Iallgatherv"
-  ,"Igather"
-  ,"Igatherv"
-  ,"Iscatter"
-  ,"Iscatterv"
-  ,"Ialltoall"
-  ,"Ialltoallv"
-  ,"Ireduce"
-  ,"Ireduce_scatter"
-  ,"Iallreduce"
-  ,"Ibarrier"
+    "Ibcast",
+    "Iallgather",
+    "Iallgatherv",
+    "Igather",
+    "Igatherv",
+    "Iscatter",
+    "Iscatterv",
+    "Ialltoall",
+    "Ialltoallv",
+    "Ireduce",
+    "Ireduce_scatter",
+    "Iallreduce",
+    "Ibarrier"
 };
 
 /* NAMES OF BENCHMARKS (ALL CASE)*/
 char *ALLC[] = {
-   "Ibcast"
-  ,"Ibcast_pure"
-  ,"Iallgather"
-  ,"Iallgather_pure"
-  ,"Iallgatherv"
-  ,"Iallgatherv_pure"
-  ,"Igather"
-  ,"Igather_pure"
-  ,"Igatherv"
-  ,"Igatherv_pure"
-  ,"Iscatter"
-  ,"Iscatter_pure"
-  ,"Iscatterv"
-  ,"Iscatterv_pure"
-  ,"Ialltoall"
-  ,"Ialltoall_pure"
-  ,"Ialltoallv"
-  ,"Ialltoallv_pure"
-  ,"Ireduce"
-  ,"Ireduce_pure"
-  ,"Ireduce_scatter"
-  ,"Ireduce_scatter_pure"
-  ,"Iallreduce"
-  ,"Iallreduce_pure"
-  ,"Ibarrier"
-  ,"Ibarrier_pure"
+    "Ibcast",
+    "Ibcast_pure",
+    "Iallgather",
+    "Iallgather_pure",
+    "Iallgatherv",
+    "Iallgatherv_pure",
+    "Igather",
+    "Igather_pure",
+    "Igatherv",
+    "Igatherv_pure",
+    "Iscatter",
+    "Iscatter_pure",
+    "Iscatterv",
+    "Iscatterv_pure",
+    "Ialltoall",
+    "Ialltoall_pure",
+    "Ialltoallv",
+    "Ialltoallv_pure",
+    "Ireduce",
+    "Ireduce_pure",
+    "Ireduce_scatter",
+    "Ireduce_scatter_pure",
+    "Iallreduce",
+    "Iallreduce_pure",
+    "Ibarrier",
+    "Ibarrier_pure"
 };
 
diff --git a/src/IMB_bnames_rma.h b/src_c/IMB_bnames_rma.h
similarity index 77%
rename from src/IMB_bnames_rma.h
rename to src_c/IMB_bnames_rma.h
index 5813f5c9..1f435315 100644
--- a/src/IMB_bnames_rma.h
+++ b/src_c/IMB_bnames_rma.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -66,50 +65,47 @@ For more documentation than found here, see
 
 /* NAMES OF BENCHMARKS (DEFAULT CASE)*/
 char *DEFC[] = {
-   "Unidir_put",
-   "Unidir_get",
-   "Bidir_put",
-   "Bidir_get",
-   "One_put_all",
-   "One_get_all",
-   "All_put_all",
-   "All_get_all",
-   "Put_local",
-   "Put_all_local",
-   "Exchange_put",
-   "Exchange_get",
-   "Accumulate",
-   "Get_accumulate",
-   "Fetch_and_op",
-   "Compare_and_swap",
-   "Truly_passive_put"
+    "Unidir_put",
+    "Unidir_get",
+    "Bidir_put",
+    "Bidir_get",
+    "One_put_all",
+    "One_get_all",
+    "All_put_all",
+    "All_get_all",
+    "Put_local",
+    "Put_all_local",
+    "Exchange_put",
+    "Exchange_get",
+    "Accumulate",
+    "Get_accumulate",
+    "Fetch_and_op",
+    "Compare_and_swap",
+    "Truly_passive_put"
 };
 
 
-/* Get_local and Get_all_local are not included to the def 
+/* Get_local and Get_all_local are not included in the def
  * case, because they are supposed to be very similar to
  * Unidir_get and One_get_all correspondingly */
 char *ALLC[] = {
-   "Unidir_put",
-   "Unidir_get",
-   "Bidir_put",
-   "Bidir_get",
-   "One_put_all",
-   "One_get_all",
-   "All_put_all",
-   "All_get_all",
-   "Put_local",
-   "Get_local",      
-   "Put_all_local",
-   "Get_all_local",
-   "Exchange_put",
-   "Exchange_get",
-   "Accumulate",
-   "Get_accumulate",
-   "Fetch_and_op",
-   "Compare_and_swap",
-   "Truly_passive_put"
+    "Unidir_put",
+    "Unidir_get",
+    "Bidir_put",
+    "Bidir_get",
+    "One_put_all",
+    "One_get_all",
+    "All_put_all",
+    "All_get_all",
+    "Put_local",
+    "Get_local",
+    "Put_all_local",
+    "Get_all_local",
+    "Exchange_put",
+    "Exchange_get",
+    "Accumulate",
+    "Get_accumulate",
+    "Fetch_and_op",
+    "Compare_and_swap",
+    "Truly_passive_put"
 };
-
-
-
diff --git a/src_c/IMB_cache.h b/src_c/IMB_cache.h
new file mode 100644
index 00000000..97dc007a
--- /dev/null
+++ b/src_c/IMB_cache.h
@@ -0,0 +1,52 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#define CACHE_SIZE 1024
+#define CACHE_LINE_SIZE 64
diff --git a/src_c/IMB_chk_diff.c b/src_c/IMB_chk_diff.c
new file mode 100644
index 00000000..c6897eb9
--- /dev/null
+++ b/src_c/IMB_chk_diff.c
@@ -0,0 +1,1348 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ A collection of auxiliary functions ONLY for the CHECK mode of IMB
+ ===================================================================
+
+ File: IMB_chk_diff.c 
+
+ Implemented functions: 
+
+ IMB_chk_dadd;
+ IMB_ddiff;
+ IMB_show;
+ IMB_err_msg;
+ IMB_chk_diff;
+ IMB_cmp_cat;
+ IMB_chk_contiguous;
+ IMB_chk_distr;
+ IMB_chk_contained;
+ IMB_compute_crc;
+
+ ***************************************************************************/
+
+
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+
+#include "IMB_prototypes.h"
+
+#include <limits.h>
+
+
+void IMB_chk_dadd(void* AUX, int Locsize, size_t buf_pos,
+                  int rank0, int rank1) {
+/*
+
+                      Check-mode helper: fills a buffer section with the
+                      reference values obtained by summing the ranks'
+                      buffer values over an inclusive range of ranks
+
+Input variables:
+
+-Locsize              (type int)
+                      Size of the buffer section; Locsize / asize items
+                      are written
+
+-buf_pos              (type size_t)
+                      Start position of the section; buf_pos / asize is
+                      the index of the first value taken per rank
+
+-rank0                (type int)
+-rank1                (type int)
+                      First and last rank (both included) to accumulate
+
+In/out variables:
+
+-AUX                  (type void*)
+                      Accessed as assign_type array; receives the sums
+
+*/
+    assign_type* const sum = (assign_type*)AUX;
+    const size_t n_items = Locsize / asize;
+    size_t k;
+    int    proc;
+
+    /* start from zero ... */
+    for (k = 0; k < n_items; k++)
+        sum[k] = 0.;
+
+    /* ... then add each rank's contribution, rank by rank */
+    for (proc = rank0; proc <= rank1; proc++)
+        for (k = 0; k < n_items; k++)
+            sum[k] += BUF_VALUE(proc, buf_pos / asize + k);
+}
+
+
+double IMB_ddiff(assign_type *A, assign_type *B, size_t len,
+                 size_t *fault_pos) {
+/*
+
+                          Compares the values of 2 buffers A, B item by item and
+                          returns the max. relative deviation found
+
+Input variables:
+
+-A                        (type assign_type *)
+                          Buffer of reference values; the deviation of an item is
+                          taken relative to |A[i]| (or to 1 if A[i] is 0)
+
+-B                        (type assign_type *)
+                          Another buffer of values to be checked against A
+
+-len                      (type size_t)
+                          Length (in assign_type items) of A, B
+
+Output variables:
+
+-fault_pos                (type size_t *)
+                          Byte position (item index * asize) of the first non
+                          tolerable deviation; CHK_NO_FAULT if there is none
+
+Return value              (type double)
+                          1 if some deviation is not tolerable (exceeds TOL);
+                          else the largest deviation found (0 if len is 0)
+
+*/
+    double max_rel, cur, scale;
+    size_t i;
+
+    max_rel = 0.;
+    *fault_pos = CHK_NO_FAULT;
+
+    for (i = 0; i < len; i++) {
+        scale = (A[i] != 0.) ? A_ABS(A[i]) : 1.;
+        cur = A_ABS(A[i] - B[i]) / scale;
+
+        /* negated test, so that a NaN deviation counts as a fault, too */
+        if (!(cur <= TOL)) {
+            *fault_pos = i * asize;
+            return 1.;
+        }
+
+        /* keep the maximum; formerly only the deviation of the last */
+        /* item was returned, in contrast to the documented contract */
+        if (cur > max_rel)
+            max_rel = cur;
+    }
+    return max_rel;
+}
+
+
+void IMB_show(char* text, struct comm_info* c_info, void* buf,
+              size_t loclen, size_t totlen, int j_sample,
+              POSITIONING fpos) {
+/*
+
+                      Shows an excerpt of an erroneous buffer (check mode)
+
+Input variables:
+
+-text                 (type char*)
+                      Accompanying text to put out
+
+-loclen               (type size_t)
+                      Local length of buffer (in bytes)
+
+-totlen               (type size_t)
+                      Total length of buffer (for gathered or shared access buffers)
+
+-j_sample             (type int)
+                      Number of the sample the error occurred in
+
+-fpos                 (type POSITIONING)
+                      File positioning (MPIIO builds); -1 suppresses the size line
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+-buf                  (type void*)
+                      Given check buffer
+
+*/
+    size_t i;
+
+    fprintf(unit, "Process %d: %s", c_info->rank, text);
+    fprintf(unit, "\n");
+
+#ifdef DEBUG
+    size_t j;
+    fprintf(dbg_file, "Process %d: %s", c_info->rank, text);
+    fprintf(dbg_file, "\n");
+#endif /*DEBUG*/
+
+#ifdef MPIIO
+    {
+        MPI_Offset Offset = 0; /* stays 0 for fpos values without a case below (was uninitialized) */
+        switch (fpos) {
+            case indv_block:
+                Offset = (MPI_Offset)(j_sample * totlen);
+                break;
+
+            case explic:
+                Offset = c_info->split.Offset + (MPI_Offset)(j_sample * totlen);
+                break;
+
+            case priv:
+                Offset = (MPI_Offset)(j_sample * loclen);
+                break;
+
+            case shared:
+                Offset = (MPI_Offset)(-1);
+                break;
+        }
+
+        if (fpos == shared)
+            fprintf(unit,
+#ifdef WIN_IMB
+            "Overall size = %I64u,"
+            " Portion = %I64u,"
+#else
+            "Overall size = %lu,"
+            " Portion = %lu,"
+#endif /* WIN_IMB*/
+            " #sample= %d\n",
+            totlen, loclen, j_sample);
+        else if (fpos != -1)
+            fprintf(unit,
+#ifdef WIN_IMB
+            "Overall size = %I64u,"
+            " Portion = %I64u,"
+#else
+            "Overall size = %lu,"
+            " Portion = %lu,"
+#endif /* WIN_IMB*/
+            " Startpos = %ld\n",
+            totlen, loclen, (long)Offset);
+
+#ifdef DEBUG
+        if (fpos == shared)
+            fprintf(dbg_file,
+#ifdef WIN_IMB
+            "Overall size = %I64u,"
+            " Portion = %I64u,"
+#else
+            "Overall size = %lu,"
+            " Portion = %lu,"
+#endif /*WIN_IMB*/
+            " #sample= %d\n",
+            totlen, loclen, j_sample);
+        else if (fpos != -1)
+            fprintf(dbg_file,
+#ifdef WIN_IMB
+            "Overall size = %I64u,"
+            " Portion = %I64u,"
+#else
+            "Overall size = %lu,"
+            " Portion = %lu,"
+#endif /*WIN_IMB*/
+            " Startpos = %ld\n",
+            totlen, loclen, (long)Offset);
+#endif /*DEBUG*/
+
+    }
+#endif /*MPIIO*/
+
+    if (loclen < asize) {
+        /* less than one assign_type item: dump the single bytes */
+        if (loclen == 0) {
+            fprintf(unit, "Buffer empty\n");
+#ifdef DEBUG
+            fprintf(dbg_file, "Buffer empty\n");
+#endif
+        } else {
+            fprintf(unit, "Buffer in bytewise int representation: ");
+
+            for (i = 0; i < loclen; i++)
+                fprintf(unit, "%d ", ((char*)buf)[i]);
+
+            fprintf(unit, "\n");
+#ifdef DEBUG
+            fprintf(dbg_file, "Buffer in bytewise int representation: ");
+
+            for (i = 0; i < loclen; i++)
+                fprintf(dbg_file, "%d ", ((char*)buf)[i]);
+            fprintf(dbg_file, "\n");
+#endif /*DEBUG*/
+        }
+    } else {
+        if (loclen >= 2 * asize)
+            fprintf(unit, "Buffer, 1st and last entry: ");
+        else
+            fprintf(unit, "Buffer entry: ");
+
+#ifdef BUFFERS_INT
+        fprintf(unit, "%d ", ((assign_type*)buf)[0]);
+        if (loclen >= 2 * asize)
+            fprintf(unit, "%d ", ((assign_type*)buf)[loclen / asize - 1]);
+#endif
+
+#ifdef BUFFERS_FLOAT
+        fprintf(unit, "%f ", ((assign_type*)buf)[0]);
+        if (loclen >= 2 * asize)
+            fprintf(unit, "%f ", ((assign_type*)buf)[loclen / asize - 1]);
+#endif
+
+        fprintf(unit, "\n");
+
+#ifdef DEBUG
+        if (err_flag)
+            for (j = 0; j < loclen / asize; j += 5) { /* whole buffer to dbg_file, 5 items per line */
+#ifdef BUFFERS_INT
+                for (i = j; i < min(loclen / asize, j + 5); i++)
+                    fprintf(dbg_file, "%d ", ((assign_type*)buf)[i]);
+#endif /*BUFFERS_INT*/
+#ifdef BUFFERS_FLOAT
+                for (i = j; i < min(loclen / asize, j + 5); i++)
+                    fprintf(dbg_file, "%f ", ((assign_type*)buf)[i]);
+#endif /*BUFFERS_FLOAT*/
+                fprintf(dbg_file, "\n");
+            } /*for*/
+#endif /*DEBUG*/
+    }
+    fflush(unit);
+
+#ifdef DEBUG
+    fflush(dbg_file);
+#endif
+
+}
+
+void IMB_err_msg(struct comm_info* c_info, char* text, size_t totsize,
+                 int j_sample) {
+/*
+
+                      Prints a one-line error diagnostic to 'unit'
+
+Input variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI (see [1]);
+                      only the rank is read here
+
+-text                 (type char*)
+                      Accompanying text
+
+-totsize              (type size_t)
+                      Size of the erroneous buffer
+
+-j_sample             (type int)
+                      Number of the sample the error occurred in
+
+*/
+    /* totsize is printed with %I64u if WIN_IMB is defined, with %lu else */
+#ifdef WIN_IMB
+    static const char line_fmt[] = "%d: Error %s,size = %I64u,sample #%d\n";
+#else
+    static const char line_fmt[] = "%d: Error %s,size = %lu,sample #%d\n";
+#endif
+
+    fprintf(unit, line_fmt, c_info->rank, text, totsize, j_sample);
+}
+
+#ifdef CHECK
+
+void IMB_chk_diff(char* text, struct comm_info* c_info, void* RECEIVED,
+                  size_t buf_pos, int Locsize, size_t Totalsize,
+                  int unit_size, DIRECTION mode, POSITIONING fpos,
+                  int n_sample, int j_sample, int source,
+                  double* diff) {
+/*
+
+                          Checks a received buffer against expected ref values
+
+Input variables:
+
+-text                     (type char*)
+                          Accompanying text
+
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+
+-RECEIVED                 (type void*)
+                          The buffer to be checked
+
+
+-buf_pos                  (type size_t)
+                          Beginning position (in units -> unit_size)
+
+
+-Locsize                  (type int)
+                          Local buffer size
+
+
+-Totalsize                (type size_t)
+                          Total buffer size (in case of gathered buffers);
+                          nothing is checked if 0
+
+-unit_size                (type int)
+                          Base unit for positioning
+
+
+-mode                     (type DIRECTION)
+                          Direction of the action that took place
+
+
+-fpos                     (type POSITIONING)
+                          File positioning of the action that took place (if relevant)
+
+
+-n_sample                 (type int)
+                          # overall samples
+
+
+-j_sample                 (type int)
+                          current sample (MPI-IO put mode: all samples if < 0)
+
+
+-source                   (type int)
+                          Sending process if >= 0; -1: sum over all ranks,
+                          -2: concatenation of all ranks' portions,
+                          -3: portions in arbitrary order (MPI-IO only)
+
+Output variables:
+
+-diff                     (type double*)
+                          The error against expected values
+
+*/
+#ifdef MPIIO
+    MPI_File    restore;
+    MPI_Status  stat;
+    double      def_tmp;
+    int         j, j1, j2, ierr, rank, allpos, ipos;
+    size_t      pos1, pos2;
+    int*        rankj;
+    size_t*     lenj;
+#endif
+
+    double defloc;
+
+    size_t faultpos, pos;
+
+    int    *all_ranks, Npos;
+    size_t *lengths;
+
+    if (err_flag) return; /* an error was flagged before, no further checks */
+
+    defloc = 0.;
+    faultpos = CHK_NO_FAULT;
+
+    if (Totalsize == 0) {
+        *diff = 0.;
+        return;
+    }
+
+#ifdef MPIIO
+
+    MPI_Barrier(c_info->File_comm);
+
+    if (mode == put) {
+        if (c_info->File_rank == 0) {
+
+            IMB_alloc_buf(c_info, "Write check", 0, Totalsize);
+
+            ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
+                                 c_info->amode, c_info->info, &restore);
+            IMB_err_hand(1, ierr);
+
+            if (source == -3) {
+                IMB_i_alloc(size_t, lengths, c_info->File_num_procs*n_sample + n_sample - 1, "chk_diff 1");
+                IMB_i_alloc(int, all_ranks, c_info->File_num_procs*n_sample + n_sample - 1, "chk_diff 2");
+            } else {
+                IMB_i_alloc(size_t, lengths, c_info->File_num_procs, "chk_diff 1a");
+                IMB_i_alloc(int, all_ranks, c_info->File_num_procs, "chk_diff 2a");
+            }
+
+            lenj  = lengths;
+            rankj = all_ranks;
+            allpos = 0;
+
+            if (j_sample < 0) {
+                j1 = 0;
+                j2 = n_sample - 1;
+            } else {
+                j1 = j_sample;
+                j2 = j_sample;
+            }
+
+            ierr = MPI_File_seek(restore, (MPI_Offset)(j1*Totalsize), MPI_SEEK_SET);
+            MPI_ERRHAND(ierr);
+
+            for (j = j1; j <= j2 && faultpos == CHK_NO_FAULT /*faultpos<0*/; j++) {
+                IMB_Assert(Totalsize <= INT_MAX);
+
+                ierr = MPI_File_read(restore, c_info->r_buffer,
+                                    (int)Totalsize, c_info->etype, &stat);
+
+                MPI_ERRHAND(ierr);
+                RECEIVED = c_info->r_buffer;
+
+                if (source == -3) {
+                    IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 1,
+                                lenj, rankj, &Npos, &faultpos, &def_tmp);
+
+                    lenj += Npos;
+                    rankj += Npos;
+                    allpos += Npos;
+                } else {
+                    IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 0,
+                                lenj, rankj, &Npos, &faultpos, &def_tmp);
+                }
+
+                defloc = max(defloc, def_tmp);
+
+            } /*for( j=j...*/
+
+            MPI_File_close(&restore);
+
+
+            j_sample = j - 1;
+
+            IMB_free_aux();
+
+            if ( /*faultpos >= 0*/ faultpos != CHK_NO_FAULT) {
+
+                IMB_err_msg(c_info, text, Totalsize, j_sample);
+                fprintf(unit,
+                    "Error: restored buffer from output file, invalid portion starting at pos."
+#ifdef WIN_IMB
+                    "  %I64u\n",
+#else
+                    "  %lu\n",
+#endif
+                    (j_sample*Totalsize) + faultpos);
+
+                AUX = (void*)(((char*)RECEIVED) + faultpos);
+                IMB_show("Erroneous data:", c_info, AUX, Totalsize - faultpos, Totalsize - faultpos, j_sample, nothing);
+
+            } else {
+                if (source == -3) {
+                    IMB_chk_distr(c_info, Totalsize, n_sample, lengths, all_ranks, allpos, &def_tmp);
+
+                    if (def_tmp > 0.) {
+                        IMB_err_msg(c_info, text, Totalsize, j_sample);
+                        IMB_show("restored buffer from output file, has permuted data: ",
+                                 c_info, RECEIVED, Totalsize, Totalsize, j_sample, nothing);
+                    }
+                }
+
+                defloc = max(defloc, def_tmp);
+
+            } /*if( faultpos >= 0 ) */
+
+            IMB_del_r_buf(c_info);
+            IMB_v_free((void**)&lengths);
+            IMB_v_free((void**)&all_ranks);
+        } /*if( c_info -> File_rank == 0 )*/
+
+        fflush(unit);
+    }   /*if( mode == put )*/
+
+    if (mode == get) {
+        size_t file_pos;
+        file_pos = j_sample * Totalsize;
+
+        *diff = 0.;
+
+        IMB_alloc_aux(Totalsize, " chk_diff 5");
+        IMB_init_file_content(AUX, file_pos, file_pos + Totalsize - 1);
+
+        IMB_chk_contained(RECEIVED, Locsize, AUX, Totalsize, &pos, &faultpos, &def_tmp,
+                          "Compare received portion with file Content");
+
+        IMB_get_rank_portion(c_info->File_rank, c_info->File_num_procs, Totalsize, asize,
+                             &pos1, &pos2);
+
+        if ( /*faultpos >= 0*/ faultpos != CHK_NO_FAULT) { /* the type of faultpos is changed to size_t*/
+            err_flag = 1;
+            defloc = 1;
+            IMB_err_msg(c_info, text, Totalsize, j_sample);
+            RECEIVED = (void*)((char*)RECEIVED + faultpos);
+
+            fprintf(unit,
+                "File position: "
+#ifdef WIN_IMB
+                "%I64u\n",
+#else
+                "%lu\n",
+#endif
+                file_pos + pos + faultpos);
+
+            IMB_show("Read invalid portion: ", c_info, RECEIVED,
+                     Locsize - faultpos, Totalsize, j_sample, fpos);
+
+            AUX = (void*)((char*)AUX + pos + faultpos);
+
+            IMB_show("Expected portion: ", c_info, AUX,
+                     Locsize - pos - faultpos, Locsize - pos - faultpos, j_sample, nothing);
+            ipos = (int)pos; /* gather an int: the send signature has to match the MPI_INT receive */
+            MPI_Gather(&ipos, 1, MPI_INT, c_info->rdispl, 1, MPI_INT, 0, c_info->File_comm);
+        } else {
+            if (source == -2 && Locsize > 0) {
+                IMB_get_rank_portion(c_info->File_rank, c_info->File_num_procs, Totalsize, asize,
+                                     &pos1, &pos2);
+                if (pos1 != pos)
+                    defloc = 1;
+            }
+
+            if (source == -3) {
+                /* Check permuted buffer */
+                ipos = (int)pos; /* pos is a size_t and must not be sent as MPI_INT directly */
+                MPI_Gather(&ipos, 1, MPI_INT, c_info->rdispl, 1, MPI_INT, 0, c_info->File_comm);
+                MPI_Gather(&Locsize, 1, MPI_INT, c_info->reccnt, 1, MPI_INT, 0, c_info->File_comm);
+
+                if (c_info->File_rank == 0)
+                    IMB_chk_contiguous(c_info, c_info->rdispl, c_info->reccnt, &defloc);
+                else
+                    defloc = 0.;
+
+            }
+
+            if (defloc > 0.)
+                IMB_err_msg(c_info, "Wrong portion ordering in read buffer", Totalsize, j_sample);
+        } /*if( faultpos >= 0 )*/
+
+        defloc = max(defloc, def_tmp);
+    } /*if( mode == get )*/
+
+    MPI_Barrier(c_info->File_comm);
+
+#else /*not  MPIIO*/
+
+    if (source >= 0) {
+        IMB_alloc_aux(Totalsize, "chk_diff 6");
+        IMB_ass_buf(AUX, source, buf_pos,
+                    (buf_pos + Totalsize > 0) ? buf_pos + Totalsize - 1 : 0, 1);
+
+
+        if (Totalsize < asize) {
+            IMB_chk_contained(RECEIVED, Totalsize, AUX, Totalsize, &pos, &faultpos, &defloc,
+                              "Compare received with expected portion");
+
+            if ( /*faultpos>=0*/ faultpos != CHK_NO_FAULT) { /* type of faultpos is changed to size_t*/
+                faultpos = 0;
+                defloc   = 1.;
+            }
+        } else
+            defloc = IMB_ddiff((assign_type *)AUX, (assign_type *)RECEIVED, Totalsize / asize, &faultpos);
+
+    } else if (source == -1) {
+        IMB_alloc_aux(Locsize, "chk_diff 7");
+
+        IMB_chk_dadd(AUX, Locsize, buf_pos, 0, c_info->num_procs - 1);
+
+        defloc = IMB_ddiff((assign_type *)AUX, (assign_type *)RECEIVED, Locsize / asize, &faultpos);
+
+    } else {
+        if (source == -2) {
+            lengths = NULL;
+            all_ranks = NULL;
+
+            IMB_cmp_cat(c_info, RECEIVED, Totalsize, buf_pos, unit_size, 0,
+                        lengths, all_ranks, &Npos, &faultpos, &defloc);
+        }
+
+    }
+
+    if ( /*faultpos>=0*/ faultpos != CHK_NO_FAULT) { /* type of faultpos is changed to size_t*/
+        void* tmp = (void*)(((char *)RECEIVED) + faultpos);
+
+        IMB_err_msg(c_info, text, Totalsize, j_sample);
+        IMB_show("Got invalid buffer: ", c_info, tmp, asize, asize, j_sample, -1);
+
+        fprintf(unit,
+#ifdef WIN_IMB
+            "pos: %I64u\n"
+#else
+            "pos: %lu\n"
+#endif /*WIN_IMB*/
+            , faultpos);
+
+        tmp = (void*)(((char *)AUX) + faultpos);
+
+        IMB_show("Expected    buffer: ", c_info, tmp, asize, asize, j_sample, -1);
+
+        defloc = 1;
+    }
+    IMB_free_aux();
+#endif /*MPIIO*/
+
+
+    if (defloc > TOL)
+        err_flag = 1;
+
+    *diff = max(*diff, defloc);
+}
+
+
+void IMB_cmp_cat(struct comm_info *c_info, void* RECEIVED, size_t size,
+                 size_t bufpos, int unit_size, int perm,
+                 size_t* lengths, int*ranks, int* Npos,
+                 size_t *faultpos, double* diff) {
+/*
+
+                      Checks a received buffer which is a concatenation of
+                      several processes' buffers
+
+Input variables:
+
+-c_info               (type struct comm_info *)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+-RECEIVED             (type void*)
+                      The buffer to be checked
+
+-size                 (type size_t)
+                      Size of the buffer; nothing is checked if 0
+
+-bufpos               (type size_t)
+                      First position to check (in units -> unit_size); only used if perm is 0
+
+-unit_size            (type int)
+                      Base unit of positioning
+
+-perm                 (type int)
+                      Logical flag: 1 if the different rank's portions
+                      are potentially in non natural order (relevant for
+                      shared file accesses)
+
+Output variables:
+
+-lengths              (type size_t*)
+                      Lengths of the portions that were matched (written only if perm is 1)
+
+-ranks                (type int*)
+                      Ranks the matched portions belong to (written only if perm is 1)
+
+-Npos                 (type int*)
+                      Number of entries stored in 'lengths' and 'ranks' (set only if perm is 1)
+
+-faultpos             (type size_t *)
+                      Position of first found fault; CHK_NO_FAULT signals that none was found
+
+-diff                 (type double*)
+                      Diff value (1 if a fault was found)
+
+*/
+    int    rank, NP;
+    size_t pos1, pos2, pos, rsize, rem_size;
+
+    int chk_ok;
+    assign_type *a, *r;
+
+    double tmp_diff;
+
+    *diff = 0.;
+    *faultpos = CHK_NO_FAULT;
+
+    if (size == 0) return;
+
+#ifdef MPIIO
+    NP = c_info->File_num_procs;
+#else
+    NP = c_info->num_procs;
+#endif
+
+    rsize = (size + asize - 1) / asize * asize; /* size rounded up to a multiple of asize */
+
+    IMB_alloc_aux(rsize, "chk_diff 8");
+
+    r = (assign_type*)AUX;
+    a = (assign_type*)RECEIVED;
+
+    chk_ok = 0;
+
+    if (perm) {
+        *Npos = 0;
+        pos = 0;
+
+        /* Check beginning of buffer */
+
+        for (rank = 0; rank < NP && !chk_ok; rank++) {
+            if (size > 0) { /* always true here: size == 0 has returned above */
+                IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
+
+                rsize = pos2 - pos1 + 1;
+                IMB_ass_buf(AUX, rank, 0, (rsize > 0) ? rsize - 1 : 0, 1);
+            } else {
+                rsize = 0;
+                pos2 = pos1 = 0;
+            }
+
+
+
+            IMB_chk_contained(RECEIVED, min(asize, rsize), AUX, rsize,
+                              &pos, faultpos, &tmp_diff, NULL);
+
+            /*if(*faultpos < 0 && pos>=0)*/
+            /* the type of faultpos and pos is changed to size_t */
+            if (*faultpos == CHK_NO_FAULT) {
+                if (rsize <= asize)
+                    chk_ok = 1;
+                else {
+                    rem_size = rsize - pos;
+                    IMB_chk_contained((void*)(r + pos / asize), rem_size, RECEIVED, rem_size, &pos1,
+                                      faultpos, &tmp_diff, "Check of first part of received buffer");
+
+                    /*if( *faultpos < 0 && pos1>=0 )*/
+                    /* the type of faultpos and pos is changed to size_t */
+                    if (*faultpos == CHK_NO_FAULT) {
+                        lengths[*Npos] = rem_size;
+                        ranks[*Npos] = rank;
+                        pos = rem_size;
+                        (*Npos)++;
+                        chk_ok = 1;
+                    }
+                } /*if( rsize <= asize )*/
+            } /*if( *faultpos == CHK_NO_FAULT)*/
+        } /*for( rank=0...*/
+
+        if (!chk_ok) {
+            *faultpos = 0;
+            *diff     = 1.;
+        }
+
+        /* Check the rest: at each position, try the ranks until one portion matches */
+        while ((pos < size) && chk_ok) {
+            chk_ok = 0;
+
+            for (rank = 0; rank < NP && !chk_ok; rank++) {
+                IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
+
+                IMB_Assert(pos2 >= pos1);
+                rsize = pos2 - pos1 + 1;
+
+                /*if( rsize > 0 )*/
+                //{
+
+                rem_size = min(rsize, size - pos);
+
+                IMB_ass_buf(AUX, rank, 0, (rsize>0) ? rsize - 1 : 0, 1);
+
+                IMB_chk_contained(AUX, rem_size, (void*)(a + pos / asize), rem_size, &pos1,
+                                  faultpos, &tmp_diff, NULL);
+
+                /*if( *faultpos < 0 && pos1 >= 0 ) */
+                /* the type of faultpos and pos is changed to size_t */
+                if (*faultpos == CHK_NO_FAULT) {
+                    lengths[*Npos] = rem_size;
+                    ranks[*Npos] = rank;
+                    pos = pos + rsize;
+                    (*Npos)++;
+                    chk_ok = 1;
+                }
+                //}   /* end if(rsize>0) */
+            }   /* end for(rank..) */
+
+            if (!chk_ok) {
+                *faultpos = pos;
+                *diff     = 1;
+            }
+        } /* end while */
+    } else { /* end if(perm) */
+        size_t curr = 0; /* offset in RECEIVED where the current rank's portion starts */
+        void*  tmp;
+
+        for (rank = 0; rank < NP; rank++) {
+            IMB_get_rank_portion(rank, NP, size, unit_size, &pos1, &pos2);
+
+            if (pos2 >= pos1)
+                rsize = pos2 - pos1 + 1;
+            else
+                rsize = 0;
+
+            tmp = (void*)(((char*)RECEIVED) + curr);
+
+            IMB_ass_buf(AUX, rank, bufpos,
+                        (bufpos + rsize>0) ? bufpos + rsize - 1 : 0, 1);
+
+            IMB_chk_contained(AUX, rsize, tmp, rsize, &pos1, faultpos, &tmp_diff, "");
+            *diff = max(*diff, tmp_diff);
+
+            /*if(*faultpos<0 && pos1>= 0 )*/
+            /* the type of faultpos and pos is changed to size_t */
+            if (*faultpos == CHK_NO_FAULT)
+                curr += rsize;
+            else {
+                *faultpos += curr; /* make the position relative to the start of RECEIVED */
+                *diff = 1;
+                break;
+            }
+        } /*for( rank=0*/
+    } /* else if(!perm) */
+}
+
+
+
+
+void IMB_chk_contiguous(struct comm_info *c_info, int* rdispl, int* sizes,
+                        double*diff) {
+/*
+
+                          Tells whether per-process displacements/sizes describe
+                          portions that follow each other without a gap
+
+Input variables:
+
+-c_info                   (type struct comm_info *)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-rdispl                   (type int*)
+                          Displacements, one per process (reordered in place)
+
+-sizes                    (type int*)
+                          Sizes, one per process (reordered along with rdispl)
+
+Output variables:
+
+-diff                     (type double*)
+                          0 if contiguous, 1 else
+
+*/
+    int lo, hi, n_ranks, idx, expected, held;
+
+#ifdef MPIIO
+    n_ranks = c_info->File_num_procs;
+#else
+    n_ranks = c_info->num_procs;
+#endif
+
+    /* order the (displacement, size) pairs by ascending displacement */
+    for (lo = 0; lo < n_ranks; lo++)
+        for (hi = lo + 1; hi < n_ranks; hi++) {
+            if (rdispl[hi] >= rdispl[lo])
+                continue;
+            held       = rdispl[lo];
+            rdispl[lo] = rdispl[hi];
+            rdispl[hi] = held;
+            held      = sizes[lo];
+            sizes[lo] = sizes[hi];
+            sizes[hi] = held;
+        }
+
+    /* every non-empty portion has to begin where the previous ones ended */
+    *diff = 0.;
+    expected = 0;
+    for (idx = 0; idx < n_ranks; idx++) {
+        if (rdispl[idx] != expected && sizes[idx] != 0)
+            *diff = 1.;
+        else
+            expected += sizes[idx];
+    }
+    if (*diff <= TOL)
+        return;
+
+    fprintf(unit, "check of contiguity of received buffer portions failed\n");
+    fprintf(unit, "Got the following portions/displacements:\n");
+    for (idx = 0; idx < n_ranks; idx++)
+        fprintf(unit, "%d / %d; ", sizes[idx], rdispl[idx]);
+    fprintf(unit, "\n");
+}
+
+
+
+
+void IMB_chk_distr(struct comm_info *c_info, size_t size, int n_sample,
+                   size_t* lengths, int* ranks, int Npos,
+                   double *diff) {
+/*
+
+                      (Only for MPI-IO shared file pointer accesses)
+                      Checks whether a found set of section lengths/ranks in
+                      a file meets expectations
+
+Input variables:
+
+-c_info               (type struct comm_info *)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+                      (c_info->reccnt is overwritten: used as per-rank counter)
+
+-size                 (type size_t)
+                      Size of buffer; nothing is checked if 0
+
+
+-n_sample             (type int)
+                      Number of samples expected in file
+
+
+-lengths              (type size_t*)
+                      Array of section lengths found
+
+
+-ranks                (type int*)
+                      Array of ranks belonging to sections
+
+
+-Npos                 (type int)
+                      Number of sections
+
+Output variables:
+
+-diff                 (type double *)
+                      0 if set is consistent, 1 else
+
+*/
+    int i, NP, rank;
+    size_t pos1, pos2;
+
+    *diff = 0.;
+
+    if (size == 0) return;
+
+#ifdef MPIIO
+    NP = c_info->File_num_procs;
+#else
+    NP = c_info->num_procs;
+#endif
+
+    for (rank = 0; rank < NP; rank++)
+        c_info->reccnt[rank] = 0;
+
+    i = 0;
+    while (i < Npos) {
+        rank = ranks[i];
+
+        IMB_get_rank_portion(rank, NP, size, asize, &pos1, &pos2);
+
+        if (pos2 - pos1 + 1 == lengths[i])
+            c_info->reccnt[rank]++;
+        else if (i + 1 < Npos && ranks[i + 1] == rank &&
+                 pos2 - pos1 + 1 == lengths[i] + lengths[i + 1]) {
+            /* portion split in two consecutive sections: count once, skip the 2nd; */
+            /* i + 1 < Npos keeps the look-ahead inside the arrays                  */
+            c_info->reccnt[rank]++;
+            i++;
+        }
+        i++;
+    } /*while*/
+
+    /* each rank with a non-empty portion has to show up once per sample */
+    for (rank = 0; rank < NP; rank++) {
+        IMB_get_rank_portion(rank, NP, size, asize, &pos1, &pos2);
+        if (pos2 >= pos1 && c_info->reccnt[rank] != n_sample)
+            *diff = 1;
+    }
+
+    if (*diff > TOL) {
+        fprintf(unit, "check of contiguity of received buffer portions failed\n");
+        fprintf(unit, "Got the following portions/from process:\n");
+
+        for (i = 0; i < Npos; i++) {
+            fprintf(unit,
+#ifdef WIN_IMB
+                "%I64u / %d; ",
+#else
+                "%lu / %d; ",
+#endif
+                lengths[i], ranks[i]);
+        }
+        fprintf(unit, "\n");
+    }
+}
+
+void IMB_chk_contained(void* part, size_t p_size, void* whole,
+                       size_t w_size, size_t* pos, size_t* fpos,
+                       double* D, char*msg) {
+/*
+
+                      Checks whether a buffer part is contained in a larger buffer
+                      (exploits uniqueness of buffer values, so check is trivial)
+
+Input variables:
+
+-part                 (type void*)
+                      Partial buffer
+
+-p_size               (type size_t)
+                      Size of partial buffer in bytes
+
+-whole                (type void*)
+                      Whole buffer
+
+-w_size               (type size_t)
+                      Size of whole buffer in bytes
+
+-msg                  (type char*)
+                      Accompanying message (not used inside this routine)
+
+Output variables:
+
+-pos                  (type size_t*)
+                      Byte position where partial buffer begins in whole
+                      buffer if search was successful, 0 otherwise
+
+-fpos                 (type size_t*)
+                      CHK_NO_FAULT if no fault occurred; 0 if no start
+                      position was found; otherwise as set by IMB_ddiff
+
+-D                    (type double*)
+                      0 if check positive, 1 else
+
+*/
+    assign_type *elem_part, *elem_whole;
+    long crc_part;
+    size_t n_whole, n_part, last_start;
+
+    elem_part = (assign_type*)part;
+    elem_whole = (assign_type*)whole;
+
+    /* defaults: no fault, check positive, part located at offset 0 */
+    *fpos = CHK_NO_FAULT;  /* instead of -1*/
+    *D = 0.;
+    *pos = 0;
+
+    if (p_size > w_size) {
+        /* part is larger than whole, so it can not be contained */
+        *fpos = 0;
+    } else if (p_size != 0 && p_size < asize) {
+        /* part is shorter than one assign_type: slide byte-wise over */
+        /* whole and compare checksums of p_size bytes                */
+        crc_part = IMB_compute_crc((char*)part, p_size);
+        last_start = w_size - p_size;
+
+        for (; *pos <= last_start; (*pos)++) {
+            if (IMB_compute_crc(((char*)whole) + *pos, p_size) == crc_part)
+                break;
+        }
+
+        if (*pos > last_start) {
+            /* ran over the end without a checksum match */
+            *pos  = 0;
+            *fpos = 0;
+            *D    = 1.;
+        }
+    } else if (p_size != 0) {
+        /* search in units of assign_type: locate the first element of */
+        /* part inside whole, then compare the remainder element-wise  */
+        n_whole = w_size / asize;
+        n_part = p_size / asize;
+        last_start = n_whole - n_part;
+
+        for (; *pos <= last_start; (*pos)++) {
+            if (!(A_ABS(elem_part[0] - elem_whole[*pos]) > TOL))
+                break;
+        }
+
+        if (*pos > last_start) {
+            *D = 1.;
+            *pos = 0;
+            *fpos = 0;
+        } else {
+            *D = IMB_ddiff(elem_part, elem_whole + *pos, n_part, fpos);
+        }
+
+        *pos *= asize;  /* element index -> byte offset */
+    }
+
+    /* any recorded fault position marks the check as failed */
+    if (*fpos != CHK_NO_FAULT /*>= 0*/)
+        *D = 1.;
+}
+
+
+
+/********************* CHECKSUM ********************/
+
+/* Most of following CRC-32 stuff is from zmodem source code */
+
+/* I claim no copyright over the contents of this file.  -- Rahul Dhesi */
+
+/*
+Checksum:  951252172      (check or update this with "brik")
+*/
+#define INITCRC 0xFFFFFFFFL
+/*
+ * Copyright (C) 1986 Gary S. Brown.  You may use this program, or
+ * code or tables extracted from it, as desired without restriction.
+ */
+
+/* First, the polynomial itself and its table of feedback terms.  The  */
+/* polynomial is                                                       */
+/* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */
+/* Note that we take it "backwards" and put the highest-order term in  */
+/* the lowest-order bit.  The X^32 term is "implied"; the LSB is the   */
+/* X^31 term, etc.  The X^0 term (usually shown as "+1") results in    */
+/* the MSB being 1.                                                    */
+
+/* Note that the usual hardware shift register implementation, which   */
+/* is what we're using (we're merely optimizing it by doing eight-bit  */
+/* chunks at a time) shifts bits into the lowest-order term.  In our   */
+/* implementation, that means shifting towards the right.  Why do we   */
+/* do it this way?  Because the calculated CRC must be transmitted in  */
+/* order from highest-order term to lowest-order term.  UARTs transmit */
+/* characters in order from LSB to MSB.  By storing the CRC this way,  */
+/* we hand it to the UART in the order low-byte to high-byte; the UART */
+/* sends each low-bit to high-bit; and the result is transmission bit  */
+/* by bit from highest- to lowest-order term without requiring any bit */
+/* shuffling on our part.  Reception works similarly.                  */
+
+/* The feedback terms table consists of 256, 32-bit entries.  Notes:   */
+/*                                                                     */
+/*     The table can be generated at runtime if desired; code to do so */
+/*     is shown later.  It might not be obvious, but the feedback      */
+/*     terms simply represent the results of eight shift/xor opera-    */
+/*     tions for all combinations of data and CRC register values.     */
+/*                                                                     */
+/*     The values must be right-shifted by eight bits by the "updcrc"  */
+/*     logic; the shift must be unsigned (bring in zeroes).  On some   */
+/*     hardware you could probably optimize the shift in assembler by  */
+/*     using byte-swap instructions.                                   */
+
+
+static long crc_32_tab[] = { /* 256 feedback terms for CRC polynomial 0xedb88320; indexed by IMB_compute_crc with a value masked to 0..255 */
+    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+
+
+long IMB_compute_crc (char* buf, size_t size) {
+/*
+                      Computes a table-driven CRC-32 style checksum of a buffer
+Input variables:
+
+-buf                  (type char*)     bytes to be checksummed
+-size                 (type size_t)    number of bytes in buf
+
+Return value          (type long)
+                      0 for an empty buffer, else the CRC register contents
+*/
+    long crccode = INITCRC;
+
+    if (size == 0)
+        crccode = 0;
+    else {
+        size_t i;   /* same type as size: an int index would overflow for size > INT_MAX */
+        for (i = 0; i < size; i++)
+            crccode = crc_32_tab[(int)((crccode) ^ (buf[i])) & 0xff] ^
+                      (((crccode) >> 8) & 0x00FFFFFFL);
+    }
+
+    return(crccode);
+}
+
+#endif
diff --git a/src_c/IMB_comm_info.h b/src_c/IMB_comm_info.h
new file mode 100644
index 00000000..07b07448
--- /dev/null
+++ b/src_c/IMB_comm_info.h
@@ -0,0 +1,176 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ ***************************************************************************/
+
+
+
+
+
+#ifndef _COMM_INFO_H
+#define _COMM_INFO_H
+
+#include "IMB_declare.h"
+
+typedef enum {                      /* type of comm_info.contig_type */
+    CT_BASE        = 0,
+    CT_BASE_VEC    = 1,
+    CT_RESIZE      = 2,
+    CT_RESIZE_VEC  = 3
+} CONTIG_TYPES;
+
+#ifdef MPIIO
+typedef struct { int Locsize; MPI_Offset Offset; int Totalsize;} SPLITTING; /* type of comm_info.split */
+#endif /* MPIIO */
+
+struct comm_info {
+/* Communication information as for MPI-1/2 parts */
+
+    int         w_num_procs;        /* number of procs in COMM_WORLD            */
+    int         w_rank;             /* rank of actual process in COMM_WORLD     */
+
+    int         NP;                 /* #processes participating in benchmarks   */
+    int         px, py;             /* processes are part of px x py topology   */
+
+    MPI_Comm    communicator;       /* underlying communicator for benchmark(s) */
+
+    int         num_procs;          /* number of processes in communicator      */
+    int         rank;               /* rank of actual process in communicator   */
+    int         root_shift;         /* switch for root change at each iteration */
+    int         sync;               /* switch for rank synchronization after each iter */
+    int         size_scale;
+
+    CONTIG_TYPES    contig_type;    /* one of the CT_* enumerators              */
+    MPI_Datatype    s_data_type;    /* data type of sent data                   */
+    MPI_Datatype    r_data_type;    /* data type of received data               */
+
+    MPI_Datatype    red_data_type;  /* data type of reduced data               */
+    MPI_Op      op_type;            /* operation type                          */
+
+    int         zero_size;
+    int         pair0, pair1;       /* process pair                            */
+    int         select_tag;         /* 0/1 for tag selection off/on            */
+    int         select_source;      /* 0/1 for sender selection off/on         */
+
+    void*       s_buffer;           /* send    buffer                          */
+    assign_type*    s_data;         /* assign_type equivalent of s_buffer      */
+    size_t      s_alloc;            /* #bytes allocated in s_buffer            */
+    void*       r_buffer;           /* receive buffer                          */
+    assign_type*    r_data;         /* assign_type equivalent of r_buffer      */
+    size_t          r_alloc;        /* #bytes allocated in r_buffer            */
+
+    /* IMB 3.1 << */
+    float       max_mem, used_mem;  /* max. allowed / used GBytes for all      */
+    /* message  buffers                        */
+    /* >> IMB 3.1  */
+
+    int     n_lens;         /* # of selected lengths by -msglen option */
+    int*    msglen;         /* list of  "       "                  "   */
+
+    int     group_mode;     /* Mode of running groups (<0,0,>0)        */
+    int     n_groups;       /* No. of independent groups               */
+    int     group_no;       /* own group index                         */
+    int*    g_sizes;        /* array of group sizes                    */
+    int*    g_ranks;        /* w_ranks constituting the groups         */
+
+    int*    sndcnt;         /* send count argument for global ops.     */
+    int*    sdispl;         /* displacement argument for global ops.   */
+    int*    reccnt;         /* recv count argument for global ops.; IMB_chk_distr also reuses it as a per-rank counter */
+    int*    rdispl;         /* displacement argument for global ops.   */
+
+    /* IMB 3.2.3 << */
+    int     min_msg_log;
+    int     max_msg_log;
+    /* >> IMB 3.2.3  */
+
+    MPI_Errhandler  ERR;
+
+#ifdef MPIIO
+    /*   FILE INFORMATION     */
+    char*       filename;
+    MPI_Comm    File_comm;
+    int         File_num_procs;
+    int         all_io_procs;
+    int         File_rank;
+
+    MPI_File    fh;
+
+    MPI_Datatype    etype;
+    Type_Size       e_size;
+    MPI_Datatype    filetype;
+
+    SPLITTING   split;
+    int         amode;
+    MPI_Info    info;   /* NOTE(review): a member named info is also declared in the EXT/RMA section of this struct */
+
+    /* View: */
+    MPI_Offset  disp;
+    char*       datarep;
+    MPI_Datatype    view;
+    MPI_Errhandler  ERRF;
+#endif /* MPIIO */
+
+#if (defined EXT || defined RMA)
+    MPI_Win         WIN;
+    MPI_Info        info;   /* NOTE(review): would be a duplicate member if MPIIO were defined together with EXT or RMA */
+    MPI_Errhandler  ERRW;
+#endif /* EXT || RMA */
+
+};
+
+#endif
diff --git a/src/IMB_comments.h b/src_c/IMB_comments.h
similarity index 96%
rename from src/IMB_comments.h
rename to src_c/IMB_comments.h
index 2b7d2ee9..72a1ba53 100644
--- a/src/IMB_comments.h
+++ b/src_c/IMB_comments.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
diff --git a/src/IMB_cpu_exploit.c b/src_c/IMB_cpu_exploit.c
similarity index 91%
rename from src/IMB_cpu_exploit.c
rename to src_c/IMB_cpu_exploit.c
index 223da1f3..8e08e0db 100644
--- a/src/IMB_cpu_exploit.c
+++ b/src_c/IMB_cpu_exploit.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In
     doc/IMB_Users_Guide.pdf
@@ -70,7 +69,12 @@ For more documentation than found here, see
 #include "IMB_declare.h"
 #include "IMB_prototypes.h"
 
-void IMB_cpu_exploit(float target_secs, int initialize)
+double IMB_cpu_exploit_reworked(float target_secs, int initialize) {
+    IMB_cpu_exploit(target_secs, initialize);  /* forwards both arguments unchanged */
+    return MFlops;  /* global MFlops; presumably updated by the call above -- confirm */
+}
+
+void IMB_cpu_exploit(float target_secs, int initialize) {
 /*
 
 
@@ -92,7 +96,7 @@ Input variables:
 
 
 */
-{
+
 /*
 in: target_secs: desired runtime (about) of the current call
     initialize: 1/0 for first/following call with this value of target_secs
@@ -120,7 +124,7 @@ in: target_secs: desired runtime (about) of the current call
         for (repeat = 0; repeat < Nrep; repeat++) {
             for (i=0; i<SIZE; i++) {
                 for (j=0; j<SIZE; j++) {
-                    x[i] = x[i] + a[i][j]*y[j];
+                    x[i] = x[i] + a[i][j] * y[j];
                 }
             }
         }
@@ -132,7 +136,7 @@ in: target_secs: desired runtime (about) of the current call
     }
 
     if (initialize) {
-        target_reps = max(1, (int)(target_secs*Nrep));
+        target_reps = max(1, (int)(target_secs * Nrep));
         t1 = MPI_Wtime();
 
         for (repeat = 0; repeat < target_reps; repeat++) {
@@ -144,12 +148,12 @@ in: target_secs: desired runtime (about) of the current call
         }
         t2 = MPI_Wtime();
 
-        tCPU = 1000000.*(t2-t1);
+        tCPU = 1000000. * (t2-t1);
     } else {
         for( repeat=0; repeat < target_reps; repeat++ ) {
             for (i=0; i<SIZE; i++) {
                 for (j=0; j<SIZE; j++) {
-                    x[i] = x[i] + a[i][j]*y[j];
+                    x[i] = x[i] + a[i][j] * y[j];
                 }
             }
         }
diff --git a/src/IMB_declare.c b/src_c/IMB_declare.c
similarity index 92%
rename from src/IMB_declare.c
rename to src_c/IMB_declare.c
index c3115471..bba46975 100644
--- a/src/IMB_declare.c
+++ b/src_c/IMB_declare.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -98,7 +97,7 @@ char aux_string[out_fields*ow_format];
 char format [out_fields*7];
 
 /* ARRAY OF CASES, EITHER DEFAULT OR ARGUMENT LIST */
-char **cases ;           
+char **cases ;
 
 /* Error status  */
 
@@ -121,6 +120,6 @@ double MFlops = -1.;
 int num_alloc=0, num_free=0;
 
 #ifdef USE_MPI_INIT_THREAD
-int mpi_thread_environment = MPI_THREAD_SINGLE;
-int mpi_thread_desired	   = MPI_THREAD_MULTIPLE;
+int mpi_thread_environment  = MPI_THREAD_SINGLE;
+int mpi_thread_desired      = MPI_THREAD_MULTIPLE;
 #endif /*USE_MPI_INIT_THREAD*/
diff --git a/src/IMB_declare.h b/src_c/IMB_declare.h
similarity index 94%
rename from src/IMB_declare.h
rename to src_c/IMB_declare.h
index 83af716f..fe0dc37c 100644
--- a/src/IMB_declare.h
+++ b/src_c/IMB_declare.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -177,8 +176,10 @@ extern double *all_defect;
 /* IMB 3.1 << */
 /* include Windows case */
 #ifndef WIN_IMB
+#ifndef __cplusplus
 #define min(a,b) ((a)<(b)?(a):(b))
 #define max(a,b) ((a)>(b)?(a):(b))
+#endif
 #endif /*WIN_IMB*/
 /* >> IMB 3.1  */
 
@@ -240,11 +241,11 @@ extern int IMB_internal_barrier;
 
 #define IMB_Assert(expr)  assert(expr)
 
-#define IMB_i_alloc(type, B, Len, where ) \
-	{	\
-    	    IMB_Assert(Len>0); \
-	    (B) = (type*) IMB_v_alloc(sizeof(type)*(Len), where); \
-	}
+#define IMB_i_alloc(type, B, Len, where )                       \
+    {   /* B = sizeof(type)*(Len) bytes from IMB_v_alloc */     \
+        IMB_Assert((Len) > 0);  /* Len may be an expression */  \
+        (B) = (type*) IMB_v_alloc(sizeof(type)*(Len), where);   \
+    }
 
 #define IMB_do_n_barriers(comm, iter)       \
     {                                       \
@@ -261,9 +262,9 @@ extern int IMB_internal_barrier;
     }                                       \
 
 #ifdef WIN_IMB
-#define IMB_strcasecmp(s1, s2)	stricmp( (s1), (s2))
+#define IMB_strcasecmp(s1, s2)  stricmp( (s1), (s2))
 #else /*linux*/
-#define IMB_strcasecmp(s1, s2)	strcasecmp( (s1), (s2))
+#define IMB_strcasecmp(s1, s2)  strcasecmp( (s1), (s2))
 #endif /*WIN_IMB*/
 
 #endif /*#ifndef _DECLARE_H*/
diff --git a/src/IMB_err_check.h b/src_c/IMB_err_check.h
similarity index 94%
rename from src/IMB_err_check.h
rename to src_c/IMB_err_check.h
index 734582a2..2e72bc91 100644
--- a/src/IMB_err_check.h
+++ b/src_c/IMB_err_check.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
diff --git a/src/IMB_err_handler.c b/src_c/IMB_err_handler.c
similarity index 60%
rename from src/IMB_err_handler.c
rename to src_c/IMB_err_handler.c
index e5546bc0..6c23eff6 100644
--- a/src/IMB_err_handler.c
+++ b/src_c/IMB_err_handler.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -85,257 +84,198 @@ For more documentation than found here, see
 
 
 
-void IMB_err_hand(int ERR_IS_MPI, int ERR_CODE )
+void IMB_err_hand(int ERR_IS_MPI, int ERR_CODE) {
 /*
 
-                      
-                      Handles input error code. 
+                      Handles input error code.
                       Retrieves error string (MPI_Error_string) if is an MPI error code
                       Calls MPI_Abort
-                      
 
+Input variables:
 
-Input variables: 
-
--ERR_IS_MPI           (type int)                      
+-ERR_IS_MPI           (type int)
                       Logical flag: error code belongs to MPI or not
-                      
 
--ERR_CODE             (type int)                      
+-ERR_CODE             (type int)
                       Input error code. If an MPI error code, the string is retrieved.
                       Anyway MPI_Abort is called
-                      
-
 
 */
-{
-  int i_break;
-  
-  if(! ERR_IS_MPI )
-    {
-      i_break=1;
-      fprintf(stderr,"Application error code %d occurred\n",ERR_CODE);
-      switch(ERR_CODE)
-	{
-	case APPL_ERR_INVCASE: fprintf(stderr,"INVALID benchmark name\n");
-	  break;
-	case APPL_ERR_MALLOC : fprintf(stderr,"Malloc failed \n");
-	  break;
-	case APPL_ERR_CINFO  : fprintf(stderr,"Invalid \"c_info\" setup\n");
-	  break;
-	}
-      ERR_CODE=MPI_ERR_INTERN;
-    }
-  else if ( ERR_CODE != MPI_SUCCESS )
-    {
-      char aux_string[MPI_MAX_ERROR_STRING];
-      int L;
-      i_break=1;
-      fprintf(stderr,"MPI error  %d occurred\n",ERR_CODE);
-      MPI_Error_string(ERR_CODE,aux_string, &L);
-      fprintf(stderr,"%s\n",aux_string);
-    }
-  else i_break=0;
-  
-  if(i_break )
-    { 
-      MPI_Abort(MPI_COMM_WORLD, ERR_CODE);
+    int i_break;
+
+    if (!ERR_IS_MPI) {
+        i_break = 1;
+        fprintf(stderr, "Application error code %d occurred\n", ERR_CODE);
+        switch (ERR_CODE) {
+            case APPL_ERR_INVCASE: fprintf(stderr, "INVALID benchmark name\n");
+                break;
+            case APPL_ERR_MALLOC: fprintf(stderr, "Malloc failed \n");
+                break;
+            case APPL_ERR_CINFO: fprintf(stderr, "Invalid \"c_info\" setup\n");
+                break;
+        }
+        ERR_CODE = MPI_ERR_INTERN;
+    } else if (ERR_CODE != MPI_SUCCESS) {
+        char aux_string[MPI_MAX_ERROR_STRING];
+        int L;
+        i_break = 1;
+        fprintf(stderr, "MPI error  %d occurred\n", ERR_CODE);
+        MPI_Error_string(ERR_CODE, aux_string, &L);
+        fprintf(stderr, "%s\n", aux_string);
     }
-}
-
-
+    else i_break = 0;
 
+    if (i_break)
+        MPI_Abort(MPI_COMM_WORLD, ERR_CODE);
+}
 
-void IMB_errors_mpi(MPI_Comm * comm, int* ierr, ...)
+void IMB_errors_mpi(MPI_Comm * comm, int* ierr, ...) {
 /*
 
-                      
                       Error handler callback for MPI-1 errors
-                      
 
+Input variables:
 
-Input variables: 
-
--comm                 (type MPI_Comm *)                      
+-comm                 (type MPI_Comm *)
                       Communicator which is in error
-                      
 
--ierr                 (type int*)                      
+-ierr                 (type int*)
                       MPI error code
-                      
-
 
 */
-{
-IMB_err_hand(1,*ierr);
+    IMB_err_hand(1, *ierr);
 }
 
 #ifdef EXT
 
-void IMB_errors_win(MPI_Win * WIN, int* ierr, ...)
+void IMB_errors_win(MPI_Win * WIN, int* ierr, ...) {
 /*
 
-                      
                       Error handler callback for onesided communications errors
-                      
 
+Input variables:
 
-Input variables: 
-
--WIN                  (type MPI_Win *)                      
+-WIN                  (type MPI_Win *)
                       MPI Window which is in error
-                      
 
--ierr                 (type int*)                      
+-ierr                 (type int*)
                       MPI error code
-                      
-
 
 */
-{
-IMB_err_hand(1,*ierr);
+    IMB_err_hand(1, *ierr);
 }
 #endif
 
 #ifdef MPIIO
 
-void IMB_errors_io (MPI_File * fh, int* ierr, ...)
+void IMB_errors_io(MPI_File * fh, int* ierr, ...) {
 /*
 
-                      
                       Error handler callback for MPI-IO errors
-                      
 
+Input variables:
 
-Input variables: 
-
--fh                   (type MPI_File *)                      
+-fh                   (type MPI_File *)
                       MPI File which is in error
-                      
 
--ierr                 (type int*)                      
+-ierr                 (type int*)
                       MPI error code
-                      
-
 
 */
-{
-IMB_err_hand(1,*ierr);
+    IMB_err_hand(1, *ierr);
 }
 #endif
 
 
 
 
-void IMB_init_errhand(struct comm_info* c_info)
+void IMB_init_errhand(struct comm_info* c_info) {
 /*
 
-                      
                       Creates MPI error handler component of c_info by MPI_<>_create_errhandler
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      Error handler component is created 
+
+                      Error handler component is created
                       (c_info->ERR for MPI-1; c_info->ERRW for EXT; c_info->ERRF for MPIIO)
-                      
+                      (only if SET_ERRH is defined; MPI_Comm_create_errhandler is used because MPI_Errhandler_create was removed in MPI-3.0)
 
 
 */
-{
 #ifdef SET_ERRH
 
-MPI_Errhandler_create(IMB_errors_mpi,&c_info->ERR);
+    MPI_Comm_create_errhandler(IMB_errors_mpi, &c_info->ERR);
 
 #ifdef EXT
-MPI_Win_create_errhandler(IMB_errors_win, &c_info->ERRW);
+    MPI_Win_create_errhandler(IMB_errors_win, &c_info->ERRW);
 #endif
 
 #ifdef MPIIO
-MPI_File_create_errhandler(IMB_errors_io, &c_info->ERRF);
+    MPI_File_create_errhandler(IMB_errors_io, &c_info->ERRF);
 #endif
 
 #endif
 }
 
-
-
-
-void IMB_set_errhand(struct comm_info* c_info)
+void IMB_set_errhand(struct comm_info* c_info) {
 /*
 
-                      
                       Sets MPI error handler component of c_info by MPI_<>_set_errhandler
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      Error handler component is set
-                      
 
+                      Error handler component is set; null handles are skipped (MPI_Comm_set_errhandler replaces MPI_Errhandler_set, removed in MPI-3.0)
 
 */
-{
 #ifdef SET_ERRH
 
-if( c_info->communicator != MPI_COMM_NULL )
-MPI_Errhandler_set(c_info->communicator, c_info->ERR);
+    if (c_info->communicator != MPI_COMM_NULL)
+        MPI_Comm_set_errhandler(c_info->communicator, c_info->ERR);
 
 #ifdef EXT
-if( c_info->WIN != MPI_WIN_NULL )
-MPI_Win_set_errhandler(c_info->WIN, c_info->ERRW);
+    if (c_info->WIN != MPI_WIN_NULL)
+        MPI_Win_set_errhandler(c_info->WIN, c_info->ERRW);
 #endif
 
 #ifdef MPIIO
-if( c_info->fh != MPI_FILE_NULL )
-MPI_File_set_errhandler(c_info->fh, c_info->ERRF);
+    if (c_info->fh != MPI_FILE_NULL)
+        MPI_File_set_errhandler(c_info->fh, c_info->ERRF);
 #endif
 
 #endif
 }
 
-
-
-
-void IMB_del_errhand(struct comm_info* c_info)
+void IMB_del_errhand(struct comm_info* c_info) {
 /*
 
-                      
                       Deletes MPI error handler component of c_info by MPI_Errhandler_free
-                      
 
+In/out variables:
 
-In/out variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      MPI error handler component of c_info is deleted
-                      
 
+                      MPI error handler component of c_info is deleted
 
 */
-{
 #ifdef SET_ERRH
-MPI_Errhandler_free(&c_info->ERR);
+    MPI_Errhandler_free(&c_info->ERR);
 
 #ifdef EXT
-MPI_Errhandler_free(&c_info->ERRW);
+    MPI_Errhandler_free(&c_info->ERRW);
 #endif
 
 #ifdef MPIIO
-MPI_Errhandler_free(&c_info->ERRF);
+    MPI_Errhandler_free(&c_info->ERRF);
 #endif
 
 #endif
diff --git a/src/IMB_exchange.c b/src_c/IMB_exchange.c
similarity index 55%
rename from src/IMB_exchange.c
rename to src_c/IMB_exchange.c
index ab030c48..9f1e55de 100644
--- a/src/IMB_exchange.c
+++ b/src_c/IMB_exchange.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -50,7 +49,7 @@ goods and services.
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -91,122 +90,110 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_exchange(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                  MODES RUN_MODE, double* time)
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Chainwise exchange; MPI_Isend (left+right) + MPI_Recv (right+left)
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
 
 */
-{
-  double t1, t2;
-  int  i;
-  
-  Type_Size s_size, r_size;
-  int s_num, r_num;
-  int s_tag, r_tag;
-  int left, right;
-  MPI_Status  stat[2];
-  MPI_Request request[2];
-  
-#ifdef CHECK 
-  defect=0;
+    int  i;
+
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    int s_tag, r_tag;
+    int left, right;
+    MPI_Status  stat[2];
+    MPI_Request request[2];
+
+#ifdef CHECK
+    defect = 0;
 #endif
-  ierr = 0;
+    ierr = 0;
 
-  /*GET SIZE OF DATA TYPE's in s_size and r_size*/
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-  s_tag = 1;
-  r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
-
-  
-  if(c_info->rank != -1)
+    /*GET SIZE OF DATA TYPE's in s_size and r_size*/
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0))
     {
-      if(c_info->rank < c_info->num_procs-1)   right  = c_info->rank+1;
-      if(c_info->rank > 0)                     left   = c_info->rank-1;
-      
-      if(c_info->rank == c_info->num_procs-1)  right  = 0;
-      if(c_info->rank == 0)                    left   = c_info->num_procs-1 ;
-      
-      if((c_info->rank >= 0) && (c_info->rank <= c_info->num_procs-1))
-	{
-          for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-	  t1 = MPI_Wtime();
-	  for(i=0; i< ITERATIONS->n_sample; i++)
-	    { 
-	      ierr= MPI_Isend((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              s_num,c_info->s_data_type,
-			      right,s_tag,c_info->communicator,&request[0]);
-	      MPI_ERRHAND(ierr);
-	      ierr= MPI_Isend((char*)c_info->s_buffer+size+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              s_num,c_info->s_data_type,
-			      left ,s_tag,c_info->communicator,&request[1]);
-	      MPI_ERRHAND(ierr);
-
-	      ierr= MPI_Recv( (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                              r_num,c_info->r_data_type,
-			      left ,r_tag,c_info->communicator,stat);
-	      MPI_ERRHAND(ierr);
-
-              CHK_DIFF("Exchange",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                       0, size, size, 1, 
-                       put, 0, ITERATIONS->n_sample, i,
-                       left, &defect);
-
-	      ierr= MPI_Recv( (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                              r_num,c_info->r_data_type,
-			      right,r_tag,c_info->communicator,stat);
-	      MPI_ERRHAND(ierr);
-	      
-              CHK_DIFF("Exchange",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                       s_num, size, size, 1, 
-                       put, 0, ITERATIONS->n_sample, i,
-                       right, &defect);
-
-	      ierr= MPI_Waitall(2,request,stat);
-	      MPI_ERRHAND(ierr);   
-	    }
-	  t2 = MPI_Wtime();
-	  *time=(t2 - t1)/ITERATIONS->n_sample;
-	}
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
-  else
-    { 
-      *time = 0.; 
+    s_tag = 1;
+    r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
+
+    size *= c_info->size_scale;
+
+    *time = 0;
+    if (c_info->rank != -1) {
+        if (c_info->rank < c_info->num_procs - 1)   right = c_info->rank + 1;
+        if (c_info->rank > 0)                       left = c_info->rank - 1;
+        if (c_info->rank == c_info->num_procs - 1)  right = 0;
+        if (c_info->rank == 0)                      left = c_info->num_procs - 1;
+
+        if ((c_info->rank >= 0) && (c_info->rank <= c_info->num_procs - 1)) {
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->communicator);
+
+            *time -= MPI_Wtime();
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                ierr = MPI_Isend((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                 s_num, c_info->s_data_type,
+                                 right, s_tag, c_info->communicator, &request[0]);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Isend((char*)c_info->s_buffer + size + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                 s_num, c_info->s_data_type,
+                                 left, s_tag, c_info->communicator, &request[1]);
+                MPI_ERRHAND(ierr);
+
+                ierr = MPI_Recv((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                r_num, c_info->r_data_type,
+                                left, r_tag, c_info->communicator, stat);
+                MPI_ERRHAND(ierr);
+
+                CHK_DIFF("Exchange", c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                         0, size, size, 1,
+                         put, 0, ITERATIONS->n_sample, i,
+                         left, &defect);
+
+                ierr = MPI_Recv((char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                r_num, c_info->r_data_type,
+                                right, r_tag, c_info->communicator, stat);
+                MPI_ERRHAND(ierr);
+
+                CHK_DIFF("Exchange", c_info, (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                         s_num, size, size, 1,
+                         put, 0, ITERATIONS->n_sample, i,
+                         right, &defect);
+
+                ierr = MPI_Waitall(2, request, stat);
+                MPI_ERRHAND(ierr);
+            }
+            *time += MPI_Wtime();
+        }
     }
+    *time /= ITERATIONS->n_sample;
 }
 
 
diff --git a/src_c/IMB_g_info.c b/src_c/IMB_g_info.c
new file mode 100644
index 00000000..f796fa7c
--- /dev/null
+++ b/src_c/IMB_g_info.c
@@ -0,0 +1,279 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_g_info.c 
+
+ Implemented functions: 
+
+ IMB_general_info;
+ IMB_make_sys_info;
+ IMB_end_msg;
+
+ ***************************************************************************/
+
+
+
+
+char* VERSION="2019";
+
+#include <stdio.h>
+#include <time.h>
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+extern FILE* unit;
+
+
+
+
+void IMB_general_info() {
+/*
+
+                      Prints the run banner to the output stream 'unit':
+                      benchmark version and part, current date, and the system
+                      information (see 'IMB_make_sys_info')
+
+*/
+    time_t now = time(NULL);
+    struct tm* local = localtime(&now);
+    /* localtime may return NULL, and asctime(NULL) is undefined behavior */
+    const char* stamp = (local != NULL) ? asctime(local) : NULL;
+
+    fprintf(unit, "#------------------------------------------------------------\n");
+#ifdef MPI1
+    fprintf(unit, "#    Intel(R) MPI Benchmarks %s, MPI-1 part    \n", VERSION);
+#elif defined EXT
+    fprintf(unit, "#    Intel(R) MPI Benchmarks %s, MPI-2 part    \n", VERSION);
+#elif defined MPIIO
+    fprintf(unit, "#    Intel(R) MPI Benchmarks %s, MPI-IO part   \n", VERSION);
+#elif defined NBC
+    fprintf(unit, "#    Intel(R) MPI Benchmarks %s, MPI-NBC part  \n", VERSION);
+#elif defined RMA
+    fprintf(unit, "#    Intel(R) MPI Benchmarks %s, MPI-RMA part  \n", VERSION);
+#endif
+    fprintf(unit, "#------------------------------------------------------------\n");
+    /* asctime() output already ends with a newline; the fallback adds its own */
+    fprintf(unit, "# Date                  : %s", (stamp != NULL) ? stamp : "unknown\n");
+
+    IMB_make_sys_info();
+    fprintf(unit, "\n");
+}
+
+/* IMB 3.1 << */
+/* include WIN case */
+#ifndef WIN_IMB
+#include <sys/utsname.h>
+#else
+#include <Windows.h>
+#define INFO_BUFFER_SIZE 32767
+#endif
+/* >> IMB 3.1  */
+
+
+void IMB_make_sys_info() {
+/*
+
+                      Prints basic information about the system and the MPI library
+                      to the output stream 'unit': machine, system, release and
+                      version (uname() fields, or the Win32 equivalents under WIN_IMB),
+                      the MPI standard version and the MPI thread environment label
+
+*/
+    int mpi_subversion = 0, mpi_version = 0;
+    int imb_major = 0, imb_minor = 0;
+    /* IMB 3.1 << */
+#ifndef WIN_IMB
+    struct utsname info;
+    uname(&info);
+    MPI_Get_version(&mpi_version, &mpi_subversion);
+
+    fprintf(unit, "# Machine               : %s\n", info.machine);
+    fprintf(unit, "# System                : %s\n", info.sysname);
+    fprintf(unit, "# Release               : %s\n", info.release);
+    fprintf(unit, "# Version               : %s\n", info.version);
+#else
+    /* include WIN case */
+    OSVERSIONINFOEX info;
+    TCHAR infoBuf[INFO_BUFFER_SIZE];
+    DWORD bufCharCount = INFO_BUFFER_SIZE;
+    char *substr_ptr;
+
+    MPI_Get_version(&mpi_version, &mpi_subversion);
+
+    info.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+    GetVersionEx((OSVERSIONINFO *)&info);
+
+    bufCharCount = ExpandEnvironmentStrings("%PROCESSOR_IDENTIFIER%", infoBuf, INFO_BUFFER_SIZE);
+
+    /* Replace  "Intel64" by "Intel(R) 64" */
+    substr_ptr = strstr(infoBuf, "Intel64");
+    if (substr_ptr != NULL)
+        fprintf(unit, "# Machine               : Intel(R) 64%s\n", substr_ptr + strlen("Intel64"));
+    else {
+        /* Replace  "EM64T" by "Intel(R) 64" */
+        substr_ptr = strstr(infoBuf, "EM64T");
+        if (substr_ptr != NULL)
+            fprintf(unit, "# Machine               : Intel(R) 64%s\n", substr_ptr + strlen("EM64T"));
+        else
+            fprintf(unit, "# Machine               : %s\n", infoBuf);
+    }
+
+    if (info.dwMajorVersion == 4)
+        switch (info.dwMinorVersion) {
+            case 90:
+                fprintf(unit, "# System                : Windows Me\n");
+                break;
+            case 10:
+                fprintf(unit, "# System                : Windows 98\n");
+                break;
+            case 0:
+                fprintf(unit, "# System                : Windows NT 4.0\n");
+                break;
+            default:
+                break;
+        }
+    else if (info.dwMajorVersion == 5)
+        switch (info.dwMinorVersion) {
+            case 2:
+                fprintf(unit, "# System                : Windows 2003\n");
+                break;
+            case 1:
+                fprintf(unit, "# System                : Windows XP\n");
+                break;
+            case 0:
+                fprintf(unit, "# System                : Windows 2000\n");
+                break;
+            default:
+                break;
+        }
+    else if (info.dwMajorVersion == 6)
+        switch (info.dwMinorVersion) {
+            case 0:
+                if (info.wProductType == VER_NT_WORKSTATION)
+                    fprintf(unit, "# System                : Windows Vista\n");
+                else
+                    fprintf(unit, "# System                : Windows Server 2008\n");
+                break;
+            default:
+                break;
+        }
+
+    fprintf(unit, "# Release               : %-lu.%-lu.%-lu\n", (unsigned long)info.dwMajorVersion,
+            (unsigned long)info.dwMinorVersion, (unsigned long)info.dwBuildNumber);
+    fprintf(unit, "# Version               : %s\n", info.szCSDVersion);
+#endif
+    /* >> IMB 3.1  */
+    fprintf(unit, "# MPI Version           : %-d.%-d\n", mpi_version, mpi_subversion);
+    fprintf(unit, "# MPI Thread Environment: ");
+
+#ifdef USE_MPI_INIT_THREAD
+    switch (mpi_thread_environment) {
+        case MPI_THREAD_SINGLE:
+            fprintf(unit, "MPI_THREAD_SINGLE\n");
+            break;
+        case MPI_THREAD_FUNNELED:
+            fprintf(unit, "MPI_THREAD_FUNNELED\n");
+            break;
+        case MPI_THREAD_SERIALIZED:
+            fprintf(unit, "MPI_THREAD_SERIALIZED\n");
+            break;
+        default:
+            fprintf(unit, "MPI_THREAD_MULTIPLE\n");
+            break;
+    }
+#endif
+
+    /* IMB 3.2 add on: version notice. Compared numerically, since strcmp ranks "2019" below "3.2" */
+    sscanf(VERSION, "%d.%d", &imb_major, &imb_minor);
+    if (imb_major > 3 || (imb_major == 3 && imb_minor >= 2)) {
+        fprintf(unit, "\n\n# New default behavior from Version 3.2 on:\n\n");
+        /* adjacent literals: source indentation must not leak in front of the '#' of each output line */
+        fprintf(unit,
+                "# the number of iterations per message size is cut down \n"
+                "# dynamically when a certain run time (per message size sample) \n"
+                "# is expected to be exceeded. Time limit is defined by variable \n"
+                "# \"SECS_PER_SAMPLE\" (=> IMB_settings.h) \n"
+                "# or through the flag => -time \n");
+    }
+}
+
+void IMB_end_msg(struct comm_info* c_info) {
+/*
+
+                      Prints the closing message of a run
+
+Input variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI; see [1] for more information.
+                      NULL selects the "MPI_Finalize-d" notice on stderr instead
+
+*/
+    if (c_info == NULL) {
+        fprintf(stderr, "\n\n# IMB has MPI_Finalize-d\n\n");
+        return;
+    }
+    if (c_info->w_rank != 0)
+        return;
+    fprintf(stdout, "\n\n# All processes entering MPI_Finalize\n\n");
+    if (unit != NULL && unit != stdout)
+        fprintf(unit, "\n\n# All processes entering MPI_Finalize\n\n");
+}
diff --git a/src/IMB_gather.c b/src_c/IMB_gather.c
similarity index 75%
rename from src/IMB_gather.c
rename to src_c/IMB_gather.c
index 984d5616..41459858 100644
--- a/src/IMB_gather.c
+++ b/src_c/IMB_gather.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -89,85 +88,76 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_gather(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                MODES RUN_MODE, double* time)
+                MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Gather
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
-
 */
-{
-  int    i;
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  double t1, t2;
+    int    i;
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    double t1, t2;
 
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
 
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
+    size *= c_info->size_scale;
 
     *time = 0.;
-    if(c_info->rank!=-1)
-    {
+    if (c_info->rank != -1) {
         int root = 0;
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0;i<ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t1 = MPI_Wtime();
-            ierr = MPI_Gather ((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                                s_num,c_info->s_data_type,
-                                (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                                r_num, c_info->r_data_type, root,
-                                c_info->communicator);
+            ierr = MPI_Gather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                              s_num, c_info->s_data_type,
+                              (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                              r_num, c_info->r_data_type, root,
+                              c_info->communicator);
             MPI_ERRHAND(ierr);
             t2 = MPI_Wtime();
-            *time += (t2-t1);
+            *time += (t2 - t1);
 
 #ifdef CHECK
-            if( c_info->rank == root )
-            {
-                 CHK_DIFF("Gather",c_info, 
-                          (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                          0, (size_t) c_info->num_procs* (size_t) size, 1, 
-                          put, 0, ITERATIONS->n_sample, i, -2, &defect);
+            if (c_info->rank == root) {
+                CHK_DIFF("Gather", c_info,
+                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs, 0,
+                         0, (size_t)c_info->num_procs * (size_t)size, 1,
+                         put, 0, ITERATIONS->n_sample, i, -2, &defect);
             }
 #endif
             root = (root + c_info->root_shift) % c_info->num_procs;
@@ -185,22 +175,18 @@ void IMB_igather(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
-                 double* time)
+                 double* time) {
 /*
 
-
                       MPI-NBC benchmark kernel
                       Benchmarks MPI_Igather
 
-
-
 Input variables:
 
 -c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
 
-
 -size                 (type int)
                       Basic message size in bytes
 
@@ -209,15 +195,12 @@ Input variables:
 
 -RUN_MODE             (type MODES)
 
-
 Output variables:
 
 -time                 (type double*)
                       Timing result per sample
 
-
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -230,19 +213,19 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    if ((s_size!=0) && (r_size!=0)) {
-        s_num=size/s_size;
-        r_num=size/r_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_igather_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
@@ -251,8 +234,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Igather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                s_num,
@@ -267,25 +249,25 @@ Output variables:
             MPI_ERRHAND(ierr);
 
             t_comp -= MPI_Wtime();
-                IMB_cpu_exploit(t_pure, 0);
+            IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
 
             MPI_Wait(&request, &status);
             t_ovrlp += MPI_Wtime();
 
 #ifdef CHECK
-             if (c_info->rank == root) {
-                  CHK_DIFF("Igather", c_info,
-                           (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
-                           0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
-                           put, 0, ITERATIONS->n_sample, i, -2, &defect);
-             }
+            if (c_info->rank == root) {
+                CHK_DIFF("Igather", c_info,
+                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                         0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
+                         put, 0, ITERATIONS->n_sample, i, -2, &defect);
+            }
 #endif // CHECK
             root = (root + c_info->root_shift) % c_info->num_procs;
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
 
     time[0] = t_pure;
@@ -299,22 +281,18 @@ void IMB_igather_pure(struct comm_info* c_info,
                       int size,
                       struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE,
-                      double* time)
+                      double* time) {
 /*
 
-
                       MPI-NBC benchmark kernel
                       Benchmarks IMB_Igather_pure
 
-
-
 Input variables:
 
 -c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
 
-
 -size                 (type int)
                       Basic message size in bytes
 
@@ -323,15 +301,12 @@ Input variables:
 
 -RUN_MODE             (type MODES)
 
-
 Output variables:
 
 -time                 (type double*)
                       Timing result per sample
 
-
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -342,7 +317,7 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -354,13 +329,12 @@ Output variables:
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Igather((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                s_num,
@@ -375,12 +349,12 @@ Output variables:
             MPI_Wait(&request, &status);
             t_pure += MPI_Wtime();
 #ifdef CHECK
-             if (c_info->rank == root) {
-                  CHK_DIFF("Igather_pure", c_info,
-                           (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
-                           0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
-                           put, 0, ITERATIONS->n_sample, i, -2, &defect);
-             }
+            if (c_info->rank == root) {
+                CHK_DIFF("Igather_pure", c_info,
+                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                         0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
+                         put, 0, ITERATIONS->n_sample, i, -2, &defect);
+            }
 #endif // CHECK
             root = (root + c_info->root_shift) % c_info->num_procs;
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
diff --git a/src/IMB_gatherv.c b/src_c/IMB_gatherv.c
similarity index 81%
rename from src/IMB_gatherv.c
rename to src_c/IMB_gatherv.c
index f03ce9bc..41aa8ee0 100644
--- a/src/IMB_gatherv.c
+++ b/src_c/IMB_gatherv.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -89,94 +88,84 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_gatherv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                 MODES RUN_MODE, double* time)
+                 MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-1 benchmark kernel
                       Benchmarks MPI_Gatherv
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
-
 */
-{
-  double t1, t2;
-  int    i;
-  Type_Size s_size,r_size;
-  int s_num, r_num;
+    double t1, t2;
+    int    i;
+    Type_Size s_size, r_size;
+    int s_num, r_num;
 
 #ifdef CHECK
-defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
+    ierr = 0;
 
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
 
-  /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
+    for (i = 0; i < c_info->num_procs; i++) {
+        c_info->rdispl[i] = r_num*i;
+        c_info->reccnt[i] = r_num;
+    }
 
-  for (i=0;i<c_info->num_procs ;i++)
-  {
-      c_info->rdispl[i] = r_num*i;
-      c_info->reccnt[i] = r_num;
-  }
+    size *= c_info->size_scale;
 
     *time = 0.;
-    if(c_info->rank!=-1)
-    {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0;i<ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t1 = MPI_Wtime();
-            ierr = MPI_Gatherv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                                s_num,c_info->s_data_type,
-                                (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                                c_info->reccnt,c_info->rdispl,
-                                c_info->r_data_type,
-                                root,
-                                c_info->communicator);
+            ierr = MPI_Gatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                               s_num, c_info->s_data_type,
+                               (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                               c_info->reccnt, c_info->rdispl,
+                               c_info->r_data_type,
+                               root,
+                               c_info->communicator);
             MPI_ERRHAND(ierr);
             t2 = MPI_Wtime();
             *time += (t2 - t1);
 #ifdef CHECK
-            if( c_info->rank == root )
-            {
-                 CHK_DIFF("Gatherv",c_info, 
-                          (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                          0, (size_t) c_info->num_procs * (size_t) size, 1, 
-                          put, 0, ITERATIONS->n_sample, i,
-                          -2, &defect);
+            if (c_info->rank == root) {
+                CHK_DIFF("Gatherv", c_info,
+                         (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs, 0,
+                         0, (size_t)c_info->num_procs * (size_t)size, 1,
+                         put, 0, ITERATIONS->n_sample, i,
+                         -2, &defect);
             }
 #endif
             root = (root + c_info->root_shift) % c_info->num_procs;
@@ -192,22 +181,18 @@ void IMB_igatherv(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
-                  double* time)
+                  double* time) {
 /*
 
-
                       MPI-NBC benchmark kernel
                       Benchmarks MPI_Igatherv
 
-
-
 Input variables:
 
 -c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
 
-
 -size                 (type int)
                       Basic message size in bytes
 
@@ -217,15 +202,12 @@ Input variables:
 -RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
-
 Output variables:
 
 -time                 (type double*)
                       Timing result per sample
 
-
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -238,19 +220,19 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->s_data_type, &s_size);
     MPI_Type_size(c_info->r_data_type, &r_size);
-    if ((s_size!=0) && (r_size!=0)) {
+    if ((s_size != 0) && (r_size != 0)) {
         s_num = size / s_size;
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
         IMB_igatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
@@ -260,8 +242,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Igatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
@@ -277,7 +258,7 @@ Output variables:
             MPI_ERRHAND(ierr);
 
             t_comp -= MPI_Wtime();
-                IMB_cpu_exploit(t_pure, 0);
+            IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
 
             MPI_Wait(&request, &status);
@@ -294,7 +275,7 @@ Output variables:
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
 
     time[0] = t_pure;
@@ -308,22 +289,18 @@ void IMB_igatherv_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
-                       double* time)
+                       double* time) {
 /*
 
-
                       MPI-NBC benchmark kernel
                       Benchmarks IMB_Igatherv_pure
 
-
-
 Input variables:
 
 -c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
 
-
 -size                 (type int)
                       Basic message size in bytes
 
@@ -333,15 +310,12 @@ Input variables:
 -RUN_MODE             (type MODES)
                       (only MPI-2 case: see [1])
 
-
 Output variables:
 
 -time                 (type double*)
                       Timing result per sample
 
-
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -352,7 +326,7 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -370,12 +344,11 @@ Output variables:
         c_info->reccnt[i] = r_num;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Igatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
diff --git a/src_c/IMB_init.c b/src_c/IMB_init.c
new file mode 100644
index 00000000..1b06c675
--- /dev/null
+++ b/src_c/IMB_init.c
@@ -0,0 +1,1622 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_init.c 
+
+ Modifications IMB_2.3 => IMB_3.0:
+ Better argument checking and error messages
+ Include -h flag for help
+ 2 new auxiliary functions:
+ IMB_chk_arg_int
+ IMB_chk_arg_file
+ 
+
+ Implemented functions: 
+
+ IMB_basic_input;
+ IMB_chk_arg_int
+ IMB_chk_arg_file
+ IMB_chk_arg_thread_level
+ IMB_get_rank_portion;
+ IMB_init_communicator;
+ IMB_set_communicator;
+ IMB_valid;
+ IMB_set_default;
+
+ ***************************************************************************/
+
+
+
+#include <ctype.h>
+
+#include "IMB_settings.h"
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+/* IMB 3.1 << */
+#include "IMB_mem_info.h"
+/* >> IMB 3.1  */
+
+#include "IMB_prototypes.h"
+
+#define MAX_INT_LOG (31)
+
+
+char *duplicated_benchmark_names[1000] = { NULL, }; /* fixed capacity of 1000 name pointers, all NULL initially */
+unsigned int duplicated_benchmark_names_cnt = 0;    /* presumably the number of slots in use -- TODO confirm */
+/* One element of a name list; the link to another element is an integer index, not a pointer. */
+struct Blist_item {
+    char *bname;         /* name string (a benchmark name, judging by the identifier) */
+    int   next_index;    /* presumably the index in pool of the following item -- TODO confirm */
+};
+
+/* File-local Blist_item storage; assumed to be managed by IMB_init/free_Blist_item_pool -- TODO confirm. */
+static struct Blist_item* pool = NULL;
+static int pool_size = 0;
+static int curr_pos = 0;
+
+
+/**** Forward declarations of file-local (static) functions ****/
+static int IMB_chk_arg_int(int* val, char ***argv, int *argc, int iarg);
+static int IMB_chk_arg_file(FILE** fd, char ***argv, int *argc, int iarg);
+#ifdef USE_MPI_INIT_THREAD
+static int IMB_chk_arg_thread_level(int* val, char **argv, int argc, int iarg);
+#endif /*USE_MPI_INIT_THREAD*/
+/* Blist_item pool setup/teardown, then list helpers (roles inferred from the names -- TODO confirm) */
+static void IMB_init_Blist_item_pool();
+static void IMB_free_Blist_item_pool();
+
+static void IMB_add_to_list_tail(const char*, int*, int*, int *);
+static void IMB_print_list(int list_head_index);
+static void IMB_remove_invalid_items(int* p_list_head, int* p_list_tail, int* n_cases);
+static void IMB_remove_item_from_list(const char* name, int* p_list_head, int* p_list_tail, int *n_cases);
+
+/********************************************************************/
+static int IMB_chk_arg_int(int* val, char ***argv, int *argc, int iarg) {
+    /* Converts (*argv)[iarg] with IMB_str_atoi and accepts a result >= 0.
+       Returns 1 with the result stored in *val; returns 0 with *val left
+       unchanged when iarg is not below *argc or the result is negative. */
+    int parsed;
+
+    /* No argument at position iarg */
+    if (iarg >= *argc)
+        return 0;
+
+    parsed = IMB_str_atoi((*argv)[iarg]);
+    if (parsed < 0)
+        return 0;
+
+    *val = parsed;
+    return 1;
+}
+
+static int IMB_chk_arg_file(FILE** fd, char ***argv, int *argc, int iarg) {
+    /* Opens the file named by command line argument (*argv)[iarg] for
+       reading (mode "r").
+       Returns 1 with the open stream stored in *fd; the stream is not
+       closed here. Returns 0 with *fd set to NULL when iarg is not below
+       *argc or fopen fails. */
+    FILE* stream = (FILE*)NULL;
+
+    /* *fd is reset first, so it is NULL on every failure path */
+    *fd = stream;
+
+    if (iarg >= *argc)
+        return 0;
+
+    stream = (FILE*)fopen((*argv)[iarg], "r");
+    *fd = stream;
+
+    return (stream != (FILE*)NULL) ? 1 : 0;
+}
+
+#ifdef WIN_IMB /* STRCASECMP(s1,s2): string comparison that ignores letter case, per platform */
+#define STRCASECMP(s1,s2) _stricmp((s1),(s2))
+#else /*linux*/
+#define STRCASECMP(s1,s2) strcasecmp((s1),(s2))
+#endif
+#ifdef USE_MPI_INIT_THREAD
+static int IMB_chk_arg_thread_level(int* val, char **argv, int argc, int iarg) {
+    /* Maps argv[iarg] ("single", "funneled", "serialized" or "multiple",
+       compared with STRCASECMP) to the matching MPI_THREAD_* constant in *val.
+       Returns 1 on a match; 0 (*val untouched) if iarg >= argc or no match. */
+    const char* word;
+    if (iarg >= argc)
+        return 0;
+
+    word = argv[iarg];
+    if (0 == STRCASECMP(word, "single"))
+        *val = MPI_THREAD_SINGLE;
+    else if (0 == STRCASECMP(word, "funneled"))
+        *val = MPI_THREAD_FUNNELED;
+    else if (0 == STRCASECMP(word, "serialized"))
+        *val = MPI_THREAD_SERIALIZED;
+    else if (0 == STRCASECMP(word, "multiple"))
+        *val = MPI_THREAD_MULTIPLE;
+    else
+        return 0;
+    return 1;
+}
+
+void IMB_chk_arg_level_of_threading(char ***argv, int *argc) {
+    /* Scans the command line for the first "-thread_level" option and, if the
+       argument after it passes IMB_chk_arg_thread_level, stores the resulting
+       level in mpi_thread_desired. A missing or unrecognized argument is
+       ignored without any message; later occurrences are never examined. */
+    int pos;
+    int level;
+
+    for (pos = 1; pos < *argc; pos++) {
+        if (strcmp((*argv)[pos], "-thread_level") != 0)
+            continue;
+
+        if (IMB_chk_arg_thread_level(&level, *argv, *argc, pos + 1))
+            mpi_thread_desired = level;
+
+        break; /* stop at the first occurrence, valid or not */
+    }
+}
+
+#endif /*#ifdef USE_MPI_INIT_THREAD*/
+
+static IMODE string_to_iter_policy(const char* str) {
+    /* Maps "off", "dynamic", "multiple_np" or "auto" (or a leading abbreviation of one) to its
+       IMODE; characters after a complete keyword are not compared. imode_invalid otherwise. */
+    size_t len = strlen(str);
+    if (len == 0)
+        return imode_invalid; /* strncmp over 0 characters would report a match */
+    if (strncmp(str, "off", min(len, 3)) == 0)
+        return imode_off;
+    if (strncmp(str, "dynamic", min(len, 7)) == 0) /* 7 = full keyword length */
+        return imode_dynamic;
+    if (strncmp(str, "multiple_np", min(len, 11)) == 0)
+        return imode_multiple_np;
+    if (strncmp(str, "auto", min(len, 4)) == 0)
+        return imode_auto;
+    return imode_invalid;
+}
+
+static int IMB_chk_arg_switch(char *val) {
+    /* Interprets val as an on/off switch: returns 1 for "enable", "yes", "on"
+       (compared with STRCASECMP) or "1"; 0 for "disable", "no", "off"
+       (compared with STRCASECMP) or "0"; -1 for NULL or any other string. */
+    if (val == NULL)
+        return -1;
+
+    if (0 == STRCASECMP(val, "enable") || 0 == STRCASECMP(val, "yes") ||
+        0 == STRCASECMP(val, "on") || 0 == strcmp(val, "1"))
+        return 1;
+
+    if (0 == STRCASECMP(val, "disable") || 0 == STRCASECMP(val, "no") ||
+        0 == STRCASECMP(val, "off") || 0 == strcmp(val, "0"))
+        return 0;
+
+    /* Unrecognized text */
+    return -1;
+}
+
+
+#define N_baseinfo 18
+/* IMB 3.1 << */
+#define N_base_f_info 3  /* for float data */
+/* >> IMB 3.1  */
+
+/* IMB 3.1 << */
+/*
+   new "ITERATIONS" object for repetition count scheduling
+
+   major changes in interpreting the command line
+*/
+
+int IMB_basic_input(struct comm_info* c_info, struct Bench** P_BList,
+                    struct iter_schedule* ITERATIONS,
+                    int *argc, char ***argv, int* NP_min) {
+/* >> IMB 3.1  */
+/*
+
+
+
+   Input variables:
+
+   -argc                 (type int *)
+   Number of command line arguments
+
+
+   -argv                 (type char ***)
+   List of command line arguments
+
+
+
+   Output variables:
+
+   -NP_min               (type int*)
+   Minimum number of processes to run (-npmin command line argument)
+
+
+   -P_BList              (type struct Bench**)
+   (For explanation of struct Bench type:
+   describes all aspects of modes of a benchmark;
+   see [1] for more information)
+
+   Address of list of benchmarks to run;
+   list is set up.
+
+
+   -c_info               (type struct comm_info*)
+   Collection of all base data for MPI;
+   see [1] for more information
+
+
+
+*/
+    int i, n_cases, n_lens, iarg, iarg_msg;
+    int deflt;
+    int * ALL_INFO;
+    //char** DEFC, **CMT;
+    /* IMB 3.1 << */
+    float ALL_F_INFO[N_base_f_info];
+    /* >> IMB 3.1  */
+    int ok;
+    /* IMB_3.0 */
+    int help_only;
+
+    int Blist_head, Blist_incl_head, Blist_excl_head;
+    int Blist_tail, Blist_incl_tail, Blist_excl_tail;
+    int n_cases_incl, n_cases_excl;
+    help_only = 0;
+
+    *P_BList = (struct Bench *)NULL;
+
+    /* run time control as default */
+    ITERATIONS->n_sample = 0;
+    ITERATIONS->off_cache = 0;
+    ITERATIONS->cache_size = -1;
+    ITERATIONS->cache_line_size = 0;
+    ITERATIONS->s_offs = ITERATIONS->r_offs = 0;
+    ITERATIONS->s_cache_iter = ITERATIONS->r_cache_iter = 1;
+    ITERATIONS->msgspersample = MSGSPERSAMPLE;
+    ITERATIONS->msgs_nonaggr = MSGS_NONAGGR;
+    ITERATIONS->overall_vol = OVERALL_VOL;
+    ITERATIONS->secs = SECS_PER_SAMPLE;
+    ITERATIONS->iter_policy = ITER_POLICY;
+    ITERATIONS->numiters = (int*)NULL;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &c_info->w_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &c_info->w_num_procs);
+
+    unit = stdout;
+
+    if (c_info->w_rank == 0 && strlen(OUTPUT_FILENAME) > 0)
+        unit = fopen(OUTPUT_FILENAME, "w");
+
+    deflt = 0;
+    ok = 0;
+    iarg_msg = -1;
+
+    c_info->group_mode = -1;
+#ifdef MPIIO
+    *NP_min = 1;
+#else
+    *NP_min = 2;
+#endif
+
+    if (c_info->w_rank == 0) {
+        /* Interpret command line */
+        n_lens = 0;
+        n_cases = n_cases_incl = n_cases_excl = 0;
+
+        IMB_init_Blist_item_pool();
+        Blist_head = Blist_incl_head = Blist_excl_head = Blist_tail = Blist_incl_tail = Blist_excl_tail = -1;
+
+        if (*argc <= 1) {
+            /* Take default */
+            deflt = 1;
+        } else {
+            iarg = 1;
+
+            while (iarg <= *argc - 1) {
+
+                if (!strcmp((*argv)[iarg], "-h") || !strcmp((*argv)[iarg], "-help")) {
+
+                    help_only = 1;
+                    break;
+
+                } else if (!strcmp((*argv)[iarg], "-npmin")) {
+                    /* IMB_3.0: Better arg checking for following cases */
+                    if (!IMB_chk_arg_int(NP_min, argv, argc, iarg + 1) || (*NP_min <= 0)) {
+                        ok = -1;
+                        fprintf(stderr, "Invalid argument after \"npmin\"\n");
+                        break;
+                    }
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-multi")) {
+
+                    int tst;
+                    if (!IMB_chk_arg_int(&tst, argv, argc, iarg + 1)) {
+                        ok = -1;
+                    } else if (tst == 0 || tst == 1) {
+                        c_info->group_mode = tst;
+                    } else {
+                        ok = -1;
+                    }
+
+                    if (ok == -1) {
+                        fprintf(stderr, "Invalid argument after \"multi\"\n");
+                        break;
+                    }
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-off_cache")) {
+                    int ierr, cls;
+                    float cs;
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"off_cache\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%f,%d", &cs, &cls);
+                    if (ierr == 1) {
+                        if (cs < 0.) cs = CACHE_SIZE;
+                        cls = CACHE_LINE_SIZE;
+                    } else if (ierr != 2) {
+                        fprintf(stderr, "Invalid off_cache selection\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ITERATIONS->cache_size = cs;
+                    ITERATIONS->cache_line_size = cls;
+                    ITERATIONS->off_cache = 1;
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-iter")) {
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"iter\"\n");
+                        ok = -1;
+                        break;
+                    } else {
+                        int int_counter = 0;
+                        int param_counter = 0;
+                        const int n_param = 3; /* comma separated parameters*/
+                        char* param = (*argv)[iarg + 1];
+                        char const* token = NULL;
+
+                        for (token = strtok(param, ",");
+                             token && param_counter < n_param;
+                             token = strtok(NULL, ","), ++param_counter) {
+                            if (isdigit(*token)) {
+                                ++int_counter;
+                                switch (int_counter) {
+                                    case 1:  sscanf(token, "%d", &ITERATIONS->msgspersample);
+                                        break;
+                                    case 2:  sscanf(token, "%d", &ITERATIONS->overall_vol);
+                                        ITERATIONS->overall_vol *= (1024 * 1024);
+                                        break;
+                                    case 3:  sscanf(token, "%d", &ITERATIONS->msgs_nonaggr);
+                                        break;
+                                    //default: ITERATIONS->iter_policy = imode_invalid;        break;
+                                }
+                            } //else {
+                            //ITERATIONS->iter_policy = string_to_iter_policy(token);
+                            //}
+                            else {
+                                fprintf(stderr, "Invalid iter selection\n");
+                                ok = -1;
+                                break;
+                            }
+                        }
+                    }
+
+//                    if (ITERATIONS->iter_policy == imode_invalid) {
+//                        fprintf(stderr,"Invalid iter selection\n");
+//                        ok = -1;
+//                        break;
+//                    }
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-iter_policy")) {
+                    int ierr;
+                    char iter_policy[32];
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"iter_policy\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%31s", iter_policy);
+                    iter_policy[31] = '\0';
+
+                    if (ierr == 1)
+                        ITERATIONS->iter_policy = string_to_iter_policy(iter_policy);
+
+                    if (ierr != 1 || ok == -1 || ITERATIONS->iter_policy == imode_invalid) {
+                        fprintf(stderr, "Invalid -iter_policy selection\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    iarg++;
+                } else if (!strcmp((*argv)[iarg], "-time")) {
+                    int ierr;
+                    float secs;
+
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"iter\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%f", &secs);
+                    if (ierr != 1) {
+                        fprintf(stderr, "Invalid -time selection\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ITERATIONS->secs = secs;
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-mem")) {
+
+                    int ierr;
+                    float GB;
+
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"-mem\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%f", &GB);
+                    if (ierr != 1) {
+                        fprintf(stderr, "Invalid -time selection\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    c_info->max_mem = GB;
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-map")) {
+                    int ierr;
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"map\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%d%c%d", &c_info->px, (char*)&i, &c_info->py);
+                    if (ierr < 3 || c_info->px*c_info->py < c_info->w_num_procs) {
+                        fprintf(stderr, "Invalid map selection\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-msglen")) {
+                    FILE*t;
+                    if (!IMB_chk_arg_file(&t, argv, argc, iarg + 1)) {
+                        ok = -1;
+                        fprintf(stderr, "Filename after \"msglen\" flag invalid\n");
+                        break;
+                    }
+
+                    iarg_msg = iarg + 1;
+
+                    if (t) {
+                        char inp_line[72];
+
+                        while (fgets(inp_line, 72, t)) {
+                            if (inp_line[0] != '#' && strlen(inp_line) > 1)
+                                n_lens++;
+                        }
+                        fclose(t);
+                    }
+
+                    if (n_lens == 0) {
+                        fprintf(stderr, "Sizes file %s invalid or doesnt exist\n", (*argv)[iarg_msg]);
+                        ok = -1;
+                    }
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-input")) {
+                    FILE*t;
+                    if (!IMB_chk_arg_file(&t, argv, argc, iarg + 1)) {
+                        ok = -1;
+                        fprintf(stderr, "Filename after \"input\" flag invalid\n");
+                        break;
+                    }
+
+
+                    if (t) {
+                        char inp_line[72], nam[32];
+                        while (fgets(inp_line, 72, t)) {
+                            if (inp_line[0] != '#' && strlen(inp_line) - 1) {
+                                sscanf(inp_line, "%32s", nam);
+                                if (n_cases >= 1000) {
+                                    fprintf(unit, "Too many benchmark cases\n");
+                                    fflush(stderr);
+                                    ok = -1;
+                                    break;
+                                }
+                                IMB_add_to_list_tail(nam, &Blist_head, &Blist_tail, &n_cases);
+                            }
+                        }
+                        fclose(t);
+                    } else fprintf(unit, "Input file %s doesnt exist\n", (*argv)[iarg + 1]);
+
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-include")) {
+                    if ((iarg + 1 >= *argc) || ((*argv)[iarg + 1][0] == '-')) {
+                        fprintf(stderr, "Missing argument after \"include\"\n");
+                        fflush(stderr);
+                        ok = -1;
+                        break;
+                    }
+
+                    IMB_add_to_list_tail((*argv)[iarg + 1], &Blist_incl_head, &Blist_incl_tail, &n_cases_incl);
+                    iarg++;
+
+                } else if (!strcmp((*argv)[iarg], "-exclude")) {
+                    if ((iarg + 1 >= *argc) || ((*argv)[iarg + 1][0] == '-')) {
+                        fprintf(stderr, "Missing argument after \"exclude\"\n");
+                        fflush(stderr);
+                        ok = -1;
+                        break;
+                    }
+
+                    IMB_add_to_list_tail((*argv)[iarg + 1], &Blist_excl_head, &Blist_excl_tail, &n_cases_excl);
+                    iarg++;
+                }
+                /* IMB 3.2.3 << */
+                else if (!strcmp((*argv)[iarg], "-msglog")) {
+
+                    int ierr, max_log, min_log;
+                    if (iarg + 1 >= *argc) {
+                        fprintf(stderr, "Missing argument after \"-msglog\"\n");
+                        ok = -1;
+                        break;
+                    }
+
+                    ierr = sscanf((*argv)[iarg + 1], "%d:%d", &min_log, &max_log);
+
+                    if (ierr == 2) {
+
+                        if ((min_log >= 0) &&
+                            (max_log > 0) &&
+                            (min_log < MAX_INT_LOG) &&
+                            (max_log<MAX_INT_LOG) &&
+                            (max_log>min_log)) {
+                            c_info->min_msg_log = min_log;
+                            c_info->max_msg_log = max_log;
+                        } else {
+                            ok = -1;
+                            break;
+                        }
+                    } else if (ierr == 1) {
+                        if (min_log>0)
+                            c_info->max_msg_log = min_log;
+                        else {
+                            ok = -1;
+                            break;
+                        }
+                    }
+
+                    if (ierr == 0)
+                        ok = -1;
+
+                    if (ok == -1) {
+                        fprintf(stderr, "Invalid -msglog argument, must be <num1>:<num2>\n");
+                        fprintf(stderr, "where num1 and num2 are positive integer numbers, and num2>num1\n");
+                        break;
+                    }
+
+                    iarg++;
+                }
+#ifdef USE_MPI_INIT_THREAD
+                else if (!strcmp((*argv)[iarg], "-thread_level")) {
+                    int thread_level;
+                    if (!IMB_chk_arg_thread_level(&thread_level, *argv, *argc, iarg + 1)) {
+                        ok = -1;
+                        fprintf(stderr, "Invalid -thread_level argument, must be single/funneled/serialized/multiple\n");
+                        break;
+                    }
+
+                    mpi_thread_desired = thread_level;
+
+                    iarg++;
+
+                }
+#endif
+                /* >> IMB 3.2.3  */
+#if (defined MPI1 || defined NBC )
+                else if (!strcmp((*argv)[iarg], "-root_shift")) {
+                    int val = -1;
+
+                    if (iarg + 1 < *argc)
+                        val = IMB_chk_arg_switch((*argv)[iarg + 1]);
+
+                    if (val == -1) {
+                        fprintf(stderr, "Invalid -root_shift argument \n");
+                        ok = -1;
+                        break;
+                    } else {
+                        c_info->root_shift = val;
+                    }
+                    iarg++;
+                } else if (!strcmp((*argv)[iarg], "-sync")) {
+                    int val = -1;
+
+                    if (iarg + 1 < *argc)
+                        val = IMB_chk_arg_switch((*argv)[iarg + 1]);
+
+                    if (val == -1) {
+                        fprintf(stderr, "Invalid -sync argument \n");
+                        ok = -1;
+                        break;
+                    } else {
+                        c_info->sync = val;
+                    }
+                    iarg++;
+                }
+#endif
+                else if (!strcmp((*argv)[iarg], "-imb_barrier")) {
+                    int val = -1;
+
+                    if (iarg + 1 < *argc)
+                        val = IMB_chk_arg_switch((*argv)[iarg + 1]);
+
+                    if (val == -1) {
+                        fprintf(stderr, "Invalid -imb_barrier argument \n");
+                        ok = -1;
+                        break;
+                    } else {
+                        IMB_internal_barrier = val;
+                    }
+                    iarg++;
+                } else {
+                    /*It must be the name of one of benchmark*/
+                    IMB_add_to_list_tail((*argv)[iarg], &Blist_head, &Blist_tail, &n_cases);
+                }
+
+                iarg++;
+            } /*while( iarg <= *argc-1 )*/
+        } /* else if( *argc > 1 )*/
+
+        /* IMB_3.0 */
+        if (help_only || ok < 0) {
+
+            /* Set flag "not ok" => help mode in main */
+            n_cases = 0;
+            IMB_i_alloc(int, ALL_INFO, N_baseinfo, "Basic_Input");
+            ok = -3;
+        } else {
+            /* remove wrong items*/
+            if (n_cases > 0)
+                IMB_remove_invalid_items(&Blist_head, &Blist_tail, &n_cases);
+
+            if (n_cases_excl > 0)
+                IMB_remove_invalid_items(&Blist_excl_head, &Blist_excl_tail, &n_cases_excl);
+
+            if (n_cases_incl > 0)
+                IMB_remove_invalid_items(&Blist_incl_head, &Blist_incl_tail, &n_cases_incl);
+
+            if (n_cases == 0 && n_cases_excl == 0 && n_cases_incl == 0)
+                deflt = 1;
+
+            if (deflt)
+                IMB_construct_blist_default(P_BList);
+            else {
+                if (n_cases == 0) {
+                    char** def_cases, **General_cmt;
+                    int i, n = 0;
+
+                    n_cases = IMB_get_def_cases(&def_cases, &General_cmt);
+
+                    for (i = 0; i < n_cases; i++)
+                        IMB_add_to_list_tail(def_cases[i], &Blist_head, &Blist_tail, &n);
+                }
+
+                /* Add benchmarks specified by option -include*/
+                if (n_cases_incl > 0) {
+                    struct Blist_item* include_tail = &pool[Blist_incl_tail];
+
+                    include_tail->next_index = Blist_head;
+                    Blist_head = Blist_incl_head;
+
+                    n_cases += n_cases_incl;
+                }
+
+                /* Remove benchmarks specified by option -exclude*/
+                if (n_cases_excl > 0) {
+                    int curr_index = Blist_excl_head;
+                    struct Blist_item*  curr_item;
+
+                    while (curr_index != -1) {
+                        curr_item = &pool[curr_index];
+
+                        IMB_remove_item_from_list(curr_item->bname, &Blist_head, &Blist_tail, &n_cases);
+
+                        curr_index = curr_item->next_index;
+                    }
+
+                }
+
+                if (n_cases > 0) {
+                    int i = 0;
+                    int curr_index = Blist_head;
+                    struct Blist_item* blist_item;
+
+
+                    *P_BList = (struct Bench*)IMB_v_alloc((1 + n_cases)*sizeof(struct Bench), "Construct_Blist 2");
+
+                    while (curr_index != -1) {
+                        blist_item = &pool[curr_index];
+
+                        IMB_construct_blist(&(*P_BList)[i], blist_item->bname);
+
+                        curr_index = blist_item->next_index;
+                        i++;
+                    }
+                    (*P_BList)[n_cases].name = NULL;
+                } else {
+                    ok = -1;
+                    *P_BList = (struct Bench*)IMB_v_alloc(sizeof(struct Bench), "Construct_Blist 2");
+                    (*P_BList)[0].name = NULL;
+                }
+            }
+
+            IMB_free_Blist_item_pool();
+
+            if (iarg_msg >= 0) {
+                FILE*t = fopen((*argv)[iarg_msg], "r");
+                c_info->n_lens = n_lens;
+
+                if (t && n_lens > 0) {
+                    char inp_line[72], S[32];
+                    int sz, isz;
+
+                    IMB_i_alloc(int, c_info->msglen, n_lens, "Basic_Input");
+
+                    isz = -1;
+
+                    while (fgets(inp_line, 72, t)) {
+                        S[0] = '\0';
+                        if (inp_line[0] != '#' && strlen(inp_line) - 1) {
+                            int ierr;
+                            sz = 0;
+
+                            ierr = sscanf(&inp_line[0], "%d%s", &sz, &S[0]);
+                            if (ierr <= 0 || ierr == EOF || sz < 0)
+                                ierr = -1;
+                            else if (ierr == 2) {
+                                if (S[0] == 'k' || S[0] == 'K') {
+                                    sz = sz * 1024;
+                                } else if (S[0] == 'm' || S[0] == 'M') {
+                                    sz = sz * 1024 * 1024;
+                                } else
+                                    ierr = -1;
+                            } /*else if(ierr==2) */
+
+                            if (ierr > 0) {
+                                isz++;
+                                c_info->msglen[isz] = sz;
+                            } else
+                                fprintf(stderr, "Invalid line in file %s\n", (*argv)[iarg_msg]);
+                        } /*if( inp_line[0] != '#' && strlen(inp_line)-1 )*/
+                    } /*while(fgets(inp_line,72,t))*/
+
+                    n_lens = c_info->n_lens = isz + 1;
+                    fclose(t);
+
+                    if (n_lens == 0) {
+                        fprintf(stderr, "Sizes File %s invalid or doesnt exist\n", (*argv)[iarg_msg]);
+                        ok = -1;
+                    }
+                } /*if( t && n_lens>0 )*/
+            } /*if( iarg_msg>=0 )*/
+
+            IMB_i_alloc(int, ALL_INFO, N_baseinfo + n_cases, "Basic_Input");
+
+            if (!deflt) {
+                i = 0;
+                n_cases = 0;
+
+                while ((*P_BList)[i].name) {
+                    int index;
+                    index = IMB_get_bmark_index((*P_BList)[i].name);
+                    //IMB_get_def_index(&index,(*P_BList)[i].name );
+
+                    /* IMB_3.0
+                       if( index >= 0 )
+                       */
+                    /*if( index  != LIST_END )*/
+                    ALL_INFO[N_baseinfo + n_cases++] = index;
+
+                    i++;
+                } /*while( (*P_BList)[i].name )*/
+
+            } /*if( !deflt )*/
+
+            /* IMB_3.0 end "!help_only" */
+        } /* else if !( help_only || ok<0 ) */
+
+
+        /* IMB 3.1 << */
+        ALL_INFO[0] = *NP_min;
+        ALL_INFO[1] = c_info->group_mode;
+        ALL_INFO[2] = deflt;
+        ALL_INFO[3] = ITERATIONS->cache_line_size;
+        ALL_INFO[4] = ITERATIONS->msgspersample;
+        ALL_INFO[5] = ITERATIONS->overall_vol;
+        ALL_INFO[6] = ITERATIONS->msgs_nonaggr;
+        ALL_INFO[7] = ITERATIONS->iter_policy;
+        ALL_INFO[8] = n_cases;
+        ALL_INFO[9] = c_info->n_lens;
+        ALL_INFO[10] = c_info->px;
+        ALL_INFO[11] = c_info->py;
+        ALL_INFO[12] = c_info->min_msg_log;
+        ALL_INFO[13] = c_info->max_msg_log;
+        ALL_INFO[14] = c_info->root_shift;
+        ALL_INFO[15] = c_info->sync;
+        ALL_INFO[16] = ok;
+        ALL_INFO[17] = IMB_internal_barrier;
+
+        ALL_F_INFO[0] = ITERATIONS->cache_size;
+        ALL_F_INFO[1] = ITERATIONS->secs;
+        ALL_F_INFO[2] = c_info->max_mem;
+
+        MPI_Bcast(ALL_F_INFO, N_base_f_info, MPI_FLOAT, 0, MPI_COMM_WORLD);
+        /* >> IMB 3.1  */
+        MPI_Bcast(ALL_INFO, N_baseinfo, MPI_INT, 0, MPI_COMM_WORLD);
+
+        if (ok < 0) return ok;
+
+        if (n_cases > 0 && !deflt)
+            MPI_Bcast(ALL_INFO + N_baseinfo, n_cases, MPI_INT, 0, MPI_COMM_WORLD);
+
+        if (n_lens > 0) {
+            MPI_Bcast(c_info->msglen, n_lens, MPI_INT, 0, MPI_COMM_WORLD);
+            /* Used for dynamic caclulations on the number iterations */
+            if (ITERATIONS->iter_policy != imode_off && ITERATIONS->iter_policy != imode_invalid)
+                IMB_i_alloc(int, ITERATIONS->numiters, n_lens, "Basic_Input");
+        }
+
+        IMB_v_free((void**)&ALL_INFO);
+
+    } else { /* w_rank > 0 */
+        /* Receive input arguments */
+        int TMP[N_baseinfo];
+
+        /* IMB 3.1 << */
+        MPI_Bcast(ALL_F_INFO, N_base_f_info, MPI_FLOAT, 0, MPI_COMM_WORLD);
+        /* >> IMB 3.1  */
+        MPI_Bcast(TMP, N_baseinfo, MPI_INT, 0, MPI_COMM_WORLD);
+
+        *NP_min = TMP[0];
+        c_info->group_mode = TMP[1];
+        deflt = TMP[2];
+        /* IMB 3.1 << */
+
+        ITERATIONS->cache_line_size = TMP[3];
+        ITERATIONS->msgspersample = TMP[4];
+        ITERATIONS->overall_vol = TMP[5];
+        ITERATIONS->msgs_nonaggr = TMP[6];
+        ITERATIONS->iter_policy = TMP[7];
+        n_cases = TMP[8];
+        n_lens = TMP[9];
+        c_info->px = TMP[10];
+        c_info->py = TMP[11];
+        c_info->min_msg_log = TMP[12];
+        c_info->max_msg_log = TMP[13];
+        c_info->root_shift = TMP[14];
+        c_info->sync = TMP[15];
+        ok = TMP[16];
+        IMB_internal_barrier = TMP[17];
+
+        ITERATIONS->cache_size = ALL_F_INFO[0];
+        ITERATIONS->off_cache = (ITERATIONS->cache_size < 0.) ? 0 : 1;
+        ITERATIONS->secs = ALL_F_INFO[1];
+        c_info->max_mem = ALL_F_INFO[2];
+        /* >> IMB 3.1  */
+
+        if (ok < 0) return ok;
+
+        if (deflt)
+            IMB_construct_blist_default(P_BList);
+        else if (n_cases>0) {
+            char** ALLC;
+
+            IMB_i_alloc(int, ALL_INFO, n_cases, "Basic_Input");
+            MPI_Bcast(ALL_INFO, n_cases, MPI_INT, 0, MPI_COMM_WORLD);
+
+            //IMB_get_def_cases(&DEFC,&CMT);
+            IMB_get_all_cases(&ALLC);
+
+            *P_BList = (struct Bench*)    IMB_v_alloc((1 + n_cases)*sizeof(struct Bench), "Construct_Blist 1");
+
+            for (i = 0; i < n_cases; i++)
+                /* IMB_3.0 */
+                IMB_construct_blist(&(*P_BList)[i], ALLC[ALL_INFO[i]]);
+
+            (*P_BList)[n_cases].name = NULL;
+
+            IMB_v_free((void**)&ALL_INFO);
+        }
+
+        if (n_lens > 0) {
+            c_info->n_lens = n_lens;
+
+            IMB_i_alloc(int, c_info->msglen, n_lens, "Basic_Input");
+            MPI_Bcast(c_info->msglen, n_lens, MPI_INT, 0, MPI_COMM_WORLD);
+
+            if (ITERATIONS->iter_policy != imode_off && ITERATIONS->iter_policy != imode_invalid)
+                IMB_i_alloc(int, ITERATIONS->numiters, n_lens, "Basic_Input");
+        } /*if( n_lens>0 ) */
+    }
+
+#ifdef DEBUG
+    {
+        int i;
+
+        if (n_lens > 0) {
+            fprintf(dbg_file, "Got msglen:\n");
+
+            for (i = 0; i < n_lens; i++)
+                fprintf(stderr, "%d ", c_info->msglen[i]);
+        }
+
+        fprintf(dbg_file, "\n\n");
+        fprintf(dbg_file, "px py = %d %d\n", c_info->px, c_info->py);
+        fprintf(dbg_file, "\n\n");
+    }
+#endif /*DEBUG*/
+
+#ifndef EXT
+    if (do_nonblocking)
+        IMB_cpu_exploit(TARGET_CPU_SECS, 1);
+#endif
+
+    return 0;
+}
+
+
+void IMB_get_rank_portion(int rank, int NP, size_t size,
+        size_t unit_size, size_t* pos1, size_t* pos2) {
+/*
+
+   Splits <size> into contiguous, near-even pieces among processes.
+   <size> is first rounded up to a whole number of units; the first
+   (units % NP) ranks receive one unit more than the remaining ranks.
+
+   Input variables:
+
+   -rank                 (type int)
+   Process' rank
+
+   -NP                   (type int)
+   Number of processes (used as a divisor, must not be 0)
+
+   -size                 (type size_t)
+   Portion to split
+
+   -unit_size            (type size_t)
+   Base unit for splitting (used as a divisor, must not be 0)
+
+   Output variables:
+
+   -pos1                 (type size_t*)
+   -pos2                 (type size_t*)
+   Process' portion is from position pos1 to position pos2, expressed
+   in the same measure as <size>; pos2 is limited to size-1
+
+*/
+    size_t ne, baslen;
+    int    mod;
+
+    /* number of units needed to cover <size>, rounded up */
+    ne = (size+unit_size-1)/unit_size;
+    /* units every rank gets at least */
+    baslen = ne/NP;
+    /* Left-over units. The parentheses matter: a cast binds tighter than
+       '%', so "(int) ne%NP" would narrow ne to int before the modulo and
+       yield a wrong remainder once ne no longer fits into an int. Here
+       only the result (which is smaller than NP) is narrowed. */
+    mod    = (int)(ne%NP);
+
+    if( rank < mod ) {
+        /* ranks below <mod> carry one extra unit each */
+        *pos1 = rank*(baslen+1)*unit_size;
+        *pos2 = *pos1-1+(baslen+1)*unit_size;
+    } else {
+        /* skip the <mod> extra units handed out to the lower ranks */
+        *pos1 = (rank*baslen + mod)*unit_size;
+        *pos2 = *pos1-1 + baslen*unit_size;
+    }
+
+    /* the last unit may be only partially covered by <size> */
+    *pos2 = min(*pos2,size-1);
+}
+
+/********************************************************************/
+
+int IMB_init_communicator(struct comm_info* c_info, int NP) {
+/*
+
+       Sets up the communicator for a run with NP active processes and
+       gathers the resulting group layout on world rank 0
+
+       Input variables:
+
+       -NP                   (type int)
+       Number of all started processes
+
+       In/out variables:
+
+       -c_info               (type struct comm_info*)
+       Collection of all base data for MPI;
+       see [1] for more information
+
+       Communicator of active processes gets initialized;
+       grouping of processes (in the 'multi' case) in communicators
+
+       Return value          (type int)
+       Currently unused error exit (value is always 0)
+
+*/
+    int i, snd, cnt, proc, *aux_ptr;
+
+    MPI_Group group, w_group;
+    MPI_Status stat;
+
+    c_info->NP = NP;                         /* NUMBER OF OVERALL PROCESSES */
+    IMB_set_communicator(c_info);     /* GROUP MANAGEMENT               */
+
+    /* INITIALIZATION  WITHIN THE ACTUAL COMMUNICATOR */
+    if (c_info->communicator != MPI_COMM_NULL) {
+        MPI_Comm_size(c_info->communicator, &(c_info->num_procs));
+        MPI_Comm_rank(c_info->communicator, &(c_info->rank));
+
+        c_info->pair0 = 0;
+        c_info->pair1 = c_info->num_procs - 1;
+
+        c_info->select_tag = 0;
+        /*c_info->select_source = 0;*/
+    } else
+        c_info->rank = -1;  /* this process is not part of any communicator */
+
+    /* Trivial layout: one group holding every world rank in order;
+       no message exchange is needed */
+    if (c_info->communicator == MPI_COMM_WORLD) {
+        c_info->n_groups = 1;
+        c_info->g_sizes[0] = c_info->w_num_procs;
+
+        for (i = 0; i < c_info->w_num_procs; i++)
+            c_info->g_ranks[i] = i;
+        IMB_set_errhand(c_info);
+        return 0;
+    }
+
+    /* Collect global group information */
+    // The idea of this code is to collect the information on:
+    // 1) number of groups, stored in c_info->n_groups variable on rank 0
+    // 2) sizes of those groups, stored in c_info->g_sizes[] on rank 0
+    // 3) rank numbers in MPI_COMM_WORLD numbering of all ranks in groups
+    // Mostly this info is for output usage
+    if (c_info->rank == 0) {
+        /* group leaders provide group ranks */
+        MPI_Comm_group(MPI_COMM_WORLD, &w_group);
+        MPI_Comm_group(c_info->communicator, &group);
+
+        /* g_sizes serves as scratch space here: it holds the local ranks
+           0..num_procs-1 that are to be translated */
+        for (i = 0; i < c_info->num_procs; i++)
+            c_info->g_sizes[i] = i;
+
+        /* TRANSLATION OF RANKS */
+        MPI_Group_translate_ranks(group, c_info->num_procs,
+                                  c_info->g_sizes, w_group,
+                                  c_info->g_ranks);
+        //print_array(">> c_info->g_ranks", c_info->g_ranks, c_info->num_procs);
+
+        /* The group handles are only needed for the translation above;
+           release them so that repeated calls do not accumulate
+           MPI group objects */
+        MPI_Group_free(&group);
+        MPI_Group_free(&w_group);
+
+        snd = c_info->num_procs;
+    } else {
+        /* non-leaders report a single -1 as a dummy */
+        *c_info->g_ranks = -1;
+        snd = 1;
+    }
+
+    /* w_rank 0 collects in g_ranks ranks of single groups */
+    if (c_info->w_rank == 0) {
+        /* aux_ptr marks the next free position in g_ranks */
+        if (c_info->rank == 0) {
+            /* world rank 0 leads a group itself: its ranks are already
+               at the front of g_ranks */
+            c_info->n_groups = 1;
+            c_info->g_sizes[0] = c_info->num_procs;
+            aux_ptr = c_info->g_ranks + c_info->g_sizes[0];
+        } else {
+            c_info->n_groups = 0;
+            aux_ptr = c_info->g_ranks;
+        }
+
+        for (proc = 1; proc < c_info->w_num_procs; proc++) {
+            /* Recv group ranks or -1  */
+            /* cnt = number of g_ranks entries still unused */
+            cnt = (int)(c_info->g_ranks + c_info->w_num_procs - aux_ptr);
+            /* July 2002 fix V2.2.1 (wrong logistics), next 23 lines */
+
+            if (cnt <= 0) {
+                /* all leaders have sent, recv dummies (-1) from others! */
+                cnt = 1;
+                MPI_Recv(&i, cnt, MPI_INT, proc, 1000, MPI_COMM_WORLD, &stat);
+            } else {
+
+                MPI_Recv(aux_ptr, cnt, MPI_INT, proc, 1000, MPI_COMM_WORLD, &stat);
+
+                //print_array(">> aux_ptr", aux_ptr, cnt);
+
+                if (*aux_ptr >= 0) {
+                    /* Message was from a group leader  */
+                    /* the received element count is the size of that group */
+                    c_info->n_groups++;
+                    MPI_Get_count(&stat, MPI_INT, &c_info->g_sizes[c_info->n_groups - 1]);
+                    aux_ptr += c_info->g_sizes[c_info->n_groups - 1];
+                }
+            }
+            /* end fix V2.2.1 */
+        } /*for( proc=1; proc<c_info->w_num_procs; proc++ )*/
+    } else {  /* w_rank != 0 */
+        MPI_Send(c_info->g_ranks, snd, MPI_INT, 0, 1000, MPI_COMM_WORLD);
+        // print_array(">> c_info->g_ranks", c_info->g_ranks, snd);
+    }
+    /* End collection of group information */
+
+    IMB_set_errhand(c_info);
+
+    return 0;
+}
+
+void  IMB_adjust_timings_scale(struct comm_info *c_info, struct Bench *bmark) {
+    /* Only benchmarks whose first run mode is MultPassiveTransfer get
+       their bandwidth scale adjusted; everything else is left alone */
+    if (bmark->RUN_MODES[0].type != MultPassiveTransfer)
+        return;
+
+    /* Sanity check: with a single process the scale stays untouched */
+    if (c_info->num_procs <= 1)
+        return;
+
+    bmark->scale_bw = (double)c_info->num_procs - 1;
+}
+/**********************************************************************/
+
+void IMB_set_communicator(struct comm_info *c_info ) {
+/*
+
+       Splits MPI_COMM_WORLD into the communicator used by this process
+
+       In/out variables:
+
+       -c_info               (type struct comm_info *)
+       Collection of all base data for MPI;
+       see [1] for more information
+
+       On return c_info->communicator holds the result of the split and
+       c_info->group_no the group number assigned to this process
+
+*/
+    int color, key;
+
+    /* Release a communicator left over from an earlier split;
+       the predefined handles are never freed */
+    if (!(c_info->communicator == MPI_COMM_NULL ||
+          c_info->communicator == MPI_COMM_SELF ||
+          c_info->communicator == MPI_COMM_WORLD)) {
+        int errcode = MPI_Comm_free(&c_info->communicator);
+        IMB_err_hand(1, errcode);
+    }
+
+    /* Choice of communicator goes here;
+       NOTE   :  globally more than one communicator is allowed
+       Example: grouping of pairs of processes:
+       0 0 1 1 2 2  .. (if even),  UNDEF 0 0 1 1 2 2  .. (if odd)
+    */
+
+    /* The key defaults to the world rank; for a px x py map with both
+       extents different from 1 it is remapped, unless the remapping
+       yields 0 */
+    key = c_info->w_rank;
+    if (c_info->px != 1 && c_info->py != 1) {
+        const int remapped = (c_info->py * c_info->w_rank) % (c_info->py * c_info->px - 1);
+
+        if (remapped != 0)
+            key = remapped;
+    }
+
+    if (c_info->group_mode < 0) {
+        /* Default choice and Group definition: a single group made of
+           the processes whose key is below NP */
+        c_info->group_no = 0;
+        color = (key < c_info->NP) ? 0 : MPI_UNDEFINED;
+    } else {
+        /* Consecutive blocks of NP keys form one group each; processes
+           beyond the last complete block take no part */
+        const int group_index = key / c_info->NP;
+
+        c_info->group_no = group_index;
+        color = (group_index >= c_info->w_num_procs / c_info->NP) ? MPI_UNDEFINED
+                                                                  : group_index;
+    }
+
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &c_info->communicator);
+}
+
+
+int IMB_valid(struct comm_info * c_info, struct Bench* Bmark, int NP) {
+/*
+
+       Decides whether a benchmark is to be run with NP processes
+
+       Input variables:
+
+       -c_info               (type struct comm_info *)
+       Collection of all base data for MPI;
+       see [1] for more information
+
+       -Bmark                (type struct Bench*)
+       (For explanation of struct Bench type:
+       describes all aspects of modes of a benchmark;
+       see [1] for more information)
+
+       User input benchmark setting
+
+       -NP                   (type int)
+       Number of active processes
+
+       Return value          (type int)
+       1 if the benchmark is to be run, 0 if the setting is invalid
+       (reported by world rank 0) or is silently skipped
+
+*/
+    const int too_few_procs = (NP <= 1);
+    int invalid = 0;
+    int skip = 0;
+
+#ifndef MPIIO
+    /* Single transfers need at least 2 processes and are skipped
+       for more than 2 */
+    if (Bmark->RUN_MODES[0].type == SingleTransfer ||
+        Bmark->RUN_MODES[0].type == SingleElementTransfer) {
+        invalid = too_few_procs;
+        skip = (NP > 2);
+    }
+#endif
+    if (Bmark->RUN_MODES[0].type == ParallelTransferMsgRate)
+        invalid = too_few_procs;
+
+    /* Valid setting: run it unless it is one of the cases to skip */
+    if (!invalid)
+        return !skip;
+
+    /* Erroneous case: only world rank 0 reports it */
+    if (c_info->w_rank == 0)
+        fprintf(unit, "\n# !! Benchmark %s invalid for %d processes !! \n\n", Bmark->name, NP);
+
+    return 0;
+}
+
+void IMB_set_default(struct comm_info* c_info) {
+/*
+
+       Resets every field of comm_info to its start-up value
+
+       Output variables:
+
+       -c_info               (type struct comm_info*)
+       Collection of all base data for MPI;
+       see [1] for more information
+
+*/
+    /* --- process counts, ranks and process map --- */
+    c_info->w_num_procs = 0;
+    c_info->w_rank = -1;
+    c_info->NP = 0;
+    c_info->num_procs = 0;
+    c_info->rank = -1;
+    c_info->px = 0;
+    c_info->py = 0;
+    c_info->pair0 = c_info->pair1 = -2;
+
+    /* --- MPI handles --- */
+    c_info->communicator = MPI_COMM_NULL;
+    c_info->s_data_type = MPI_DATATYPE_NULL;
+    c_info->r_data_type = MPI_DATATYPE_NULL;
+    c_info->red_data_type = MPI_DATATYPE_NULL;
+    c_info->op_type = MPI_OP_NULL;
+    c_info->ERR = MPI_ERRHANDLER_NULL;
+
+    /* --- message selection and sizing --- */
+    c_info->select_tag = 0;
+    c_info->select_source = 0;
+    c_info->size_scale = 0;
+    c_info->contig_type = CT_BASE;
+    c_info->zero_size  = 1;
+    c_info->n_lens = 0;
+    c_info->msglen = NULL;
+    /* IMB 3.2.3 << */
+    c_info->max_msg_log = MAXMSGLOG;
+    c_info->min_msg_log = MINMSGLOG;
+    /* >> IMB 3.2.3  */
+
+    /* --- send / receive buffers --- */
+    c_info->s_buffer = NULL;
+    c_info->s_data = NULL;
+    c_info->s_alloc = 0;
+    c_info->r_buffer = NULL;
+    c_info->r_data = NULL;
+    c_info->r_alloc = 0;
+    c_info->reccnt = NULL;
+    c_info->rdispl = NULL;
+    /* IMB 3.1 << */
+    c_info->max_mem = MAX_MEM_USAGE;
+    /* >> IMB 3.1  */
+
+    /* --- group bookkeeping --- */
+    c_info->group_mode = 0;
+    c_info->n_groups = 0;
+    c_info->group_no = -1;
+    c_info->g_sizes = NULL;
+    c_info->g_ranks = NULL;
+
+    /* --- run-time switches --- */
+    c_info->sync = 1;
+    c_info->root_shift = 0;
+
+#ifdef MPIIO
+    /*   FILE INFORMATION     */
+
+    c_info->filename = NULL;
+    c_info->fh = MPI_FILE_NULL;
+    c_info->amode = 0;
+    c_info->info = MPI_INFO_NULL;
+
+    c_info->File_comm = MPI_COMM_NULL;
+    c_info->File_num_procs = 0;
+    c_info->File_rank = -1;
+    c_info->all_io_procs = 0;
+
+    c_info->etype = MPI_DATATYPE_NULL;
+    c_info->e_size = 0;
+    c_info->filetype = MPI_DATATYPE_NULL;
+
+    c_info->split.Locsize = 0;
+    c_info->split.Offset = (MPI_Offset)0;
+    c_info->split.Totalsize = 0;
+
+    /* View: */
+    c_info->disp = (MPI_Offset)0;
+    c_info->datarep = NULL;
+    c_info->view = MPI_DATATYPE_NULL;
+
+    c_info->ERRF = MPI_ERRHANDLER_NULL;
+#endif /*MPIIO*/
+
+#if (defined EXT || defined RMA)
+    /*   WINDOW INFORMATION   */
+    c_info->WIN = MPI_WIN_NULL;
+    c_info->info = MPI_INFO_NULL;
+    c_info->ERRW = MPI_ERRHANDLER_NULL;
+#endif /*EXT || RMA*/
+}
+
+/* Allocates the pool of list items; its capacity is three times the
+   count returned by IMB_get_all_cases, and the fill position restarts
+   at 0. */
+static void IMB_init_Blist_item_pool() {
+    char** all_names;   /* filled by IMB_get_all_cases, not used further here */
+
+    curr_pos = 0;
+    pool_size = 3 * IMB_get_all_cases(&all_names);
+
+    pool = (struct Blist_item*) malloc(pool_size * sizeof(struct Blist_item));
+    IMB_Assert(pool != NULL);
+}
+
+/* Gives the Blist_item pool back to the heap and zeroes its size and cursor. */
+static void IMB_free_Blist_item_pool() {
+    free(pool);
+    pool = NULL;
+    curr_pos = 0;
+    pool_size = 0;
+}
+
+
+/*
+ * Reserves the next free slot of the Blist_item pool and returns its index.
+ *
+ * If the cursor has reached the pool size, the pool is first enlarged via
+ * realloc by the count that IMB_get_all_cases returns; otherwise the cursor
+ * is asserted to lie below the pool size. The cursor (curr_pos) is then
+ * advanced by one.
+ */
+static int IMB_get_Blist_item_index() {
+    const int slot = curr_pos;
+    int capacity = pool_size;
+
+    if (slot != capacity) {
+        IMB_Assert(slot < capacity);
+    } else {
+        char** all_cases; /* out-argument of IMB_get_all_cases; not used here */
+
+        capacity += IMB_get_all_cases(&all_cases);
+        pool = realloc(pool, sizeof(struct Blist_item)*capacity);
+        IMB_Assert(pool != NULL);
+        pool_size = capacity;
+    }
+
+    curr_pos = slot + 1;
+    return slot;
+}
+
+/* Appends a copy of Bname to the pool-backed list and increments *n; head/tail are pool */
+/* indices (-1 = empty). "a,b,..." is split at the commas, yielding one item per element. */
+static void IMB_add_to_list_tail(const char* Bname, int *list_head_index, int* list_tail_index, int *n) {
+    int head = *list_head_index;
+    int new_item_index;
+    struct Blist_item* blist_item;
+    char *chained_bname = NULL;
+
+    /* Reject an empty name BEFORE reserving a pool slot, so that no slot is wasted. */
+    if (Bname[0] == 0)
+        return;
+    new_item_index = IMB_get_Blist_item_index(); /* may realloc pool: take address after */
+    blist_item = &pool[new_item_index];
+    blist_item->bname = strdup(Bname);
+    IMB_Assert(blist_item->bname != NULL); /* strchr below must not receive NULL */
+    /* Remember the copy; at 1000 the counter steps back, so the last slot gets reused. */
+    duplicated_benchmark_names[duplicated_benchmark_names_cnt++] = blist_item->bname;
+    if (duplicated_benchmark_names_cnt == 1000)
+        duplicated_benchmark_names_cnt--;
+    /* Cut this item's name at the first comma; the rest is appended recursively below. */
+    chained_bname = strchr(blist_item->bname, ',');
+    if (chained_bname != NULL)
+        *chained_bname = 0;
+    blist_item->next_index = -1;
+
+    if (head == -1) {
+        IMB_Assert(*list_tail_index == -1); /* an empty list has no tail either */
+        *list_head_index = new_item_index;
+    } else
+        pool[*list_tail_index].next_index = new_item_index;
+    *list_tail_index = new_item_index;
+    (*n)++;
+    if (chained_bname != NULL)
+        IMB_add_to_list_tail(chained_bname + 1, list_head_index, list_tail_index, n);
+}
+
+/*
+ * Writes the name (bname) of each list item to stdout, every name followed
+ * by a single blank; the chain is walked from list_head_index until an
+ * index of -1 is reached. No newline is emitted.
+ */
+static void IMB_print_list(int list_head_index) {
+    int cursor;
+
+    for (cursor = list_head_index; cursor != -1; cursor = pool[cursor].next_index)
+        printf("%s ", pool[cursor].bname);
+}
+
+/*
+ * Drops every list item whose name IMB_get_bmark_index reports as
+ * LIST_INVALID, printing "Invalid benchmark name <name>" to stderr for each.
+ *
+ * In/out variables:
+ * -p_list_head    (type int*)  pool index of the first item; -1 if list is empty
+ * -p_list_tail    (type int*)  pool index of the last item; -1 if list is empty
+ * -n_cases        (type int*)  item counter, decremented once per dropped item
+ */
+static void IMB_remove_invalid_items(int* p_list_head, int* p_list_tail, int *n_cases) {
+    int prev = -1;
+    int cur = *p_list_head;
+
+    while (cur != -1) {
+        struct Blist_item* item = &pool[cur];
+        int lookup = IMB_get_bmark_index((char*)item->bname);
+        int next;
+
+        if (lookup != LIST_INVALID) {
+            /* valid name: keep the item and move on */
+            prev = cur;
+            cur = item->next_index;
+            continue;
+        }
+
+        next = item->next_index;
+        fprintf(stderr, "Invalid benchmark name %s\n", item->bname);
+        (*n_cases)--;
+
+        /* bypass the item: via the head pointer if it was first, else via its predecessor */
+        if (prev == -1)
+            *p_list_head = next;
+        else
+            pool[prev].next_index = next;
+
+        /* the last item went away: the predecessor (or -1) is the new tail */
+        if (next == -1)
+            *p_list_tail = prev;
+
+        /* the counter must be zero exactly when the list has just become empty */
+        if (prev == -1 && next == -1) {
+            IMB_Assert((*n_cases) == 0);
+        } else {
+            IMB_Assert((*n_cases) > 0);
+        }
+        cur = next;
+    }
+}
+
+/*
+ * Unlinks every list item whose name equals `name` according to
+ * IMB_strcasecmp (a result of 0 counts as a match).
+ * Input variables:
+ * -name           (type const char*)  name to look for
+ * In/out variables:
+ * -p_list_head    (type int*)  pool index of the first item; -1 if list is empty
+ * -p_list_tail    (type int*)  pool index of the last item; -1 if list is empty
+ * -n_cases        (type int*)  item counter, decremented once per unlinked item
+ */
+static void IMB_remove_item_from_list(const char* name, int* p_list_head, int* p_list_tail, int *n_cases) {
+    int prev = -1;
+    int cur = *p_list_head;
+
+    while (cur != -1) {
+        struct Blist_item* item = &pool[cur];
+        int differs = IMB_strcasecmp(name, item->bname);
+        int next = item->next_index;
+
+        if (differs != 0) {
+            /* no match: keep the item and move on */
+            prev = cur;
+            cur = next;
+            continue;
+        }
+        (*n_cases)--;
+
+        /* bypass the item: via the head pointer if it was first, else via its predecessor */
+        if (prev == -1)
+            *p_list_head = next;
+        else
+            pool[prev].next_index = next;
+
+        /* the last item went away: the predecessor (or -1) is the new tail */
+        if (next == -1)
+            *p_list_tail = prev;
+
+        /* the counter must be zero exactly when the list has just become empty */
+        if (prev == -1 && next == -1) {
+            IMB_Assert((*n_cases) == 0);
+        } else {
+            IMB_Assert((*n_cases) > 0);
+        }
+        cur = next;
+    }
+}
+/********************************************************************/
+
+
diff --git a/src_c/IMB_init_file.c b/src_c/IMB_init_file.c
new file mode 100644
index 00000000..f2fe5665
--- /dev/null
+++ b/src_c/IMB_init_file.c
@@ -0,0 +1,375 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ File: IMB_init_file.c 
+
+ Implemented functions: 
+
+ IMB_init_file_content;
+ IMB_init_file;
+ IMB_free_file;
+ IMB_del_file;
+ IMB_open_file;
+
+ ***************************************************************************/
+
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+
+
+
+void IMB_init_file_content(void* BUF, int pos1, int pos2) {
+/*
+
+                      Initializes contents of a file for READ benchmarks.
+                      Delegates to IMB_ass_buf(BUF, 0, pos1, pos2, 1).
+Input variables:
+
+-pos1                 (type int)
+-pos2                 (type int)
+                      pos1, pos2: target positions (start/end) in file;
+                      passed unchanged to IMB_ass_buf
+In/out variables:
+
+-BUF                  (type void*)
+                      Content of buffer to be written to file between these
+                      positions; presumably filled by IMB_ass_buf (defined elsewhere)
+*/
+    IMB_ass_buf(BUF, 0, pos1, pos2, 1);
+}
+
+
+
+
+/* << IMB 3.1 */
+int IMB_init_file(struct comm_info* c_info, struct Bench* Bmark, struct iter_schedule* ITERATIONS, int NP) {
+/* >> IMB 3.1 */
+/*
+                      Sets up the file related components of c_info (name,
+                      communicator, access mode) and, unless Bmark->access == no,
+                      lets File_rank 0 delete and re-create the file at its target size
+
+Input variables:
+
+-Bmark                (type struct Bench*)
+                      (For explanation of struct Bench type:
+                      describes all aspects of modes of a benchmark;
+                      see [1] for more information)
+
+                      Given file i/o benchmark
+
+-ITERATIONS           (type struct iter_schedule *)
+                      Repetition scheduling
+
+-NP                   (type int)
+                      Number of active processes (not read by this function)
+
+In/out variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+                      MPI_File component is set accordingly
+Return value          (type int)
+                      Error code (always 0 here; MPI errors go to IMB_err_hand)
+*/
+    int error = 0;
+    int fnlen;
+
+    IMB_free_file(c_info);
+
+    c_info->fh = MPI_FILE_NULL;
+    c_info->etype = MPI_BYTE;
+    MPI_Type_size(c_info->etype, &c_info->e_size);
+    c_info->filetype = c_info->etype;
+
+    IMB_user_set_info(&c_info->info);
+
+    c_info->disp = (MPI_Offset)0;
+
+    c_info->datarep = IMB_str("native");
+
+    if (Bmark->RUN_MODES[0].type == SingleTransfer)
+        c_info->all_io_procs = 1;
+    else
+        c_info->all_io_procs = c_info->num_procs;
+
+    fnlen = 1 + strlen(FILENAME);
+    /* July 2002 fix V2.2.1: group_mode >= 0; room for "_g" plus any decimal int */
+    if (c_info->group_mode >= 0)
+        fnlen += 2 + 3 * (int)sizeof(int);
+
+    if (Bmark->fpointer == priv) {
+        if (c_info->rank > c_info->all_io_procs - 1 || c_info->rank < 0) {
+            c_info->File_comm = MPI_COMM_NULL;
+            c_info->File_rank = -1;
+            c_info->File_num_procs = 0;
+        } else {
+            c_info->File_comm = MPI_COMM_SELF;
+            c_info->File_rank = 0;
+            c_info->File_num_procs = 1;
+        }
+
+        if (c_info->File_rank >= 0) {
+
+            fnlen += 1 + 3 * (int)sizeof(int); /* "_" plus any decimal int: w_rank >= 1000 fits */
+
+            c_info->filename = (char*)IMB_v_alloc(sizeof(char)*fnlen, "Init_File");
+
+            /* July 2002 fix V2.2.1: group_mode >= 0 */
+            if (c_info->group_mode >= 0)
+                sprintf(c_info->filename, "%s_g%d_%d", FILENAME, c_info->group_no, c_info->w_rank);
+            else
+                sprintf(c_info->filename, "%s_%d", FILENAME, c_info->w_rank);
+
+            c_info->amode = MPI_MODE_CREATE | MPI_MODE_RDWR | MPI_MODE_UNIQUE_OPEN;
+        }
+    } else {
+        if (c_info->communicator == MPI_COMM_NULL) {
+            c_info->File_comm = MPI_COMM_NULL;
+            c_info->File_rank = -1;
+            c_info->File_num_procs = 0;
+        } else {
+            c_info->File_comm = c_info->communicator;
+            c_info->File_rank = c_info->rank;
+            c_info->File_num_procs = c_info->num_procs;
+        }
+
+        c_info->filename = (char*)IMB_v_alloc(sizeof(char)*fnlen, "Init_File");
+        /* July 2002 fix V2.2.1: group_mode >= 0 */
+        if (c_info->group_mode >= 0)
+            sprintf(c_info->filename, "%s_g%d", FILENAME, c_info->group_no);
+        else
+            sprintf(c_info->filename, "%s", FILENAME);
+
+        c_info->amode = MPI_MODE_CREATE | MPI_MODE_RDWR;
+    }
+
+    if (Bmark->access == no) return 0;
+
+    IMB_del_file(c_info); // if exists
+
+    if (c_info->File_rank == 0) {
+        int ierr, size, total, i;
+        MPI_Status stat;
+
+        /* << IMB 3.1. fixes of size */
+        if (c_info->n_lens > 0) {
+            size = 0;
+            for (i = 0; i < c_info->n_lens; i++)
+                size = max(size, c_info->msglen[i]);
+        } else
+            size = 1 << c_info->max_msg_log;
+
+        total = max(size, ITERATIONS->overall_vol);
+
+        if (size > 0 && ITERATIONS->overall_vol / size > MSGSPERSAMPLE) /* size is 0 if all msglen are 0 */
+            total = size*MSGSPERSAMPLE;
+
+        /* >> IMB 3.1 */
+
+        /* July 2002 fix V2.2.1: calculation of file sizes in "priv" case */
+        if (Bmark->fpointer == priv) {
+            int n_io = c_info->all_io_procs; /* no longer shadows the NP parameter */
+
+            total = (total + n_io - 1) / n_io;
+
+            if (size%n_io)
+                total += asize*MSGSPERSAMPLE;
+        }
+        /* July 2002 end fix */
+
+        ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
+                             c_info->amode, c_info->info, &c_info->fh);
+        IMB_err_hand(1, ierr);
+
+        ierr = MPI_File_set_view(c_info->fh, (MPI_Offset)0,
+                                 c_info->etype, c_info->etype, c_info->datarep, c_info->info);
+        IMB_err_hand(1, ierr); /* checked like the neighbouring MPI calls */
+        /* July 2002 fix V2.2.1: size <-> total */
+        ierr = MPI_File_set_size(c_info->fh, total);
+        IMB_err_hand(1, ierr);
+
+        if (Bmark->access == get) {
+            /* Prepare File for input */
+            int el_size = 1 << 20;
+            int pos1, pos2;
+
+            pos1 = 0;
+
+            while (pos1 < total) {
+                pos2 = min(total - 1, pos1 + el_size - 1);
+                size = ((pos2 - pos1) / asize + 1)*asize;
+
+
+                IMB_alloc_buf(c_info, "Init_File 1 ", size, 0);
+                IMB_init_file_content(c_info->s_buffer, pos1, pos2);
+
+                ierr = MPI_File_write(c_info->fh, c_info->s_buffer, pos2 - pos1 + 1, c_info->etype, &stat);
+                IMB_err_hand(1, ierr);
+
+                pos1 = pos2 + 1;
+            }
+
+            IMB_del_s_buf(c_info);
+        }
+
+        ierr = MPI_File_close(&c_info->fh);
+        IMB_err_hand(1, ierr);
+    }
+
+    return error;
+}
+
+void IMB_free_file(struct comm_info * c_info) {
+/*
+In/out variables:
+
+-c_info               (type struct comm_info *)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+                      File related components are free-d and reset to
+                      NULL initialization
+*/
+    /* Decide about the view BEFORE filename is released: IMB_v_free gets the pointer's */
+    /* address and presumably NULLs it, so a later filename test would always fail.    */
+    const int had_filename = (c_info->filename != (char*)NULL);
+    if (c_info->filename != (char*)NULL) IMB_v_free((void**)&c_info->filename);
+    if (c_info->datarep != (char*)NULL) IMB_v_free((void**)&c_info->datarep);
+    if (had_filename && c_info->view != MPI_DATATYPE_NULL)
+        MPI_Type_free(&c_info->view);
+    if (c_info->info != MPI_INFO_NULL)
+        MPI_Info_free(&c_info->info);
+    if (c_info->fh != MPI_FILE_NULL)
+        MPI_File_close(&c_info->fh);
+    c_info->filename = (char*)NULL;
+    c_info->datarep = (char*)NULL;
+    c_info->view = MPI_DATATYPE_NULL;
+    c_info->info = MPI_INFO_NULL;
+    c_info->fh = MPI_FILE_NULL;
+}
+
+void IMB_del_file(struct comm_info* c_info) {
+/*
+In/out variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+                      File associated to MPI_File component is erased from disk
+                      (by File_rank 0, between two barriers on File_comm)
+*/
+    int ierr; /* was assigned below without any declaration in this function */
+    if (c_info->File_comm != MPI_COMM_NULL) {
+        if (c_info->fh != MPI_FILE_NULL)
+            MPI_File_close(&c_info->fh);
+        MPI_Barrier(c_info->File_comm);
+
+        if (c_info->filename != (char*)NULL) {
+            if (c_info->File_rank == 0) {
+                // touch file
+                ierr = MPI_File_open(MPI_COMM_SELF, c_info->filename,
+                                     c_info->amode, MPI_INFO_NULL, &c_info->fh);
+                if (c_info->fh != MPI_FILE_NULL)
+                    MPI_File_close(&c_info->fh);
+
+                /* IMB_3.0: simplify file deletion */
+                ierr = MPI_File_delete(c_info->filename, MPI_INFO_NULL);
+                (void)ierr; /* open and delete stay best effort: result deliberately unchecked */
+            }
+        }
+        MPI_Barrier(c_info->File_comm);
+    }
+}
+
+int IMB_open_file(struct comm_info* c_info) {
+/*
+                      Opens c_info->filename on File_comm and sets the file view;
+                      does nothing on processes whose File_comm is MPI_COMM_NULL
+In/out variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+                      fh receives the opened file; amode, info, disp, etype,
+                      filetype and datarep are read
+
+Return value          (type int)
+                      0 without File_comm, else the code of MPI_File_set_view
+*/
+    int ierr = 0;
+
+    if (c_info->File_comm == MPI_COMM_NULL)
+        return ierr;
+
+    ierr = MPI_File_open(c_info->File_comm, c_info->filename,
+                         c_info->amode, c_info->info, &c_info->fh);
+    MPI_ERRHAND(ierr);
+    ierr = MPI_File_set_view(c_info->fh, c_info->disp, c_info->etype,
+                             c_info->filetype, c_info->datarep, c_info->info);
+    MPI_ERRHAND(ierr);
+    return ierr;
+}
diff --git a/src_c/IMB_init_transfer.c b/src_c/IMB_init_transfer.c
new file mode 100644
index 00000000..5ce39563
--- /dev/null
+++ b/src_c/IMB_init_transfer.c
@@ -0,0 +1,308 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ File: IMB_init_transfer.c 
+
+ Implemented functions: 
+
+ IMB_init_transfer;
+ IMB_close_transfer;
+
+ ***************************************************************************/
+
+
+
+
+
+#include "mpi.h"
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+
+
+
+/* IMB 3.1 << */
+void IMB_init_transfer(struct comm_info* c_info, struct Bench* Bmark, int size, MPI_Aint acc_size) {
+/* >> IMB 3.1  */
+/*
+
+                          For IO  case: file splitting/view is set, file is opened
+                          For EXT case: window is created and synchronized (MPI_Win_fence)
+                          For RMA case: window is created (no fence)
+Input variables:
+
+-Bmark                    (type struct Bench*)
+                          (For explanation of struct Bench type:
+                          describes all aspects of modes of a benchmark;
+                          see [1] for more information)
+
+                          Given benchmark
+
+-size                     (type int)
+                          (Only IO case): used to determine file view
+
+IMB 3.1 <<
+-acc_size                 (type MPI_Aint)
+                          (Only EXT and RMA case): window size given to MPI_Win_create
+>> IMB 3.1
+
+In/out variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+                          Corresponding components (File or Window related) are set
+
+*/
+
+#if defined MPIIO
+#include <limits.h>
+    int ne, baslen, mod;
+    int ierr;
+    size_t pos1, pos2;
+
+    if (c_info->File_rank < 0 || Bmark->access == no) return;
+
+    if (size > 0) {
+        IMB_get_rank_portion(c_info->File_rank, c_info->all_io_procs, size, asize,
+                             &pos1, &pos2);
+        baslen = (pos2 >= pos1) ? pos2 - pos1 + 1 : 0;
+    } else {
+        baslen = 0;
+        pos2 = pos1 = 0;
+    }
+
+    if (c_info->view != MPI_DATATYPE_NULL)
+        MPI_Type_free(&c_info->view);
+
+    if (Bmark->fpointer == priv) {
+        c_info->split.Locsize = baslen;
+        c_info->split.Offset = 0;
+        c_info->split.Totalsize = baslen;
+
+        if (Bmark->access == put)
+            IMB_set_buf(c_info, c_info->File_rank, 0,
+                        (baslen > 0) ? baslen - 1 : 0,
+                        1, 0);
+
+        if (Bmark->access == get)
+            IMB_set_buf(c_info, c_info->File_rank, 1, 0, 0,
+                        (baslen > 0) ? baslen - 1 : 0);
+    }
+
+    if (Bmark->fpointer == indv_block || Bmark->fpointer == shared ||
+        Bmark->fpointer == explic) {
+        int bllen[3];
+
+        MPI_Aint displ[3];
+        MPI_Datatype types[3];
+
+        bllen[0] = 1;
+        displ[0] = 0;
+        types[0] = MPI_LB;
+
+        bllen[1] = baslen;
+        displ[1] = pos1;
+        types[1] = c_info->etype;
+
+        bllen[2] = 1;
+        displ[2] = size;
+        types[2] = MPI_UB;
+
+        if (Bmark->fpointer == indv_block) {
+            /* July 2002 fix V2.2.1: handle empty view case separately */
+            if (baslen > 0) {
+                /* end change */
+                ierr = MPI_Type_create_struct(3, bllen, displ, types, &c_info->view);
+                IMB_err_hand(1, ierr);
+                ierr = MPI_Type_commit(&c_info->view);
+                IMB_err_hand(1, ierr);
+                c_info->filetype = c_info->view;
+
+                /* July 2002 fix V2.2.1: handle empty case */
+            } else
+                c_info->filetype = c_info->etype;
+            /* end change */
+        }
+
+        if (Bmark->access == put)
+            IMB_set_buf(c_info, c_info->File_rank, 0, (baslen > 0) ? baslen - 1 : 0, 1, 0);
+
+        if (Bmark->access == get)
+            IMB_set_buf(c_info, c_info->File_rank, 1, 0, 0, (baslen > 0) ? baslen - 1 : 0);
+
+        c_info->split.Locsize = bllen[1];
+        c_info->split.Offset = pos1;
+        c_info->split.Totalsize = size;
+    }
+
+    ierr = IMB_open_file(c_info);
+
+#elif defined  EXT
+    MPI_Aint sz;
+    int s_size, r_size;
+    int ierr;
+
+    ierr = 0;
+
+    if (Bmark->reduction) {
+        MPI_Type_size(c_info->red_data_type, &s_size);
+        r_size = s_size;
+    } else {
+        MPI_Type_size(c_info->s_data_type, &s_size);
+        MPI_Type_size(c_info->r_data_type, &r_size);
+    }
+
+    if (c_info->rank >= 0) {
+        IMB_user_set_info(&c_info->info);
+
+        /* IMB 3.1 << */
+        sz = acc_size;
+        /* >> IMB 3.1  */
+
+        if (Bmark->access == put) {
+            ierr = MPI_Win_create(c_info->r_buffer, sz, r_size, c_info->info,
+                                  c_info->communicator, &c_info->WIN);
+            MPI_ERRHAND(ierr);
+            ierr = MPI_Win_fence(0, c_info->WIN);
+            MPI_ERRHAND(ierr);
+        } else if (Bmark->access == get) {
+            ierr = MPI_Win_create(c_info->s_buffer, sz, s_size, c_info->info,
+                                  c_info->communicator, &c_info->WIN);
+            MPI_ERRHAND(ierr);
+            ierr = MPI_Win_fence(0, c_info->WIN);
+            MPI_ERRHAND(ierr);
+        }
+    }
+#elif defined RMA
+    int s_size, r_size;
+    int ierr = 0;
+
+    if (Bmark->reduction) {
+        MPI_Type_size(c_info->red_data_type, &s_size);
+        r_size = s_size;
+    } else {
+        MPI_Type_size(c_info->s_data_type, &s_size);
+        MPI_Type_size(c_info->r_data_type, &r_size);
+    }
+
+    if (c_info->rank >= 0) {
+        IMB_user_set_info(&c_info->info);
+
+        if (Bmark->access == put) {
+            ierr = MPI_Win_create(c_info->r_buffer, acc_size, r_size, c_info->info,
+                                  c_info->communicator, &c_info->WIN);
+        } else if (Bmark->access == get) { /* window over s_buffer: unit is s_size, as in EXT */
+            ierr = MPI_Win_create(c_info->s_buffer, acc_size, s_size, c_info->info,
+                                  c_info->communicator, &c_info->WIN);
+        }
+        MPI_ERRHAND(ierr);
+    }
+#endif
+
+    IMB_set_errhand(c_info);
+    err_flag = 0;
+}
+
+
+void IMB_close_transfer (struct comm_info* c_info, struct Bench* Bmark, int size) {
+/*
+
+                      Counterpart of IMB_init_transfer: releases what a transfer
+                      set up (file view and file handle, or window)
+
+Input variables:
+
+-Bmark                (type struct Bench*)
+                      (For explanation of struct Bench type:
+                      describes all aspects of modes of a benchmark;
+                      see [1] for more information)
+
+                      Given benchmark; only its access mode is inspected (IO case)
+
+-size                 (type int)
+                      Not referenced by this function
+
+In/out variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+                      IO case: view is freed, fh is closed;
+                      EXT/RMA case: WIN is freed
+
+*/
+#if defined MPIIO
+    if (c_info->view != MPI_DATATYPE_NULL)
+        MPI_Type_free(&c_info->view);
+
+    /* a file handle is closed only for file ranks of benchmarks that access the file */
+    if (c_info->File_rank >= 0 && Bmark->access != no) {
+        if (c_info->fh != MPI_FILE_NULL)
+            MPI_File_close(&c_info->fh);
+    }
+#elif (defined EXT || defined RMA)
+    if (c_info->WIN != MPI_WIN_NULL)
+        MPI_Win_free(&c_info->WIN);
+#endif /*MPIIO, else EXT || RMA*/
+}
+
diff --git a/src_c/IMB_input.txt b/src_c/IMB_input.txt
new file mode 100644
index 00000000..fef0a8b6
--- /dev/null
+++ b/src_c/IMB_input.txt
@@ -0,0 +1,3 @@
+pingping
+pingpong
+Allgather
diff --git a/src/IMB_mem_info.h b/src_c/IMB_mem_info.h
similarity index 95%
rename from src/IMB_mem_info.h
rename to src_c/IMB_mem_info.h
index 12f3d5da..3163ac44 100644
--- a/src/IMB_mem_info.h
+++ b/src_c/IMB_mem_info.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
diff --git a/src/IMB_mem_manager.c b/src_c/IMB_mem_manager.c
similarity index 51%
rename from src/IMB_mem_manager.c
rename to src_c/IMB_mem_manager.c
index 0946ddc1..370f9554 100644
--- a/src/IMB_mem_manager.c
+++ b/src_c/IMB_mem_manager.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -88,50 +87,40 @@ For more documentation than found here, see
 
 #include <limits.h> /* for INT_MAX declaration*/
 
-void* IMB_v_alloc(size_t Len, char* where)
+void* IMB_v_alloc(size_t Len, char* where) {
 /*
 
-                      
                       Allocates void* memory
-                      
 
+Input variables:
 
-Input variables: 
-
--Len                  (type int)                      
+-Len                  (type int)
                       #bytes to allocate
-                      
 
--where                (type char*)                      
+-where                (type char*)
                       Comment (marker for calling place)
-                      
-
 
-Return value          (type void*)                      
+Return value          (type void*)
                       Allocated pointer
-                      
-
 
 */
-{
     void* B;
 
-    Len=max(asize,Len);
+    Len = max(asize, Len);
 
-    if( (B = (void*)malloc(Len) ) == NULL )
-    {
-	printf ("Memory allocation failed. code position: %s. tried to alloc."
+    if ((B = (void*)malloc(Len)) == NULL) {
+        printf("Memory allocation failed. code position: %s. tried to alloc."
 #ifdef WIN_IMB
-		" %I64u bytes\n",
+            " %I64u bytes\n",
 #else
-		" %lu bytes\n",
+            " %lu bytes\n",
 #endif
-		where,Len);
-	return NULL;
+        where, Len);
+        return NULL;
     }
 #ifdef DEBUG
-    if( dbg_file )
-	fprintf(dbg_file,"alloc %p %s\n",B,where);
+    if (dbg_file)
+        fprintf(dbg_file, "alloc %p %s\n", B, where);
 #endif
 
     num_alloc++;
@@ -140,269 +129,223 @@ Return value          (type void*)
 
 #if 0  
 /***************************************************************************/
-void IMB_i_alloc(int** B, size_t Len, char* where )
+void IMB_i_alloc(int** B, size_t Len, char* where ) {
 /*
 
-                      
-                      Allocates int memory
-                      
-
+                          Allocates int memory
 
-Input variables: 
+Input variables:
 
--Len                  (type int)                      
-                      #int's to allocate
-                      
+-Len                      (type size_t)
+                          #int's to allocate
 
--where                (type char*)                      
-                      Comment (marker for calling place)
-                      
+-where                    (type char*)
+                          Comment (marker for calling place)
 
+In/out variables:
 
-In/out variables: 
-
--B                    (type int**)                      
-                      *B contains allocated memory
-                      
-
+-B                        (type int**)
+                          *B contains allocated memory
 
 */
-{
-    Len=max(1,Len);
-    *B = (int*) IMB_v_alloc(sizeof(int)*Len, where);
+    Len = max(1, Len);
+    *B = (int*)IMB_v_alloc(sizeof(int)*Len, where);
 }
 #endif /*0*/
 
 
 /***************************************************************************/
 void IMB_alloc_buf(struct comm_info* c_info, char* where, size_t s_len, 
-                   size_t r_len)
+                   size_t r_len) {
 /*
 
-                      
                       Allocates send/recv buffers for message passing
-                      
-
 
-Input variables: 
+Input variables:
 
--where                (type char*)                      
+-where                (type char*)
                       Comment (marker for calling place)
-                      
 
--s_len                (type int)                      
+
+-s_len                (type size_t)
                       Send buffer length (bytes)
-                      
 
--r_len                (type int)                      
+-r_len                (type size_t)
                       Recv buffer length (bytes)
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      Send/Recv buffer components get allocated
-                      
 
+                      Send/Recv buffer components get allocated
 
 */
-{
 /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
 #if ( defined EXT || defined MPIIO || RMA )
-    MPI_Aint slen = (MPI_Aint)(max(1,s_len));
-    MPI_Aint rlen = (MPI_Aint)(max(1,r_len));
+    MPI_Aint slen = (MPI_Aint)(max(1, s_len));
+    MPI_Aint rlen = (MPI_Aint)(max(1, r_len));
     int ierr;
 #else
-    s_len=max(1,s_len);
-    r_len=max(1,r_len);
+    s_len = max(1, s_len);
+    r_len = max(1, r_len);
 #endif
-    
-    if( c_info->s_alloc < s_len )
-    {
+
+    if (c_info->s_alloc < s_len) {
         /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
 #if ( defined EXT || defined MPIIO || RMA)
-    if (c_info->s_buffer)	
-        MPI_Free_mem(c_info->s_buffer); 
+        if (c_info->s_buffer)
+            MPI_Free_mem(c_info->s_buffer);
 
-	ierr=MPI_Alloc_mem(slen, MPI_INFO_NULL, &c_info->s_buffer);
-	MPI_ERRHAND(ierr);
-	c_info->s_alloc = slen;
+        ierr = MPI_Alloc_mem(slen, MPI_INFO_NULL, &c_info->s_buffer);
+        MPI_ERRHAND(ierr);
+        c_info->s_alloc = slen;
 #else
-	IMB_v_free((void**)&c_info->s_buffer); 
+        IMB_v_free((void**)&c_info->s_buffer);
+
+        s_len *= c_info->size_scale;
 
-	c_info->s_buffer = IMB_v_alloc(s_len,where);
-	c_info->s_alloc = s_len;
+        c_info->s_buffer = IMB_v_alloc(s_len, where);
+        c_info->s_alloc = s_len;
 #endif
 
-	c_info->s_data  = (assign_type*)c_info->s_buffer;
+        c_info->s_data = (assign_type*)c_info->s_buffer;
     }
 
-    if( c_info->r_alloc < r_len )
-    {
-	 /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
+    if (c_info->r_alloc < r_len) {
+        /* July 2002 V2.2.1 change: use MPI_Alloc_mem */
 #if ( defined EXT || defined MPIIO || RMA)
-    if (c_info->r_buffer)	
-	    MPI_Free_mem(c_info->r_buffer); 
+        if (c_info->r_buffer)
+            MPI_Free_mem(c_info->r_buffer);
 
-	ierr=MPI_Alloc_mem(rlen, MPI_INFO_NULL, &c_info->r_buffer);
-	MPI_ERRHAND(ierr);
-	c_info->r_alloc = rlen;
+        ierr = MPI_Alloc_mem(rlen, MPI_INFO_NULL, &c_info->r_buffer);
+        MPI_ERRHAND(ierr);
+        c_info->r_alloc = rlen;
 #else
-	IMB_v_free((void**)&c_info->r_buffer); 
+        IMB_v_free((void**)&c_info->r_buffer);
+
+        r_len *= c_info->size_scale;
 
-	c_info->r_buffer = IMB_v_alloc(r_len,where);
-	c_info->r_alloc = r_len;
+
+        c_info->r_buffer = IMB_v_alloc(r_len, where);
+        c_info->r_alloc = r_len;
 #endif
 
-	c_info->r_data = (assign_type*)c_info->r_buffer;
+        c_info->r_data = (assign_type*)c_info->r_buffer;
     }
 }
 
 
 
 /***************************************************************************/
-void IMB_alloc_aux(size_t L, char* where)
+void IMB_alloc_aux(size_t L, char* where) {
 /*
 
-                      
                       Allocates global auxiliary memory AUX
-                      
-
 
-Input variables: 
+Input variables:
 
--L                    (type int)                      
+-L                    (type size_t)
                       #Bytes to allocate
-                      
 
--where                (type char*)                      
+-where                (type char*)
                       Comment (marker for calling place)
-                      
-
 
 */
-{
     L += asize;
-    if( AUX_LEN < L)
-    {
-	if( AUX_LEN>0 ) IMB_v_free((void**)&AUX);
+    if (AUX_LEN < L) {
+        if (AUX_LEN > 0)
+            IMB_v_free((void**)&AUX);
 
-	AUX = IMB_v_alloc(L, where);
-    AUX_LEN = AUX ? L : 0;
+        AUX = IMB_v_alloc(L, where);
+        AUX_LEN = AUX ? L : 0;
     }
 }
 
 
 
 /***************************************************************************/
-void IMB_free_aux()
+void IMB_free_aux() {
 /*
 
-                      
                       Free-s global auxiliary memory AUX
-                      
-
 
 */
-{
-    if (AUX_LEN > 0 ) {
-	IMB_v_free((void**)&AUX); AUX_LEN=0; 
+    if (AUX_LEN > 0) {
+        IMB_v_free((void**)&AUX); AUX_LEN = 0;
     }
 }
 
-
-void IMB_v_free(void **B)
+void IMB_v_free(void **B) {
 /*
 
-                      
                       Free-s memory
-                      
-
 
-In/out variables: 
+In/out variables:
 
--B                    (type void**)                      
+-B                    (type void**)
                       (*B) will be free-d
-                      
-
 
 */
-{
-    if( *B ) 
-    {
+    if (*B) {
 #ifdef DEBUG
-	if( dbg_file )
-	    fprintf(dbg_file,"delete %p \n",*B);
+        if (dbg_file)
+            fprintf(dbg_file, "delete %p \n", *B);
 #endif
-	free(*B);
-	num_free++;
+        free(*B);
+        num_free++;
     }
 
-    *B=NULL;
+    *B = NULL;
 }
 
 
 /***************************************************************************/
 void IMB_ass_buf(void* buf, int rank, size_t pos1, 
-                 size_t pos2, int value)
+                 size_t pos2, int value) {
 /*
 
-                      
                       Assigns values to a buffer
-                      
-
 
-Input variables: 
+Input variables:
 
--rank                 (type int)                      
+-rank                 (type int)
                       Rank of calling process
-                      
 
 -pos1                 (type int)
--pos2                 (type int)                      
-                      Assignment between byte positions pos1, pos2 
-                      
+-pos2                 (type int)
+                      Assignment between byte positions pos1, pos2
 
--value                (type int)                      
+-value                (type int)
                       1/0 for non-zero (defined in IMB_settings.h)/ zero value
-                      
-
 
-In/out variables: 
+In/out variables:
 
--buf                  (type void*)                      
+-buf                  (type void*)
                       Values assigned within given positions
-                      
-
 
 */
-{
-    if( pos2>= pos1 )
-    {
-	size_t a_pos1, a_pos2, i, j;
-	a_pos1 =  pos1/asize;
-
-	if( pos2>=pos1 )
-	    a_pos2 =  pos2/asize;
-	else
-	    a_pos2 =  a_pos1-1;
-
-	if( value )
-	    for ( i=a_pos1,j=0 ; i<=a_pos2; i++,j++ )
-		((assign_type *)buf)[j] = BUF_VALUE(rank,i);
-	else
-	    for ( i=a_pos1,j=0 ; i<=a_pos2; i++,j++ )
-		((assign_type *)buf)[j] = 0.;
-
-	if( a_pos1*asize != pos1 )
-	{
-	    void* xx = (void*)(((char*)buf)+pos1-a_pos1*asize);
-	    memmove(buf,xx,pos2-pos1+1); 
-	}
+    if (pos2 >= pos1) {
+        size_t a_pos1, a_pos2, i, j;
+        a_pos1 = pos1 / asize;
+
+        if (pos2 >= pos1)
+            a_pos2 = pos2 / asize;
+        else
+            a_pos2 = a_pos1 - 1;
+
+        if (value)
+            for (i = a_pos1, j = 0; i <= a_pos2; i++, j++)
+                ((assign_type *)buf)[j] = BUF_VALUE(rank, i);
+        else
+            for (i = a_pos1, j = 0; i <= a_pos2; i++, j++)
+                ((assign_type *)buf)[j] = 0.;
+
+        if (a_pos1*asize != pos1) {
+            void* xx = (void*)(((char*)buf) + pos1 - a_pos1*asize);
+            memmove(buf, xx, pos2 - pos1 + 1);
+        }
     } /*if( pos2>= pos1 )*/
 }
 
@@ -410,87 +353,71 @@ In/out variables:
 
 /***************************************************************************/
 void IMB_set_buf(struct comm_info* c_info, int selected_rank, size_t s_pos1, 
-                 size_t s_pos2, size_t r_pos1, size_t r_pos2)
+                 size_t s_pos2, size_t r_pos1, size_t r_pos2) {
 /*
 
-                      
                       Sets Send/Recv buffers for a selected rank
                       (by call to => IMB_ass_buf)
-                      
-
 
-Input variables: 
+Input variables:
 
--selected_rank        (type int)                      
+-selected_rank        (type int)
                       Relevant process rank
                       (Can be different from local rank: for checking purposes)
-                      
 
 -s_pos1               (type int)
--s_pos2               (type int)                      
+-s_pos2               (type int)
                       s_pos1 .. s_pos2 positions for send buffer
-                      
 
 -r_pos1               (type int)
--r_pos2               (type int)                      
+-r_pos2               (type int)
                       r_pos1 .. r_pos2 positions for recv buffer
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      Corresponding buffer components are assigned values
-                      
 
+                      Corresponding buffer components are assigned values
 
 */
-{
 /*
-Sets c_info->s_buffer/c_info->r_buffer int byte positions 
+Sets c_info->s_buffer/c_info->r_buffer in byte positions
 s_pos1..s_pos2/r_pos1..r_pos2
 Values are taken for "selected_rank"
 Checks right allocation.
 */
     size_t s_len, r_len;
 
-    s_len = (max(s_pos2-s_pos1,0)/asize+1)*asize;
-    r_len = (max(r_pos2-r_pos1,0)/asize+1)*asize;
+    s_len = (max(s_pos2 - s_pos1, 0) / asize + 1)*asize;
+    r_len = (max(r_pos2 - r_pos1, 0) / asize + 1)*asize;
 
-    IMB_alloc_buf(c_info, "set_buf 1",s_len, r_len);
+    IMB_alloc_buf(c_info, "set_buf 1", s_len, r_len);
 
-    if( s_pos2 >= s_pos1 ) 
-	IMB_ass_buf( c_info->s_buffer, selected_rank, s_pos1, s_pos2, 1);
+    if (s_pos2 >= s_pos1)
+        IMB_ass_buf(c_info->s_buffer, selected_rank, s_pos1, s_pos2, 1);
 
-    if( r_pos2 >= r_pos1 ) 
-	IMB_ass_buf( c_info->r_buffer, selected_rank, r_pos1, r_pos2, 0);
+    if (r_pos2 >= r_pos1)
+        IMB_ass_buf(c_info->r_buffer, selected_rank, r_pos1, r_pos2, 0);
 }
 
 
 /***************************************************************************/
-void IMB_init_pointers(struct comm_info *c_info )
+void IMB_init_pointers(struct comm_info *c_info ) {
 /*
 
-                      
                       Initializes pointer components of comm_info
-                      
 
+In/out variables:
 
-In/out variables: 
-
--c_info               (type struct comm_info *)                      
+-c_info               (type struct comm_info *)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-                      Corresponding pointer components are initialized
-                      
 
+                      Corresponding pointer components are initialized
 
 */
-{
 /********************************************************************
 
 
@@ -501,35 +428,37 @@ In/Out     : c_info   | struct comm_info* | see comm_info.h
                       |                   | Pointers initialized
 ----------------------------------------------------------------------*/
 
-    MPI_Comm_size(MPI_COMM_WORLD,&c_info->w_num_procs);
-    MPI_Comm_rank(MPI_COMM_WORLD,&c_info->w_rank     );
+    MPI_Comm_size(MPI_COMM_WORLD, &c_info->w_num_procs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &c_info->w_rank);
 
 #ifdef DEBUG
     dbgf_name = IMB_str("DBG_   ");
-    sprintf(dbgf_name+4,"%d",c_info->w_rank);
-    dbg_file = fopen(dbgf_name,"w");
+    sprintf(dbgf_name + 4, "%d", c_info->w_rank);
+    dbg_file = fopen(dbgf_name, "w");
     unit = dbg_file;
 #endif
 
-    c_info->s_data_type   = MPI_BYTE;  /* DATA TYPE of SEND    BUFFER    */ 
-    c_info->r_data_type   = MPI_BYTE;  /* DATA TYPE of RECEIVE BUFFER    */
+    c_info->s_data_type = MPI_BYTE;  /* DATA TYPE of SEND    BUFFER    */
+    c_info->r_data_type = MPI_BYTE;  /* DATA TYPE of RECEIVE BUFFER    */
 
-    c_info->op_type       = MPI_SUM;   /* OPERATION TYPE IN Allred       */
+    c_info->op_type = MPI_SUM;   /* OPERATION TYPE IN Allred       */
     c_info->red_data_type = MPI_FLOAT; /* NOTE: NO 'CAST' CHECK IN. IBUF */
 
-
-    c_info->communicator= MPI_COMM_NULL;
+    c_info->size_scale   = 1;
+    c_info->zero_size    = 1;
+    c_info->contig_type  = CT_BASE;
+    c_info->communicator = MPI_COMM_NULL;
 
     /* Auxiliary space */
-    IMB_i_alloc(int, c_info->g_ranks,c_info->w_num_procs,"Init_Pointers 1");
-    IMB_i_alloc(int, c_info->g_sizes,c_info->w_num_procs,"Init_Pointers 2");
+    IMB_i_alloc(int, c_info->g_ranks, c_info->w_num_procs, "Init_Pointers 1");
+    IMB_i_alloc(int, c_info->g_sizes, c_info->w_num_procs, "Init_Pointers 2");
 
-#if (defined MPI1 || defined NBC || defined MPIIO)     
-    IMB_i_alloc(int, c_info->sndcnt,c_info->w_num_procs,"Init_Pointers 3");
-    IMB_i_alloc(int, c_info->sdispl,c_info->w_num_procs,"Init_Pointers 4");
+#if (defined MPI1 || defined NBC)
+    IMB_i_alloc(int, c_info->sndcnt, c_info->w_num_procs, "Init_Pointers 3");
+    IMB_i_alloc(int, c_info->sdispl, c_info->w_num_procs, "Init_Pointers 4");
 
-    IMB_i_alloc(int, c_info->reccnt,c_info->w_num_procs,"Init_Pointers 5");
-    IMB_i_alloc(int, c_info->rdispl,c_info->w_num_procs,"Init_Pointers 6");
+    IMB_i_alloc(int, c_info->reccnt, c_info->w_num_procs, "Init_Pointers 5");
+    IMB_i_alloc(int, c_info->rdispl, c_info->w_num_procs, "Init_Pointers 6");
 #else
     c_info->sndcnt = NULL;
     c_info->sdispl = NULL;
@@ -549,84 +478,76 @@ In/Out     : c_info   | struct comm_info* | see comm_info.h
 
 /**********************************************************************/
 
-static int has_root(const char* bname)
-{
+static int has_root(const char* bname) {
     return bname &&
 #if defined MPI1
-           (!strcmp(bname,"Gather")   ||
-            !strcmp(bname,"Gatherv")  ||
-            !strcmp(bname,"Scatter")  ||
-            !strcmp(bname,"Scatterv") ||
-            !strcmp(bname,"Bcast")    ||
-            !strcmp(bname,"Reduce"));
+        (!strcmp(bname, "Gather") ||
+        !strcmp(bname, "Gatherv") ||
+        !strcmp(bname, "Scatter") ||
+        !strcmp(bname, "Scatterv") ||
+        !strcmp(bname, "Bcast") ||
+        !strcmp(bname, "Reduce"));
 #elif defined NBC
-           (!strcmp(bname,"Igather")        ||
-            !strcmp(bname,"Igatherv")       ||
-            !strcmp(bname,"Iscatter")       ||
-            !strcmp(bname,"Iscatterv")      ||
-            !strcmp(bname,"Ibcast")         ||
-            !strcmp(bname,"Ireduce")        ||
-            !strcmp(bname,"Igather_pure")   ||
-            !strcmp(bname,"Igatherv_pure")  ||
-            !strcmp(bname,"Iscatter_pure")  ||
-            !strcmp(bname,"Iscatterv_pure") ||
-            !strcmp(bname,"Ibcast_pure")    ||
-            !strcmp(bname,"Ireduce_pure"));
+        (!strcmp(bname, "Igather") ||
+        !strcmp(bname, "Igatherv") ||
+        !strcmp(bname, "Iscatter") ||
+        !strcmp(bname, "Iscatterv") ||
+        !strcmp(bname, "Ibcast") ||
+        !strcmp(bname, "Ireduce") ||
+        !strcmp(bname, "Igather_pure") ||
+        !strcmp(bname, "Igatherv_pure") ||
+        !strcmp(bname, "Iscatter_pure") ||
+        !strcmp(bname, "Iscatterv_pure") ||
+        !strcmp(bname, "Ibcast_pure") ||
+        !strcmp(bname, "Ireduce_pure"));
 #else
-           0;
+        0;
 #endif /* MPI1 | NBC */
-} 
+}
 
 /* IMB 3.1 << */
 /*
 Major reconstruction of memory management for -off_cache flag
 */
 void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS, 
-                           struct Bench* Bmark, MODES BMODE, int iter, int size)
+                           struct Bench* Bmark, MODES BMODE, int iter, int size) {
 /*
 
-                      
                       Initializes communications buffers (call set_buf)
                       Initializes iterations scheduling
 
+Input variables:
 
-Input variables: 
-
-
--Bmark                (type struct Bench*)                      
+-Bmark                (type struct Bench*)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       Current benchmark
 
 -BMODE                (type MODES)
                       aggregate / non aggregate
-                      
+
 -iter                 (type int)
                       number of current iteration of message size loop
 
--size                 (type int)                      
+-size                 (type int)
                       Message size
-                      
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
+
                       Communications buffers are allocated and assigned values
 
 -ITERATIONS           (type struct iter_schedule*)
                       Adaptive number of iterations, out of cache scheduling are
                       setup if requested
-                      
-
 
 */
 /* >> IMB 3.1  */
-{
 /* IMB 3.1 << */
     size_t s_len, r_len, s_alloc, r_alloc;
     int init_size, irep, i_s, i_r, x_sample;
@@ -637,127 +558,113 @@ In/out variables:
 
     /* July 2002 fix V2.2.1: */
 #if (defined EXT || defined MPIIO || RMA)
-    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
+    if (Bmark->access == no) x_sample = ITERATIONS->msgs_nonaggr;
 #endif
 
     ITERATIONS->n_sample = (size > 0)
-                         ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
-                         : x_sample;
+                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
+                           : x_sample;
 
     Bmark->sample_failure = 0;
 
     init_size = max(size, asize);
 
-    if (c_info->rank < 0) {
+    if (c_info->rank < 0)
         return;
-    } else {
-
+    else {
         if (ITERATIONS->iter_policy == imode_off) {
             ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
-        } else if ((ITERATIONS->iter_policy == imode_multiple_np) || (ITERATIONS->iter_policy == imode_auto && root_based)) {
+        } else if ((ITERATIONS->iter_policy == imode_multiple_np) || 
+                   (ITERATIONS->iter_policy == imode_auto && root_based)) {
             /* n_sample for benchmarks with uneven distribution of works
                must be greater or equal and multiple to num_procs.
                The formula below is a negative leg of hyperbola.
                It's moved and scaled relative to max message size
                and initial n_sample subject to multiple to num_procs.
-            */
+               */
             double d_n_sample = ITERATIONS->msgspersample;
-            int max_msg_size = 1<<c_info->max_msg_log;
-            int tmp = (int)(d_n_sample*max_msg_size/(c_info->num_procs*init_size+max_msg_size)+0.5);
-            ITERATIONS->n_sample = x_sample = max(tmp-tmp%c_info->num_procs, c_info->num_procs);
+            int max_msg_size = 1 << c_info->max_msg_log;
+            int tmp = (int)(d_n_sample*max_msg_size / (c_info->num_procs*init_size + max_msg_size) + 0.5);
+            ITERATIONS->n_sample = x_sample = max(tmp - tmp%c_info->num_procs, c_info->num_procs);
         } /* else as is */
     }
 
     if (
 #ifdef MPI1
-       !strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv")
+        !strcmp(Bmark->name, "Alltoall") || !strcmp(Bmark->name, "Alltoallv")
 #elif defined NBC // MPI1
-          !strcmp(Bmark->name, "Ialltoall")  || !strcmp(Bmark->name, "Ialltoall_pure")
-       || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
+        !strcmp(Bmark->name, "Ialltoall") || !strcmp(Bmark->name, "Ialltoall_pure")
+        || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
 #else
-       0
+        0
 #endif // NBC // MPI1
-      )
-    {
+        ) {
         s_len = (size_t)c_info->num_procs * (size_t)init_size;
         r_len = (size_t)c_info->num_procs * (size_t)init_size;
-    }
-    else if (
+    } else if (
 #ifdef MPI1
-                !strcmp(Bmark->name, "Allgather")   || !strcmp(Bmark->name, "Allgatherv")
-             || !strcmp(Bmark->name, "Gather")      || !strcmp(Bmark->name, "Gatherv")
+        !strcmp(Bmark->name, "Allgather") || !strcmp(Bmark->name, "Allgatherv")
+        || !strcmp(Bmark->name, "Gather") || !strcmp(Bmark->name, "Gatherv")
 #elif defined NBC
-                !strcmp(Bmark->name, "Iallgather")  || !strcmp(Bmark->name, "Iallgather_pure")
-             || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
-             || !strcmp(Bmark->name, "Igather")     || !strcmp(Bmark->name, "Igather_pure")
-             || !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
+        !strcmp(Bmark->name, "Iallgather") || !strcmp(Bmark->name, "Iallgather_pure")
+        || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
+        || !strcmp(Bmark->name, "Igather") || !strcmp(Bmark->name, "Igather_pure")
+        || !strcmp(Bmark->name, "Igatherv") || !strcmp(Bmark->name, "Igatherv_pure")
 #else // MPI1 // NBC
-             0
+        0
 #endif // MPI1 // NBC
-            )
-    {
-        s_len = (size_t) init_size;
-        r_len = (size_t) c_info->num_procs * (size_t)init_size;
-    }
-    else if( !strcmp(Bmark->name,"Exchange") )
-    {
+        ) {
+        s_len = (size_t)init_size;
+        r_len = (size_t)c_info->num_procs * (size_t)init_size;
+    } else if (!strcmp(Bmark->name, "Exchange")) {
         s_len = 2 * (size_t)init_size;
-        r_len = (size_t) init_size;
-    }
-    else if(
+        r_len = (size_t)init_size;
+    } else if (
 #ifdef MPI1
-            !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv")
+        !strcmp(Bmark->name, "Scatter") || !strcmp(Bmark->name, "Scatterv")
 #elif defined NBC // MPI1
-               !strcmp(Bmark->name,"Iscatter")  || !strcmp(Bmark->name,"Iscatter_pure")
-            || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
+        !strcmp(Bmark->name, "Iscatter") || !strcmp(Bmark->name, "Iscatter_pure")
+        || !strcmp(Bmark->name, "Iscatterv") || !strcmp(Bmark->name, "Iscatterv_pure")
 #else // NBC // MPI1
-            0
+        0
 #endif // NBC // MPI1
-            )
-    {
+        ) {
         s_len = (size_t)c_info->num_procs * (size_t)init_size;
         r_len = (size_t)init_size;
-    } else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") ) {
+    } else if (!strcmp(Bmark->name, "Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name, "Open_Close")) {
         s_len = r_len = 0;
-    }
-    else if ( ! strcmp(Bmark->name,"Exchange_put") || ! strcmp(Bmark->name,"Exchange_get") )
-    {
+    } else if (!strcmp(Bmark->name, "Exchange_put") || !strcmp(Bmark->name, "Exchange_get")) {
         s_len = 2 * (size_t)init_size;
         r_len = 2 * (size_t)init_size;
-    } 
-    else if (! strcmp(Bmark->name,"Compare_and_swap") )
-    {
+    } else if (!strcmp(Bmark->name, "Compare_and_swap")) {
         /* Compare_and_swap operations require 3 buffers, so allocate space for compare
          * buffers in our r_buffer */
         s_len = (size_t)init_size;
         r_len = 3 * (size_t)init_size;
-    } 
-    else 
-    {
-        s_len = r_len = (size_t) init_size;
+    } else {
+        s_len = r_len = (size_t)init_size;
     }
 
     /*===============================================*/
     /* the displ is declared as int by MPI1 standard
        If c_info->num_procs*init_size  exceed INT_MAX value there is no way to run this sample
-     */
+       */
     if (
 #ifdef MPI1
-        !strcmp(Bmark->name,"Alltoallv")  ||
-        !strcmp(Bmark->name,"Allgatherv") ||
-        !strcmp(Bmark->name,"Scatterv")   ||
-        !strcmp(Bmark->name,"Gatherv")
+        !strcmp(Bmark->name, "Alltoallv") ||
+        !strcmp(Bmark->name, "Allgatherv") ||
+        !strcmp(Bmark->name, "Scatterv") ||
+        !strcmp(Bmark->name, "Gatherv")
 #elif defined NBC // MPI1
-        !strcmp(Bmark->name,"Ialltoallv")  || !strcmp(Bmark->name,"Ialltoallv_pure")  ||
-        !strcmp(Bmark->name,"Iallgatherv") || !strcmp(Bmark->name,"Iallgatherv_pure") ||
-        !strcmp(Bmark->name,"Iscatterv")   || !strcmp(Bmark->name,"Iscatterv_pure")   ||
-        !strcmp(Bmark->name,"Igatherv")    || !strcmp(Bmark->name,"Igatherv_pure")
+        !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure") ||
+        !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure") ||
+        !strcmp(Bmark->name, "Iscatterv") || !strcmp(Bmark->name, "Iscatterv_pure") ||
+        !strcmp(Bmark->name, "Igatherv") || !strcmp(Bmark->name, "Igatherv_pure")
 #else // NBC // MPI1
-       0
+        0
 #endif // NBC // MPI1
-       )
-    {
-        if( s_len > INT_MAX || r_len > INT_MAX) {
+        ) {
+        if (s_len > INT_MAX || r_len > INT_MAX) {
             Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
             return;
         }
@@ -766,26 +673,26 @@ In/out variables:
 
     /* IMB 3.1: new memory management for -off_cache */
     if (BMODE->type == Sync) {
-        ITERATIONS->use_off_cache=0;
-        ITERATIONS->n_sample=x_sample;
+        ITERATIONS->use_off_cache = 0;
+        ITERATIONS->n_sample = x_sample;
     } else {
 #ifdef MPIIO
-        ITERATIONS->use_off_cache=0;
+        ITERATIONS->use_off_cache = 0;
 #else  
         ITERATIONS->use_off_cache = ITERATIONS->off_cache;
 #endif  
         if (ITERATIONS->off_cache) {
-            if ( ITERATIONS->cache_size > 0) {
-                size_t cls = (size_t) ITERATIONS->cache_line_size;
-                size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
+            if (ITERATIONS->cache_size > 0) {
+                size_t cls = (size_t)ITERATIONS->cache_line_size;
+                size_t ofs = ((s_len + cls - 1) / cls + 1) * cls;
                 ITERATIONS->s_offs = ofs;
-                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
-                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
+                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample, (2 * ITERATIONS->cache_size*CACHE_UNIT + ofs - 1) / ofs);
+                ofs = ((r_len + cls - 1) / cls + 1)*cls;
                 ITERATIONS->r_offs = ofs;
-                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
+                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample, (2 * ITERATIONS->cache_size*CACHE_UNIT + ofs - 1) / ofs);
             } else {
-                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
-                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
+                ITERATIONS->s_offs = ITERATIONS->r_offs = 0;
+                ITERATIONS->s_cache_iter = ITERATIONS->r_cache_iter = 1;
             }
         }
     }
@@ -794,46 +701,46 @@ In/out variables:
     s_alloc = s_len;
     r_alloc = r_len;
 #else
-    if( ITERATIONS->use_off_cache ) {
-        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
-        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
+    if (ITERATIONS->use_off_cache) {
+        s_alloc = max(s_len, ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
+        r_alloc = max(r_len, ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
     } else {
         s_alloc = s_len;
         r_alloc = r_len;
     }
 #endif
 
-    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;
+    c_info->used_mem = 1.f*(s_alloc + r_alloc) / MEM_UNIT;
 
 #ifdef DEBUG 
     {
         size_t mx, mu;
 
-        mx = (size_t) MEM_UNIT*c_info->max_mem;
-        mu = (size_t) MEM_UNIT*c_info->used_mem;
-
-        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
-        DBG_I2("max  / used memory ",mx,mu);
-        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
-        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
-        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
-        DBGF_I2("Got send / recv lengths ",s_len,r_len);
-        DBGF_I2("max  / used memory ",mx,mu);
-        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
-        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
-        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
+        mx = (size_t)MEM_UNIT*c_info->max_mem;
+        mu = (size_t)MEM_UNIT*c_info->used_mem;
+
+        DBG_I3("Got send / recv lengths; iters ", s_len, r_len, ITERATIONS->n_sample);
+        DBG_I2("max  / used memory ", mx, mu);
+        DBG_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
+        DBG_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
+        DBG_I2("send / recv buffer allocations ", s_alloc, r_alloc);
+        DBGF_I2("Got send / recv lengths ", s_len, r_len);
+        DBGF_I2("max  / used memory ", mx, mu);
+        DBGF_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
+        DBGF_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
+        DBGF_I2("send / recv buffer allocations ", s_alloc, r_alloc);
     }
 #endif
 
-    if( c_info->used_mem > c_info->max_mem ) {
-        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
+    if (c_info->used_mem > c_info->max_mem) {
+        Bmark->sample_failure = SAMPLE_FAILED_MEMORY;
         return;
     }
 
-    if (s_alloc > 0  && r_alloc > 0) {
+    if (s_alloc > 0 && r_alloc > 0) {
         if (ITERATIONS->use_off_cache) {
             IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
-            IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);
+            IMB_set_buf(c_info, c_info->rank, 0, s_len - 1, 0, r_len - 1);
 
             for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                 i_s = irep % ITERATIONS->s_cache_iter;
@@ -845,11 +752,11 @@ In/out variables:
                 memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
             }
         } else {
-            IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
+            IMB_set_buf(c_info, c_info->rank, 0, s_alloc - 1, 0, r_alloc - 1);
         }
     }
 
-    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));
+    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint)max(s_alloc, r_alloc));
 
     /* Determine #iterations if dynamic adaptation requested */
     if ((ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based)) {
@@ -866,21 +773,21 @@ In/out variables:
         }
 
         /* first, run 1 iteration only */
-        ITERATIONS->n_sample=1;
+        ITERATIONS->n_sample = 1;
 #ifdef MPI1
         c_info->select_source = Bmark->select_source;
 #endif
-        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+        Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
 
         time[1] = time[0];
 
 #ifdef MPIIO
-        if( Bmark->access != no) {
-            ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
+        if (Bmark->access != no) {
+            ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
             MPI_ERRHAND(ierr);
 
-            if( Bmark->fpointer == shared) {
-                ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
+            if (Bmark->fpointer == shared) {
+                ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                 MPI_ERRHAND(ierr);
             }
         }
@@ -892,7 +799,8 @@ In/out variables:
             int rep_test = 1;
             if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                 rep_test = MSGSPERSAMPLE;
-            } else if ((time[0] < 1.0)) {
+            }
+            else if ((time[0] < 1.0)) {
                 rep_test = (int)(1.0 / time[0] + 0.5);
             }
 
@@ -905,15 +813,15 @@ In/out variables:
 #ifdef MPI1
             c_info->select_source = Bmark->select_source;
 #endif
-            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+            Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
             time[1] = time[0];
 #ifdef MPIIO
-            if( Bmark->access != no) {
-                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
+            if (Bmark->access != no) {
+                ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
                 MPI_ERRHAND(ierr);
 
-                if ( Bmark->fpointer == shared) {
-                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
+                if (Bmark->fpointer == shared) {
+                    ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                     MPI_ERRHAND(ierr);
                 }
             }
@@ -923,109 +831,102 @@ In/out variables:
         }
 
         {
-            float val = (float) (1+ITERATIONS->secs/time[0]);
-            t_sample = (time[0] > 1.e-8 && (val <= (float) 0x7fffffff))
-                        ? (int)val
-                        : selected_n_sample;
+            float val = (float)(1 + ITERATIONS->secs / time[0]);
+            t_sample = (time[0] > 1.e-8 && (val <= (float)0x7fffffff))
+                ? (int)val
+                : selected_n_sample;
         }
 
-        if (c_info->n_lens>0 && BMODE->type != Sync) {
+        if (c_info->n_lens > 0 && BMODE->type != Sync) {
             // check monotonicity with msg sizes 
             int i;
             for (i = 0; i < iter; i++) {
-                t_sample = ( c_info->msglen[i] < size )
-                            ? min(t_sample,ITERATIONS->numiters[i])
-                            : max(t_sample,ITERATIONS->numiters[i]);
+                t_sample = (c_info->msglen[i] < size)
+                           ? min(t_sample, ITERATIONS->numiters[i])
+                           : max(t_sample, ITERATIONS->numiters[i]);
             }
             ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min(selected_n_sample, t_sample);
         } else {
             ITERATIONS->n_sample = min(selected_n_sample,
-                                        min(ITERATIONS->n_sample_prev, t_sample));
+                min(ITERATIONS->n_sample_prev, t_sample));
         }
 
         MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);
 
 #ifdef DEBUG
         {
-            int usec=time*1000000;
+            int usec = time * 1000000;
 
-            DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
-            DBGF_I1("=> # samples, aligned with previous ",t_sample);
-            DBGF_I1("final #samples ",ITERATIONS->n_sample);
+            DBGF_I2("Checked time with #iters / usec ", acc_rep_test, usec);
+            DBGF_I1("=> # samples, aligned with previous ", t_sample);
+            DBGF_I1("final #samples ", ITERATIONS->n_sample);
         }
 #endif
     } else { /*if( (ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based) )*/
         double time[MAX_TIME_ID];
-        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+        Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
     }
 
-    ITERATIONS->n_sample_prev=ITERATIONS->n_sample;
+    ITERATIONS->n_sample_prev = ITERATIONS->n_sample;
 
-/* >> IMB 3.1  */
+    /* >> IMB 3.1  */
 
 }
 
 /********************************************************************/
 
-extern char *bmark_names_from_input_file[100];
+extern char *duplicated_benchmark_names[1000];
 
 /* IMB 3.1 << */
-void IMB_free_all(struct comm_info* c_info, struct Bench** P_BList, struct iter_schedule* ITERATIONS)
+void IMB_free_all(struct comm_info* c_info, struct Bench** P_BList, struct iter_schedule* ITERATIONS) {
 /* >> IMB 3.1  */
 /*
 
-                      
                       Free-s all allocated memory in c_info and P_Blist
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--P_BList              (type struct Bench**)                      
+-P_BList              (type struct Bench**)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
 
 -ITERATIONS           (type struct iter_schedule*)
                       Iteration scheduling object
-                      
-
 
 */
-{
     int i;
-    for (i = 0; i < 100; i++) {
-        free(bmark_names_from_input_file[i]);
-        bmark_names_from_input_file[i] = NULL;
+    for (i = 0; i < 1000; i++) {
+        if (duplicated_benchmark_names[i] != NULL)
+            free(duplicated_benchmark_names[i]);
+        duplicated_benchmark_names[i] = NULL;
     }
     IMB_del_s_buf(c_info);
     IMB_del_r_buf(c_info);
 
     IMB_v_free((void**)&c_info->msglen);
-/* IMB 3.1 << */
+    /* IMB 3.1 << */
     IMB_v_free((void**)&ITERATIONS->numiters);
-/* >> IMB 3.1  */
+    /* >> IMB 3.1  */
 
     IMB_v_free((void**)&c_info->g_sizes);
     IMB_v_free((void**)&c_info->g_ranks);
 
-    IMB_v_free((void**)&c_info->sndcnt); 
+    IMB_v_free((void**)&c_info->sndcnt);
     IMB_v_free((void**)&c_info->sdispl);
 
-    IMB_v_free((void**)&c_info->reccnt); 
+    IMB_v_free((void**)&c_info->reccnt);
     IMB_v_free((void**)&c_info->rdispl);
 
-    if( c_info->communicator != MPI_COMM_NULL && 
-	c_info->communicator != MPI_COMM_SELF &&
-	c_info->communicator != MPI_COMM_WORLD )
-    {
-	IMB_del_errhand(c_info);
-	MPI_Comm_free(&c_info->communicator);
+    if (c_info->communicator != MPI_COMM_NULL &&
+        c_info->communicator != MPI_COMM_SELF &&
+        c_info->communicator != MPI_COMM_WORLD) {
+        IMB_del_errhand(c_info);
+        MPI_Comm_free(&c_info->communicator);
     }
 
     IMB_destruct_blist(P_BList);
@@ -1033,12 +934,17 @@ In/out variables:
 #ifdef MPIIO
     IMB_free_file(c_info);
 #endif
-    if( all_times ) {IMB_v_free ((void**)&all_times);}
+    if (all_times)
+        IMB_v_free((void**)&all_times);
 
 #ifdef CHECK
-    if( all_defect ) {IMB_v_free ((void**)&all_defect);}
+    if (all_defect)
+        IMB_v_free((void**)&all_defect);
 
-    if( AUX_LEN > 0 ) {IMB_v_free((void**)&AUX); AUX_LEN = 0;}
+    if (AUX_LEN > 0) {
+        IMB_v_free((void**)&AUX);
+        AUX_LEN = 0; 
+    }
 #endif
 
 #ifdef DEBUG
@@ -1047,72 +953,54 @@ In/out variables:
 #endif
 }
 
-
-void IMB_del_s_buf(struct comm_info* c_info )
+void IMB_del_s_buf(struct comm_info* c_info ) {
 /*
 
-                      
                       Deletes send buffer component of c_info
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-
 
 */
-{
 /* July 2002 V2.2.1 change: use MPI_Free_mem */
-    if ( c_info->s_alloc> 0)
-    {
+    if (c_info->s_alloc > 0) {
 #if (defined EXT || defined MPIIO || defined RMA)
-	MPI_Free_mem( c_info->s_buffer );
+        MPI_Free_mem(c_info->s_buffer);
 #else
-	IMB_v_free( (void**)&c_info->s_buffer );
+        IMB_v_free((void**)&c_info->s_buffer);
 #endif
 
-	c_info-> s_alloc = 0;
-	c_info->s_buffer = NULL;
+        c_info->s_alloc = 0;
+        c_info->s_buffer = NULL;
 
     }
 }
 
-
-
-
-void IMB_del_r_buf(struct comm_info* c_info )
+void IMB_del_r_buf(struct comm_info* c_info){
 /*
 
-                      
                       Deletes recv buffer component of c_info
-                      
-
 
-In/out variables: 
+In/out variables:
 
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
-
 
 */
-{
 /* July 2002 V2.2.1 change: use MPI_Free_mem */
-    if ( c_info->r_alloc> 0)
-    {
+    if (c_info->r_alloc > 0) {
 #if (defined EXT || defined MPIIO || defined RMA)
-	MPI_Free_mem( c_info->r_buffer );
+        MPI_Free_mem(c_info->r_buffer);
 #else
-	IMB_v_free( (void**)&c_info->r_buffer );
+        IMB_v_free((void**)&c_info->r_buffer);
 #endif
 
-	c_info-> r_alloc = 0;
-	c_info->r_buffer = NULL;
+        c_info->r_alloc = 0;
+        c_info->r_buffer = NULL;
 
     }
 }
diff --git a/src/IMB_ones_accu.c b/src_c/IMB_ones_accu.c
similarity index 54%
rename from src/IMB_ones_accu.c
rename to src_c/IMB_ones_accu.c
index 8aef7572..1d6a0b86 100644
--- a/src/IMB_ones_accu.c
+++ b/src_c/IMB_ones_accu.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -94,159 +93,136 @@ Hans-Joachim Plum, Intel GmbH
 /* ===================================================================== */
 
 
-void IMB_accumulate (struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-                     MODES RUN_MODE, double* time)
+void IMB_accumulate(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time) {
 /*
 
-                      
                       MPI-2 benchmark kernel
                       Benchmarks MPI_Accumulate
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--size                 (type int)                      
+-size                 (type int)
                       Basic message size in bytes
 
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 
--RUN_MODE             (type MODES)                      
+-RUN_MODE             (type MODES)
                       Mode (aggregate/non aggregate; blocking/nonblocking);
                       see "IMB_benchmark.h" for definition
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
+-time                 (type double*)
                       Timing result per sample
 
-
 */
-{
-  double t1, t2;
-  
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-/* IMB 3.1 << */
-  int r_off;
-/* >> IMB 3.1  */
-  int s_tag, r_tag;
-  int dest, source, root;
-  int i;
-  MPI_Status stat;
+    double t1, t2;
+
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    /* IMB 3.1 << */
+    int r_off;
+    /* >> IMB 3.1  */
+    int s_tag, r_tag;
+    int dest, source, root;
+    int i;
+    MPI_Status stat;
 
 
 #ifdef CHECK 
-  defect=0;
+    defect = 0;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE */  
-MPI_Type_size(c_info->red_data_type,&s_size);
+    ierr = 0;
 
-/* IMB 3.1 << */
-s_num=size/s_size;
-r_size=s_size;
-r_num=s_num;
-r_off=ITERATIONS->r_offs/r_size;
-/* >> IMB 3.1  */
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->red_data_type, &s_size);
 
-root = (c_info-> rank == 0);
+    /* IMB 3.1 << */
+    s_num = size / s_size;
+    r_size = s_size;
+    r_num = s_num;
+    r_off = ITERATIONS->r_offs / r_size;
+    /* >> IMB 3.1  */
 
-if( c_info-> rank < 0 )
-*time = 0.;
-else
-{
+    root = (c_info->rank == 0);
 
-if( !RUN_MODE->AGGREGATE )
-{
+    if (c_info->rank < 0)
+        *time = 0.;
+    else {
+        if (!RUN_MODE->AGGREGATE) {
 
-*time = MPI_Wtime();
+            *time = MPI_Wtime();
 
-for(i=0;i< ITERATIONS->n_sample;i++)
-	{
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                ierr = MPI_Accumulate((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                      s_num, c_info->red_data_type,
+                                      0, i % ITERATIONS->r_cache_iter * r_off,
+                                      r_num, c_info->red_data_type, c_info->op_type,
+                                      c_info->WIN);
+                MPI_ERRHAND(ierr);
 
-       ierr = MPI_Accumulate(
-                 (char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                 s_num, c_info->red_data_type,
-                 0, i%ITERATIONS->r_cache_iter*r_off,
-                 r_num, c_info->red_data_type, c_info->op_type,
-                 c_info->WIN );
-       MPI_ERRHAND(ierr);
-
-       ierr = MPI_Win_fence(0, c_info->WIN);
-       MPI_ERRHAND(ierr);
+                ierr = MPI_Win_fence(0, c_info->WIN);
+                MPI_ERRHAND(ierr);
 #ifdef CHECK
-if( root ) 
-{
-       CHK_DIFF("Accumulate",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                0, size, size, asize, 
-                put, 0, ITERATIONS->n_sample, i,
-                -1, &defect);
-       IMB_ass_buf((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, 
-		   (size>0)? size-1 : 0, 0);
-}
-MPI_Barrier(c_info->communicator);
+                if (root) {
+                    CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                             0, size, size, asize,
+                             put, 0, ITERATIONS->n_sample, i,
+                             -1, &defect);
+                    IMB_ass_buf((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0,
+                                (size > 0) ? size - 1 : 0, 0);
+                }
+                MPI_Barrier(c_info->communicator);
 #endif
 
-	}
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
-}
+            }
+            *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
+        }
 
-if( RUN_MODE->AGGREGATE )
-{
+        if (RUN_MODE->AGGREGATE) {
 
-for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->communicator);
 
-*time = MPI_Wtime();
+            *time = MPI_Wtime();
 
 #ifdef CHECK
-for(i=0;i< ITERATIONS->r_cache_iter; i++)
+            for (i = 0; i < ITERATIONS->r_cache_iter; i++)
 #else
-for(i=0;i< ITERATIONS->n_sample;i++)
+            for (i = 0; i < ITERATIONS->n_sample; i++)
 #endif
-	{
-
-       ierr = MPI_Accumulate(
-                 (char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                 s_num, c_info->red_data_type,
-                 0, i%ITERATIONS->r_cache_iter*r_off,
-                 r_num, c_info->red_data_type, c_info->op_type,
-                 c_info->WIN );
-       MPI_ERRHAND(ierr);
-
-	}
+            {
+                ierr = MPI_Accumulate(
+                    (char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                    s_num, c_info->red_data_type,
+                    0, i%ITERATIONS->r_cache_iter*r_off,
+                    r_num, c_info->red_data_type, c_info->op_type,
+                    c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
 
-       ierr = MPI_Win_fence(0, c_info->WIN);
-       MPI_ERRHAND(ierr);
+            ierr = MPI_Win_fence(0, c_info->WIN);
+            MPI_ERRHAND(ierr);
 
-*time=(MPI_Wtime()-*time)/ITERATIONS->n_sample;
+            *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
 
 #ifdef CHECK
-if( root ) 
-{
-    for(i=0;i< ITERATIONS->r_cache_iter; i++)
-    {
-	CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-    		 0, size, size, asize, 
-                 put, 0, ITERATIONS->n_sample, i,
-	         -1, &defect);
-        IMB_ass_buf((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, (size>0)? size-1 : 0, 0);
-	     
-    }
-}
+            if (root) {
+                for (i = 0; i < ITERATIONS->r_cache_iter; i++) {
+                    CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                             0, size, size, asize,
+                             put, 0, ITERATIONS->n_sample, i,
+                             -1, &defect);
+                    IMB_ass_buf((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, (size>0) ? size - 1 : 0, 0);
+                }
+            }
 #endif
-
-
-
-}
-
-}
+        }
+    }
 }
diff --git a/src_c/IMB_ones_bidir.c b/src_c/IMB_ones_bidir.c
new file mode 100644
index 00000000..4570be3d
--- /dev/null
+++ b/src_c/IMB_ones_bidir.c
@@ -0,0 +1,234 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_ones_bidir.c 
+
+ Implemented functions: 
+
+ IMB_bidir_get;
+ IMB_bidir_put;
+
+ ***************************************************************************/
+
+
+/* ===================================================================== */
+/* 
+IMB 3.1 changes
+July 2007
+Hans-Joachim Plum, Intel GmbH
+
+- replace "int n_sample" by iteration scheduling object "ITERATIONS"
+  (see => IMB_benchmark.h)
+
+- proceed with offsets in send / recv buffers so that out-of-cache data
+  can be provided
+*/
+/* ===================================================================== */
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+/*************************************************************************/
+
+
+
+void IMB_bidir_get(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                   MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-2 benchmark kernel
+                          Driver for aggregate / non-aggregate bidirectional MPI_Get benchmarks
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+
+*/
+    Type_Size s_size, r_size;
+    /* Counts start at 0 so they stay defined if a datatype size is reported as 0 */
+    int s_num = 0, r_num = 0;
+    int dest, sender;
+
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* The two members of the pair target each other; any other rank gets dest = -1 */
+    if (c_info->rank == c_info->pair0) {
+        dest = c_info->pair1;
+    } else if (c_info->rank == c_info->pair1) {
+        dest = c_info->pair0;
+    } else {
+        dest = -1;
+    }
+    sender = 0;
+
+    if (!RUN_MODE->AGGREGATE)
+        IMB_ones_get(c_info,
+                     s_num, dest,
+                     r_num, sender,
+                     size, ITERATIONS,
+                     time);
+    if (RUN_MODE->AGGREGATE)
+        IMB_ones_mget(c_info,
+                      s_num, dest,
+                      r_num, sender,
+                      size, ITERATIONS,
+                      time);
+}
+
+
+
+
+void IMB_bidir_put(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                   MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-2 benchmark kernel
+                          Driver for aggregate / non-aggregate bidirectional MPI_Put benchmarks
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+
+*/
+    Type_Size s_size, r_size;
+    /* Counts start at 0 so they stay defined if a datatype size is reported as 0 */
+    int s_num = 0, r_num = 0;
+    int dest, sender;
+
+    /* Reset ierr as IMB_bidir_get does, instead of declaring an unused local ierr */
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* The two members of the pair target each other; any other rank gets dest = -1 */
+    if (c_info->rank == c_info->pair0) {
+        dest = c_info->pair1;
+    } else if (c_info->rank == c_info->pair1) {
+        dest = c_info->pair0;
+    } else {
+        dest = -1;
+    }
+
+    sender = 1;
+
+    if (!RUN_MODE->AGGREGATE)
+        IMB_ones_put(c_info,
+                     s_num, dest,
+                     r_num, sender,
+                     size, ITERATIONS,
+                     time);
+
+    if (RUN_MODE->AGGREGATE)
+        IMB_ones_mput(c_info,
+                      s_num, dest,
+                      r_num, sender,
+                      size, ITERATIONS,
+                      time);
+}
diff --git a/src_c/IMB_ones_unidir.c b/src_c/IMB_ones_unidir.c
new file mode 100644
index 00000000..e4e78dd1
--- /dev/null
+++ b/src_c/IMB_ones_unidir.c
@@ -0,0 +1,595 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_ones_unidir.c 
+
+ Implemented functions: 
+
+ IMB_unidir_put;
+ IMB_unidir_get;
+ IMB_ones_get;
+ IMB_ones_mget;
+ IMB_ones_put;
+ IMB_ones_mput;
+
+ ***************************************************************************/
+
+
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+/*************************************************/
+
+
+/* ===================================================================== */
+/* 
+IMB 3.1 changes
+July 2007
+Hans-Joachim Plum, Intel GmbH
+
+- replace "int n_sample" by iteration scheduling object "ITERATIONS"
+  (see => IMB_benchmark.h)
+
+- step through the send / recv buffers with offsets so that
+  out-of-cache data can be provided
+*/
+/* ===================================================================== */
+
+
+void IMB_unidir_put(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-2 benchmark kernel
+                          Driver for aggregate / non agg. unidirectional MPI_Put benchmarks
+
+                          Converts the byte size into element counts of the send and
+                          receive data types, picks the partner rank and the role of
+                          the calling process, and calls IMB_ones_mput (aggregate
+                          mode) or IMB_ones_put (non aggregate mode).
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+
+*/
+    Type_Size s_size, r_size;
+    /* element counts default to 0 so they are defined even if a type size is 0 */
+    int s_num = 0, r_num = 0;
+    int dest, sender;
+
+    ierr = 0;
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* pair0 runs with sender = 1, pair1 with sender = 0; any other rank gets -1 */
+    if (c_info->rank == c_info->pair0) {
+        dest = c_info->pair1;
+        sender = 1;
+    } else if (c_info->rank == c_info->pair1) {
+        dest = c_info->pair0;
+        sender = 0;
+    } else {
+        dest = -1;
+        sender = -1;
+    }
+
+    /* aggregate mode: many puts per fence; otherwise one put per fence */
+    if (RUN_MODE->AGGREGATE) {
+        IMB_ones_mput(c_info, s_num, dest, r_num, sender,
+                      size, ITERATIONS, time);
+    } else {
+        IMB_ones_put(c_info, s_num, dest, r_num, sender,
+                     size, ITERATIONS, time);
+    }
+}
+
+/*************************************************************************/
+
+
+
+void IMB_unidir_get(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-2 benchmark kernel
+                          Driver for aggregate / non agg. unidirectional MPI_Get benchmarks
+
+                          Converts the byte size into element counts of the send and
+                          receive data types, picks the partner rank and the role of
+                          the calling process, and calls IMB_ones_mget (aggregate
+                          mode) or IMB_ones_get (non aggregate mode).
+                          The element counts are 0 if either data type reports
+                          a size of 0.
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+
+*/
+    Type_Size s_size, r_size;
+    /* initialized so that no indeterminate count is passed on when a type size is 0 */
+    int s_num = 0, r_num = 0;
+    int dest, sender;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* pair0 runs with sender = 1, pair1 with sender = 0; any other rank gets -1 */
+    if (c_info->rank == c_info->pair0) {
+        dest = c_info->pair1;
+        sender = 1;
+    } else if (c_info->rank == c_info->pair1) {
+        dest = c_info->pair0;
+        sender = 0;
+    } else {
+        dest = -1;
+        sender = -1;
+    }
+
+    /* aggregate mode: many gets per fence; otherwise one get per fence */
+    if (RUN_MODE->AGGREGATE) {
+        IMB_ones_mget(c_info, s_num, dest, r_num, sender,
+                      size, ITERATIONS, time);
+    } else {
+        IMB_ones_get(c_info, s_num, dest, r_num, sender,
+                     size, ITERATIONS, time);
+    }
+}
+
+
+
+
+void IMB_ones_get(struct comm_info* c_info, int s_num, int dest,
+                  int r_num, int sender, int size,
+                  struct iter_schedule *ITERATIONS, double* time) {
+/*
+
+                          Non aggregate MPI_Get + MPI_Win_fence
+                          (every sample is closed by its own MPI_Win_fence;
+                          the status of each MPI_Get and fence goes to MPI_ERRHAND)
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-s_num                    (type int)
+                          number of s_data_type entries read from the target window
+
+-dest                     (type int)
+                          target rank of the MPI_Get
+
+-r_num                    (type int)
+                          number of r_data_type entries stored in the local receive buffer
+
+-sender                   (type int)
+                          nonzero: local process only synchronizes the window;
+                          0: local process issues the MPI_Get calls
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+                          (0 if c_info->rank < 0)
+
+*/
+    int i, ierr;
+    int s_size;
+
+#ifdef CHECK
+    defect = 0;
+#endif
+
+    MPI_Type_size(c_info->s_data_type, &s_size);
+
+    if (c_info->rank < 0)
+        *time = 0.;
+    else {
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time = MPI_Wtime();
+
+        if (sender) {
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                /* "Send ", i.e. synchronize window */
+                ierr = MPI_Win_fence(0, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+        } else {
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                /* "Receive"; check MPI_Get before the fence overwrites ierr */
+                ierr = MPI_Get((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                               r_num, c_info->r_data_type,
+                               dest, i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                               s_num, c_info->s_data_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_fence(0, c_info->WIN);
+                MPI_ERRHAND(ierr);
+
+                DIAGNOSTICS("MPI_Get: ", c_info, c_info->r_buffer, r_num, r_num, i, 0);
+
+                CHK_DIFF("MPI_Get", c_info, (void*)((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 0,
+                         size, size, asize,
+                         get, 0, ITERATIONS->n_sample, i,
+                         dest, &defect);
+            }
+        }
+        *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
+    }
+}
+
+void IMB_ones_mget(struct comm_info* c_info, int s_num, int dest,
+                   int r_num, int sender, int size,
+                   struct iter_schedule* ITERATIONS, double* time) {
+/*
+
+                          Aggregate MPI_Get + MPI_Win_fence
+                          (all samples are issued first and closed by one MPI_Win_fence)
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-s_num                    (type int)
+                          number of s_data_type entries read from the target window
+
+-dest                     (type int)
+                          target rank of the MPI_Get
+
+-r_num                    (type int)
+                          number of r_data_type entries stored in the local receive buffer
+
+-sender                   (type int)
+                          nonzero: local process only takes part in the fences;
+                          0: local process issues the MPI_Get calls
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+                          (0 if c_info->rank < 0)
+
+*/
+    int i, ierr;
+    char* recv;
+
+#ifdef CHECK
+    defect = 0;
+#endif
+
+    if (c_info->rank < 0)
+        *time = 0.;
+    else {
+        recv = (char*)c_info->r_buffer;
+
+        ierr = MPI_Win_fence(0, c_info->WIN);
+        MPI_ERRHAND(ierr);
+
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time = MPI_Wtime();
+
+        if (!sender)
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                ierr = MPI_Get((void*)(recv + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs),
+                               r_num, c_info->r_data_type,
+                               dest, i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                               s_num, c_info->s_data_type, c_info->WIN);
+                MPI_ERRHAND(ierr); /* check each get; ierr is reused by the following calls */
+            }
+        ierr = MPI_Win_fence(0, c_info->WIN);
+        MPI_ERRHAND(ierr);
+
+        *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
+
+#ifdef CHECK
+        if (!sender) {
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                CHK_DIFF("MPI_Get", c_info, (void*)((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 0,
+                         size, size, asize,
+                         get, 0, ITERATIONS->n_sample, i,
+                         dest, &defect);
+            }
+        }
+#endif
+    }
+}
+
+void IMB_ones_put(struct comm_info* c_info, int s_num, int dest,
+                  int r_num, int sender, int size,
+                  struct iter_schedule* ITERATIONS, double* time) {
+/*
+
+                          Non aggregate MPI_Put + MPI_Win_fence
+                          (every sample is closed by its own MPI_Win_fence)
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-s_num                    (type int)
+                          number of s_data_type entries taken from the local send buffer
+
+-dest                     (type int)
+                          target rank of the MPI_Put
+
+-r_num                    (type int)
+                          number of r_data_type entries written to the target window
+
+-sender                   (type int)
+                          nonzero: local process issues the MPI_Put calls;
+                          0: local process only synchronizes the window
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+                          (0 if c_info->rank < 0)
+
+*/
+    int i, ierr, r_size;
+    char* recv;
+
+#ifdef CHECK
+    defect = 0;
+#endif
+
+    MPI_Type_size(c_info->r_data_type, &r_size);
+
+    recv = (char*)c_info->r_buffer;
+
+    if (c_info->rank < 0)
+        *time = 0.;
+    else {
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time = MPI_Wtime();
+
+        if (sender) {
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                /* Send; check MPI_Put before the fence overwrites ierr */
+                ierr = MPI_Put((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                               s_num, c_info->s_data_type,
+                               dest, i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                               r_num, c_info->r_data_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_fence(0, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+        } else {
+            for (i = 0; i < ITERATIONS->n_sample; i++) {
+                /* "Receive", i.e. synchronize the window */
+                ierr = MPI_Win_fence(0, c_info->WIN);
+                MPI_ERRHAND(ierr);
+
+                DIAGNOSTICS("MPI_Put: ", c_info, c_info->r_buffer, r_num, r_num, i, 0);
+
+                CHK_DIFF("MPI_Put", c_info, (void*)(recv + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs), 0,
+                    size, size, asize,
+                    get, 0, ITERATIONS->n_sample, i,
+                    dest, &defect);
+            }
+        }
+        *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
+    }
+}
+
+
+
+
+void IMB_ones_mput(struct comm_info* c_info, int s_num, int dest,
+                   int r_num, int sender, int size,
+                   struct iter_schedule* ITERATIONS, double* time) {
+/*
+
+                          Aggregate MPI_Put + MPI_Win_fence
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-s_num                    (type int)
+                          number of s_data_type entries taken from the local send buffer
+
+-dest                     (type int)
+                          target rank of the MPI_Put
+
+-r_num                    (type int)
+                          number of r_data_type entries written to the target window
+
+-sender                   (type int)
+                          nonzero: local process issues the MPI_Put calls
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample
+
+*/
+    int k, ierr;
+    char *put_base, *chk_base;
+
+#ifdef CHECK
+    defect = 0;
+#endif
+
+    if (c_info->rank < 0) {
+        *time = 0.;
+        return;
+    }
+
+    put_base = (char*)c_info->s_buffer;
+    chk_base = (char*)c_info->r_buffer;
+
+    ierr = MPI_Win_fence(0, c_info->WIN);
+    MPI_ERRHAND(ierr);
+
+    for (k = 0; k < N_BARR; k++)
+        MPI_Barrier(c_info->communicator);
+
+    *time = MPI_Wtime();
+
+    if (sender) {
+        for (k = 0; k < ITERATIONS->n_sample; k++) {
+            ierr = MPI_Put((void*)(put_base + (k % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs),
+                           s_num, c_info->s_data_type,
+                           dest, (k % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs,
+                           r_num, c_info->r_data_type, c_info->WIN);
+            MPI_ERRHAND(ierr);
+        }
+    }
+
+    /* one fence after the whole series of puts */
+    ierr = MPI_Win_fence(0, c_info->WIN);
+    MPI_ERRHAND(ierr);
+
+    *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
+
+    if (!sender) {
+        for (k = 0; k < ITERATIONS->n_sample; k++) {
+            CHK_DIFF("MPI_Put", c_info, (void*)(chk_base + (k % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs), 0,
+                     size, size, asize,
+                     get, 0, ITERATIONS->n_sample, k,
+                     dest, &defect);
+        }
+    }
+}
diff --git a/src/IMB_open_close.c b/src_c/IMB_open_close.c
similarity index 67%
rename from src/IMB_open_close.c
rename to src_c/IMB_open_close.c
index fc98e77c..5942f15e 100644
--- a/src/IMB_open_close.c
+++ b/src_c/IMB_open_close.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -94,68 +93,57 @@ Hans-Joachim Plum, Intel GmbH
 /* ===================================================================== */
 
 void IMB_open_close(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                    MODES RUN_MODE, double* time)
+                    MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-IO benchmark kernel
-                      MPI_File_open + MPI_File_close
-                      
+                          MPI-IO benchmark kernel
+                          MPI_File_open + MPI_File_write (1 etype) + MPI_File_close
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)                      
-                      Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
 
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  double t1, t2;
-  int    i, dum;
-  MPI_Status stat;
-
-  ierr = 0;
-
-  if(c_info->rank!=-1)
-    {
-      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-      t1 = MPI_Wtime();
-      for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-        ierr = MPI_File_open(c_info->File_comm, c_info->filename,
-                             c_info->amode, c_info->info, &c_info->fh);
-        MPI_ERRHAND(ierr);
-        ierr=MPI_File_write
-          (c_info->fh, (void*)&dum, 1 ,c_info->etype,&stat);
-        ierr = MPI_File_close(&c_info->fh);
-        MPI_ERRHAND(ierr);
-	}
-      t2 = MPI_Wtime();
-      *time=(t2 - t1)/(ITERATIONS->n_sample);
-    }
-  else
-    { 
-      *time = 0.; 
+    double t1, t2;
+    int    i, dum = 0; /* zero-initialized: its bytes are written to the file below */
+    MPI_Status stat;
+
+    ierr = 0;
+
+    if (c_info->rank != -1) {
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        t1 = MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_File_open(c_info->File_comm, c_info->filename,
+                                 c_info->amode, c_info->info, &c_info->fh);
+            MPI_ERRHAND(ierr);
+            ierr = MPI_File_write(c_info->fh, (void*)&dum, 1, c_info->etype, &stat);
+            ierr = MPI_File_close(&c_info->fh);
+            MPI_ERRHAND(ierr);
+        }
+        t2 = MPI_Wtime();
+        *time = (t2 - t1) / (ITERATIONS->n_sample);
+    } else {
+        *time = 0.;
     }
 }
 
diff --git a/src_c/IMB_output.c b/src_c/IMB_output.c
new file mode 100644
index 00000000..05feaf88
--- /dev/null
+++ b/src_c/IMB_output.c
@@ -0,0 +1,1347 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_output.c 
+
+ Implemented functions: 
+
+ IMB_output;
+ IMB_display_times;
+ IMB_calculate_times;
+ IMB_show_selections;
+ IMB_show_procids;
+ IMB_print_array;
+ IMB_print_int_row;
+ IMB_print_info;
+ IMB_print_header;
+ IMB_edit_format;
+ IMB_make_line;
+
+New in IMB_3.0:
+ IMB_help;
+
+ ***************************************************************************/
+
+#include <string.h>
+#include <float.h> // DBL_MAX
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+enum output_format {
+    /* columns: message size, iteration count, time, bandwidth */
+    OUT_TIME_AND_BW = 0,
+
+    /* columns: message size, iteration count, bandwidth, message rate */
+    OUT_BW_AND_MSG_RATE = 1,
+
+    /* columns: message size, iteration count,
+     * minimum, maximum and average time (among all ranks), bandwidth */
+    OUT_TIME_RANGE_AND_BW = 2,
+
+    /* columns: message size, iteration count,
+     * minimum, maximum and average time (among all ranks) */
+    OUT_TIME_RANGE = 3,
+
+    /* columns: pure communication time, total time, computation time,
+       overlap of computation and communication (in percent) */
+    OUT_OVERLAP = 4,
+
+    /* For operations where the message size is not relevant (for instance
+     * Barrier, Ibarrier): the message size is not printed and the
+     * format may differ between benchmarks. */
+    OUT_SYNC = 5
+};
+
+
+/*****************************************************************/
+
+
+
+/* IMB 3.1 << */
+/*
+Introduce new ITERATIONS object
+*/
+void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE,
+                int header, int size, struct iter_schedule* ITERATIONS,
+                double *time) {
+/* >> IMB 3.1  */
+/*
+
+                      Gathers the scaled timings of all processes on world rank 0,
+                      which selects the table format and calls IMB_display_times
+
+Input variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+-Bmark                (type struct Bench*)
+                      The actual benchmark
+                      (struct Bench describes all aspects of modes of a benchmark;
+                      see [1] for more information)
+
+-BMODE                (type MODES)
+                      The actual benchmark mode (if relevant; only MPI-2 case, see [1])
+
+-header               (type int)
+                      1/0 for do/don't print table headers
+
+-size                 (type int)
+                      Benchmark message size
+
+-ITERATIONS           (type struct iter_schedule*)
+                      Benchmark repetition descr. object
+
+-time                 (type double *)
+                      Benchmark timing outcome
+                      (Bmark->Ntimes values are read)
+
+*/
+    double scaled_time[MAX_TIME_ID];
+    int t, grp;
+    int out_format;
+    int n_groups, grouped;
+    BTYPES type;
+
+    const int is_root = (c_info->w_rank == 0);
+    const int per_group = (c_info->group_mode > 0);
+
+    ierr = 0;
+
+    if (is_root) {
+        /* Fix IMB_1.0.1: NULL all_times before allocation */
+        IMB_v_free((void**)&all_times);
+
+        all_times = (double*)IMB_v_alloc(c_info->w_num_procs * Bmark->Ntimes * sizeof(double), "Output 1");
+#ifdef CHECK
+        if (!all_defect) {
+            all_defect = (double*)IMB_v_alloc(c_info->w_num_procs * sizeof(double), "Output 1");
+            for (t = 0; t < c_info->w_num_procs; t++)
+                all_defect[t] = 0.;
+        }
+#endif
+    }
+
+    /* Scale the timings */
+    for (t = 0; t < Bmark->Ntimes; t++)
+        scaled_time[t] = time[t] * SCALE * Bmark->scale_time;
+
+    /* collect all times  */
+    ierr = MPI_Gather(scaled_time, Bmark->Ntimes, MPI_DOUBLE, all_times, Bmark->Ntimes, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_ERRHAND(ierr);
+
+#ifdef CHECK
+    /* collect all defects */
+    ierr = MPI_Gather(&defect, 1, MPI_DOUBLE, all_defect, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+    MPI_ERRHAND(ierr);
+#endif
+
+    /* everything below runs on world rank 0 only */
+    if (!is_root)
+        return;
+
+    type = Bmark->RUN_MODES[0].type;
+    n_groups = per_group ? c_info->n_groups : 1;
+    grouped = (c_info->group_mode != 0);
+
+    /* pick the table layout from the benchmark type */
+    if (Bmark->RUN_MODES[0].NONBLOCKING && type != Sync) {
+        out_format = OUT_OVERLAP;
+    } else if (type == MultPassiveTransfer ||
+               (grouped && (type == SingleTransfer || type == SingleElementTransfer))) {
+        out_format = OUT_TIME_AND_BW;
+    } else if (type == ParallelTransfer || type == SingleTransfer || type == SingleElementTransfer) {
+        out_format = OUT_TIME_RANGE_AND_BW;
+    } else if (type == ParallelTransferMsgRate) {
+        out_format = OUT_BW_AND_MSG_RATE;
+    } else if (type == Collective) {
+#ifdef MPIIO
+        out_format = OUT_TIME_RANGE_AND_BW;
+#else
+        out_format = OUT_TIME_RANGE;
+#endif
+    } else {
+        out_format = OUT_SYNC;
+    }
+
+    if (header)
+        IMB_print_header(out_format, Bmark, c_info, BMODE);
+
+    if (per_group)
+        fprintf(unit, "\n");
+
+    for (grp = 0; grp < n_groups; grp++)
+        IMB_display_times(Bmark, all_times, c_info, grp, ITERATIONS->n_sample, size, out_format);
+}
+
+
+/*****************************************************************/
+void IMB_display_times(struct Bench* Bmark, double* tlist, struct comm_info* c_info,
+                       int group, int n_sample, int size, int out_format) {
+/*
+
+       Prints one result line for one group to the output unit: either the
+       timings of the current sample or a notice why the sample failed.
+       Nothing is printed when c_info->g_sizes[group] <= 0.
+       In CHECK builds Bmark->success is cleared when the printed defect exceeds TOL.
+
+Input variables:
+
+-Bmark                (type struct Bench*)
+                      (For explanation of struct Bench type:
+                      describes all aspects of modes of a benchmark;
+                      see [1] for more information)
+
+                      The actual benchmark
+
+-tlist                (type double*)
+                      Gathered benchmark timings, Bmark->Ntimes
+                      consecutive numbers per process
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+-group                (type int)
+                      Index of group to be displayed (multi-case only)
+
+-n_sample             (type int)
+                      Benchmark repetition number
+
+-size                 (type int)
+                      Benchmark message size
+
+-out_format           (type int)
+                      Code for table formatting details
+                      (one of the OUT_... codes handled in the switch below)
+
+*/
+    int i, offset = 0, peers;
+    static double MEGA = 1.0 / 1e6;
+
+    double throughput = 0.;
+    double overlap = 0.;
+    double t_pure = 0.;
+    double t_ovrlp = 0.;
+    double t_comp = 0.;
+    double msgrate = 0;
+
+    Timing timing[MAX_TIME_ID]; // min, max and avg
+#ifdef CHECK
+    double defect = 0.;
+#endif
+    memset(&timing, 0, MAX_TIME_ID * sizeof(timing[MIN]));
+
+    /* An empty group has nothing to report. */
+    if (c_info->g_sizes[group] <= 0) {
+        return;
+    }
+
+#ifdef CHECK
+    IMB_calculate_times(Bmark->Ntimes, c_info, group, tlist, timing, &defect);
+#else
+    IMB_calculate_times(Bmark->Ntimes, c_info, group, tlist, timing);
+#endif
+    /* Overlap related figures; how they are obtained depends on the suite being built. */
+#ifdef NBC
+    if (!strstr(Bmark->name, "_pure")) {
+        const size_t rank_index = timing[MAX].offset[OVRLP]; /* tlist offset stored for the largest OVRLP time */
+        t_pure = tlist[rank_index + PURE];
+        t_ovrlp = tlist[rank_index + OVRLP];
+        t_comp = tlist[rank_index + COMP];
+        overlap = 100. * max(0., min(1., (t_pure + t_comp - t_ovrlp) / max(t_pure, t_comp)));
+    }
+
+#elif defined RMA
+    /* RMA benchmarks which test truly passive synchronisation presence */
+    if (Bmark->RUN_MODES[0].NONBLOCKING) {
+        /* Time when the target was inside MPI stack */
+        t_pure = timing[MAX].times[PURE];
+
+        /* Time when the target was calculating something outside the MPI stack
+         * for a while and then entered the MPI stack */
+        t_ovrlp = timing[MAX].times[OVRLP];
+    }
+
+#else // NBC || RMA
+    if (Bmark->RUN_MODES[0].NONBLOCKING) {
+        t_pure = timing[MAX].times[PURE];
+        t_ovrlp = timing[MAX].times[OVRLP];
+        t_comp = tCPU;
+        overlap = 100.* max(0, min(1, (t_pure + t_comp - t_ovrlp) / min(t_pure, t_comp)));
+    }
+#endif // NBC || RMA
+    /* Throughput and message rate are derived from the largest PURE time. */
+    if (timing[MAX].times[PURE] > 0.) {
+        if (Bmark->RUN_MODES[0].type != ParallelTransferMsgRate)
+            throughput = (Bmark->scale_bw * SCALE * MEGA) * size / timing[MAX].times[PURE];
+#ifndef MPIIO
+        else {
+            peers = c_info->num_procs / 2;
+            msgrate = (Bmark->scale_bw * SCALE * MAX_WIN_SIZE * peers) / timing[MAX].times[PURE];
+            throughput = MEGA * msgrate * size;
+        }
+#endif
+    }
+    /* With group output the line starts with the group index. */
+    if (c_info->group_mode > 0) {
+        IMB_edit_format(1, 0);
+        sprintf(aux_string, format, group);
+        offset = strlen(aux_string);
+    }
+
+    if (Bmark->sample_failure) {
+        IMB_edit_format(1, 0);
+        sprintf(aux_string + offset, format, size);
+        offset = strlen(aux_string);
+
+        switch (Bmark->sample_failure) {
+            case SAMPLE_FAILED_MEMORY:
+                sprintf(aux_string + offset,
+                        " out-of-mem.; needed X=%8.3f GB; use flag \"-mem X\" or MAX_MEM_USAGE>=X (IMB_mem_info.h)",
+                        (1000. * c_info->used_mem + 1.) / 1000.);
+                break;
+
+            case SAMPLE_FAILED_INT_OVERFLOW:
+                sprintf(aux_string + offset, " int-overflow.; The production rank*size caused int overflow for given sample");
+                break;
+            case SAMPLE_FAILED_TIME_OUT:
+                aux_string[offset] = '\0';
+                fprintf(unit, "%s%s", aux_string, " time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use \"-time X\" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.");
+                aux_string[0] = '\0'; /* text is already out; the final fprintf only adds the newline */
+                break;
+        } /*switch*/
+    } else {
+        switch (out_format) {
+            case OUT_TIME_AND_BW:
+                IMB_edit_format(2, 2);
+                sprintf(aux_string + offset, format, size, n_sample, timing[MAX].times[PURE], throughput);
+                break;
+            case OUT_BW_AND_MSG_RATE:
+                IMB_edit_format(2, 1);
+                offset += sprintf(aux_string + offset, format, size, n_sample, throughput);
+                sprintf(&(format[0]), "%%%d.0f", ow_format);
+                sprintf(aux_string + offset, format, msgrate);
+                break;
+            case OUT_TIME_RANGE_AND_BW:
+                IMB_edit_format(2, 4);
+                sprintf(aux_string + offset, format, size, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE], throughput);
+                break;
+            case OUT_TIME_RANGE:
+                IMB_edit_format(2, 3);
+                sprintf(aux_string + offset, format, size, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE]);
+                break;
+            case OUT_SYNC:
+#ifdef NBC
+                if (Bmark->RUN_MODES[0].NONBLOCKING && !strstr(Bmark->name, "_pure")) {
+                    IMB_edit_format(1, 4);
+                    sprintf(aux_string + offset, format, n_sample, t_ovrlp, t_pure, t_comp, overlap);
+                } else
+#endif // NBC
+                {
+                    IMB_edit_format(1, 3);
+                    sprintf(aux_string + offset, format, n_sample, timing[MIN].times[PURE], timing[MAX].times[PURE], timing[AVG].times[PURE]);
+                }
+                break;
+            case OUT_OVERLAP:
+#ifdef RMA
+                IMB_edit_format(2, 2);
+                sprintf(aux_string + offset, format, size, n_sample, t_pure, t_ovrlp);
+#else            
+                IMB_edit_format(2, 4);
+                sprintf(aux_string + offset, format, size, n_sample, t_ovrlp, t_pure, t_comp, overlap);
+#endif            
+                break;
+        }
+
+#ifdef CHECK 
+        if (out_format != OUT_SYNC  && strcmp(Bmark->name, "Window")) {
+            IMB_edit_format(0, 1);
+            offset = strlen(aux_string);
+            sprintf(aux_string + offset, format, defect);
+
+            if (defect > TOL) {
+                Bmark->success = 0;
+            }
+        }
+#endif
+
+    } /*if( Bmark->sample_failure )*/
+
+    fprintf(unit, "%s\n", aux_string);
+    fflush(unit);
+}
+
+/*
+ * Reduces gathered per-rank timings to their minimum, maximum and average.
+ * Input variables:
+ * -ntimes    number of timing slots stored per rank in tlist
+ * -c_info    base MPI data; group_mode, n_groups, g_sizes and g_ranks are read
+ * -group_id  group to evaluate (only used when c_info->group_mode > 0)
+ * -tlist     timings, ntimes consecutive entries per rank; entries < 0 are skipped
+ * In/out variables:
+ * -timing    indexed by MIN/MAX/AVG; must be zeroed by the caller, because the
+ *            MAX and AVG entries are accumulated onto the incoming values
+ * -defect    (CHECK builds only) largest all_defect[] entry of the ranks used
+ */
+void IMB_calculate_times(int ntimes,
+                         struct comm_info* c_info,
+                         int group_id,
+                         double* tlist,
+                         Timing* timing
+#ifdef CHECK
+                         , double* defect
+#endif
+                         ) {
+    int offset = 0;
+    int nproc = 0;
+    int rank = 0;
+    int times_count = 0;
+    int i = 0;
+    Time_Id time_id = PURE;
+    const int is_group_mode = c_info->group_mode > 0 ? 1 : 0;
+    /* Group mode: count the ranks in front of group_id; else count all ranks. */
+    int ncount = is_group_mode ? group_id : c_info->n_groups;
+#ifdef CHECK
+    *defect = 0;
+#endif
+    for (i = 0; i < ncount; i++) {
+        nproc += c_info->g_sizes[i];
+    }
+    ncount = is_group_mode ? c_info->g_sizes[group_id] : nproc;
+
+    for (time_id = PURE; time_id < ntimes; time_id++) {
+        times_count = 0;
+        timing[MIN].times[time_id] = DBL_MAX;
+        for (i = 0; i < ncount; i++) {
+            /* rank is the index of this rank's first slot in tlist */
+            rank = is_group_mode
+                   ? (nproc + i) * ntimes
+                   : c_info->g_ranks[i] * ntimes;
+            offset = rank + time_id;
+            if (tlist[offset] < 0.) {
+                continue;
+            }
+            times_count++;
+            if (tlist[offset] < timing[MIN].times[time_id]) {
+                timing[MIN].times[time_id] = tlist[offset];
+                timing[MIN].offset[time_id] = rank;
+            }
+            if (tlist[offset] > timing[MAX].times[time_id]) {
+                timing[MAX].times[time_id] = tlist[offset];
+                timing[MAX].offset[time_id] = rank;
+            }
+            timing[AVG].times[time_id] += tlist[offset];
+#ifdef CHECK
+            {
+                const size_t check_index = is_group_mode
+                    ? c_info->g_ranks[nproc + i]
+                    : c_info->g_ranks[i];
+                *defect = max(*defect, all_defect[check_index]);
+            }
+#endif
+        }
+        if (times_count != 0) {
+            timing[AVG].times[time_id] /= times_count;
+        } else {
+            /* No valid sample: report 0 instead of the DBL_MAX sentinel. */
+            timing[MIN].times[time_id] = 0;
+            timing[AVG].times[time_id] = 0;
+        }
+    }
+}
+
+
+/************************************************************************/
+/* IMB 3.1 << */
+// Re-display calling sequence
+void IMB_show_selections(struct comm_info* c_info, struct Bench* BList, int *argc, char ***argv) {
+/* >> IMB 3.1  */
+/*
+
+       Writes an overview of the user selections to the output unit;
+       only the process with c_info->w_rank == 0 produces output
+
+Input variables:
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI;
+                      see [1] for more information
+
+-BList                (type struct Bench*)
+                      (For explanation of struct Bench type:
+                      describes all aspects of modes of a benchmark;
+                      see [1] for more information)
+
+                      The requested list of benchmarks
+
+-argc                 (type int *)
+                      Number of command line arguments
+
+-argv                 (type char ***)
+                      List of command line arguments
+
+*/
+    char **args = *argv;
+    const int n_args = *argc;
+    int pos;
+    size_t pad;
+
+    if (c_info->w_rank != 0) {
+        return;
+    }
+
+    IMB_general_info();
+    /* Repeat the calling sequence, closing the output line at every sixth argument */
+    fprintf(unit, "\n\n# Calling sequence was: \n\n#");
+
+    for (pos = 0; pos < n_args; pos++) {
+        if (pos == 0 || pos % 6 != 0) {
+            fprintf(unit, " %s", args[pos]);
+            continue;
+        }
+        if (args[pos][0] == '-' && pos + 1 < n_args) {
+            /* an option and the argument following it stay on the same line */
+            fprintf(unit, " %s %s\n#", args[pos], args[pos + 1]);
+            pos++;
+        } else {
+            fprintf(unit, " %s\n#", args[pos]);
+        }
+        /* indent the continuation line by strlen(argv[0]) + 1 blanks */
+        for (pad = strlen(args[0]) + 1; pad > 0; pad--) {
+            fprintf(unit, " ");
+        }
+    }
+
+    fprintf(unit, "\n\n");
+#ifndef MPIIO
+    if (c_info->n_lens > 0) {
+        fprintf(unit, "# Message lengths were user defined\n");
+    } else {
+        fprintf(unit, "# Minimum message length in bytes:   %d\n", 0);
+        fprintf(unit, "# Maximum message length in bytes:   %d\n", 1 << c_info->max_msg_log);
+    }
+
+    fprintf(unit, "#\n");
+    fprintf(unit, "# MPI_Datatype                   :   MPI_BYTE \n");
+    fprintf(unit, "# MPI_Datatype for reductions    :   MPI_FLOAT\n");
+    fprintf(unit, "# MPI_Op                         :   MPI_SUM  \n");
+#else
+    if (c_info->n_lens > 0) {
+        fprintf(unit, "# IO lengths were user defined\n");
+    } else {
+        fprintf(unit, "# Minimum io portion in bytes:   %d\n", 0);
+        fprintf(unit, "# Maximum io portion in bytes:   %d\n", 1 << c_info->max_msg_log);
+    }
+    fprintf(unit, "#\n");
+    IMB_print_info();
+#endif
+    fprintf(unit, "#\n");
+
+#ifdef IMB_OPTIONAL
+    fprintf(unit, "#\n\n");
+    fprintf(unit, "# !! Attention: results have been achieved in\n");
+    fprintf(unit, "# !! IMB_OPTIONAL mode.\n");
+    fprintf(unit, "# !! Results may differ from standard case.\n");
+    fprintf(unit, "#\n");
+#endif
+    fprintf(unit, "#\n");
+    IMB_print_blist(c_info, BList);
+
+    if (do_nonblocking) {
+        fprintf(unit, "\n\n# For nonblocking benchmarks:\n\n");
+        fprintf(unit, "# Function CPU_Exploit obtains an undisturbed\n");
+        fprintf(unit, "# performance of %7.2f MFlops\n", MFlops);
+    }
+} /* end of IMB_show_selections*/
+
+/****************************************************************************/
+void IMB_show_procids(struct comm_info* c_info) {
+    /*
+
+       Prints the process layout to the output unit: the number of
+       processes (or groups), the participating ranks and how many
+       additional processes wait. Only the process with w_rank == 0 prints.
+
+       Input variables:
+
+       -c_info               (type struct comm_info*)
+       Collection of all base data for MPI;
+       see [1] for more information
+
+       */
+    int first, row, row_len, idle;
+
+    if (c_info->w_rank != 0) {
+        return;
+    }
+
+    if (c_info->n_groups == 1) {
+        if (c_info->px > 1 && c_info->py > 1) {
+            fprintf(unit, "\n# #processes = %d; rank order (rowwise): \n",
+                          c_info->num_procs);
+            /* one output row per grid row; the first w_num_procs % px rows get one rank more */
+            first = 0;
+            for (row = 0; row < c_info->px && first < c_info->NP; row++) {
+                row_len = c_info->w_num_procs / c_info->px;
+                if (row < c_info->w_num_procs % c_info->px) {
+                    row_len++;
+                }
+                row_len = min(row_len, c_info->NP - first);
+                IMB_print_array(c_info->g_ranks + first, 1, 0, row_len, "", unit);
+                fprintf(unit, "\n");
+                first += row_len;
+            }
+        } else {
+            fprintf(unit, "\n# #processes = %d \n", c_info->num_procs);
+        }
+
+        idle = c_info->w_num_procs - c_info->num_procs;
+    } else {
+        fprintf(unit, "\n# ( %d groups of %d processes each running simultaneous ) \n",
+                      c_info->n_groups, c_info->num_procs);
+        IMB_print_array(c_info->g_ranks, c_info->n_groups, 0,
+                        c_info->g_sizes[0], "Group ", unit);
+
+        idle = c_info->w_num_procs - c_info->n_groups * c_info->g_sizes[0];
+    }
+
+    if (idle == 1) {
+        fprintf(unit, "# ( %d additional process waiting in MPI_Barrier)\n", idle);
+    } else if (idle != 0) {
+        fprintf(unit, "# ( %d additional processes waiting in MPI_Barrier)\n", idle);
+    }
+} /* end of IMB_show_procids*/
+
+
+/****************************************************************************/
+void IMB_print_array(int* Array, int N, int disp_N,
+                     int M, char* txt, FILE* unit) {
+/*
+
+       Formattedly prints an N by M int array (N rows of M columns) to unit;
+       if there are more than MAX_SHOW rows, or more than MAX_SHOW columns
+       in a single row, only the first and last MAX_SHOW/2 are printed
+
+Input variables:
+
+-Array                (type int*)
+                      Array to be printed
+
+-N                    (type int)
+                      Number of rows to be printed
+
+-disp_N               (type int)
+                      Displacement in Array where first row begins
+                      (added to the row number printed after txt)
+-M                    (type int)
+                      Number of columns
+
+-txt                  (type char*)
+                      Accompanying text (may be NULL or empty)
+
+-unit                 (type FILE*)
+                      Output unit
+
+*/
+#define MAX_SHOW 1024
+    int i;
+    char* outtxt = NULL;
+    int do_out = 0;
+
+    if (txt && strcmp(txt, "")) {
+        /* Room for txt, a blank, the row number (at most 11 characters for a
+           32 bit int), ": " and the terminating NUL. The former "+ 6" was
+           overrun by sprintf as soon as the row number had 3 digits. */
+        outtxt = (char*)IMB_v_alloc((strlen(txt) + 16)*sizeof(char), " IMB_print_array ");
+        do_out = 1;
+    }
+
+    if (N <= 1) {
+        if (M > MAX_SHOW) {
+            fprintf(unit, "#  ");
+            IMB_print_int_row(unit, Array, MAX_SHOW / 2);
+            fprintf(unit, " ... ");
+            IMB_print_int_row(unit, &Array[M - MAX_SHOW / 2], MAX_SHOW / 2);
+        } else {
+            if (do_out) fprintf(unit, "# %s", txt);
+            else         fprintf(unit, "# ");
+            IMB_print_int_row(unit, Array, M);
+        }
+    } else if (N <= MAX_SHOW) {
+        int zero = 0, one = 1;
+        for (i = 0; i < N; i++) {
+            /* each row is printed through the N <= 1 branch with its own label */
+            if (do_out)
+                sprintf(outtxt, "%s %d: ", txt, disp_N + i);
+            IMB_print_array(&Array[i*M], one, zero, M, outtxt, unit);
+            fprintf(unit, "\n");
+        }
+    } else {
+        /* too many rows: show only the first and the last MAX_SHOW/2 of them */
+        int disp;
+
+        disp = 0;
+        IMB_print_array(Array, MAX_SHOW / 2, disp, M, txt, unit);
+        fprintf(unit, "#  . \n");
+        fprintf(unit, "#  . \n");
+        disp = N - MAX_SHOW / 2;
+        IMB_print_array(&Array[(N - MAX_SHOW / 2)*M], MAX_SHOW / 2, disp, M, txt, unit);
+    }
+
+    if (do_out) {
+        IMB_v_free((void**)&outtxt);
+    }
+}
+
+/****************************************************************************/
+void IMB_print_int_row(FILE* unit, int* Array, int M) {
+/*
+
+       Prints M int numbers to unit, X_PER_ROW numbers per output line;
+       every line, including the last one, is followed by "\n# "
+
+Input variables:
+
+-unit                 (type FILE*)
+                      Output unit
+
+-Array                (type int*)
+                      Data to be printed
+
+-M                    (type int)
+                      Number of data
+
+*/
+#define X_PER_ROW 16
+    int k;
+
+    for (k = 0; k < M; k++) {
+        fprintf(unit, " %4d", Array[k]);
+
+        /* close the line after a full row and after the very last number */
+        if ((k + 1) % X_PER_ROW == 0 || k + 1 == M)
+            fprintf(unit, "\n# ");
+    }
+}
+
+#ifdef MPIIO
+/****************************************************************************/
+void IMB_print_info() {
+/*
+
+       Prints MPI_Info selections (MPI-2 only): the keys and values of the
+       info object delivered by IMB_user_set_info are written to unit
+
+*/
+    int nkeys, ikey, vlen, exists;
+    MPI_Info tmp_info;
+    char key[MPI_MAX_INFO_KEY], *value;
+
+    IMB_user_set_info(&tmp_info);
+
+    /* July 2002 fix V2.2.1: handle NULL case */
+    if (tmp_info != MPI_INFO_NULL) {
+        /* end change */
+
+        MPI_Info_get_nkeys(tmp_info, &nkeys);
+
+        if (nkeys > 0) fprintf(unit, "# Got %d Info-keys:\n\n", nkeys);
+
+        for (ikey = 0; ikey < nkeys; ikey++) {
+            MPI_Info_get_nthkey(tmp_info, ikey, key);
+            MPI_Info_get_valuelen(tmp_info, key, &vlen, &exists);
+
+            /* vlen excludes the terminating NUL, hence the extra byte */
+            value = (char*)IMB_v_alloc((vlen + 1)* sizeof(char), "Print_Info");
+
+            MPI_Info_get(tmp_info, key, vlen, value, &exists);
+            /* write to unit like the header line above, not to stdout */
+            fprintf(unit, "# %s = \"%s\"\n", key, value);
+            IMB_v_free((void**)&value);
+        }
+
+        MPI_Info_free(&tmp_info);
+        /* July 2002 fix V2.2.1: end if */
+    }
+    /* end change */
+}
+#endif
+
+
+
+/*****************************************************************/
+void IMB_print_header(int out_format, struct Bench* bmark,
+                      struct comm_info* c_info, MODES bench_mode) {
+/*
+
+       Prints the table header of a benchmark to the output unit:
+       benchmark name, process layout, aggregate mode and column titles
+
+Input variables:
+
+-out_format           (type int)
+                      Code for table formatting details
+
+-bmark                (type struct Bench*)
+                      The benchmark the header is printed for
+
+-c_info               (type struct comm_info*)
+                      Collection of all base data for MPI
+
+-bench_mode           (type MODES)
+                      Mode whose AGGREGATE setting is reported
+
+*/
+    const char* columns = NULL;   /* '&' separated column titles */
+    int n_columns = 0;
+    int line_len;
+    char* token;
+
+    fprintf(unit, "\n");            /* FOR GNUPLOT: CURVE SEPARATOR  */
+    /* several groups output: the table gets a leading group column */
+    if (c_info->group_mode > 0) {
+        strcpy(aux_string, "&Group");
+        line_len = 1;
+    } else {
+        strcpy(aux_string, "");
+        line_len = 0;
+    }
+    switch (out_format) {
+        case OUT_TIME_AND_BW:
+            n_columns = 4;
+            columns = "&#bytes&#repetitions&t[usec]&Mbytes/sec&";
+            break;
+        case OUT_BW_AND_MSG_RATE:
+            n_columns = 4;
+            columns = "&#bytes&#repetitions&Mbytes/sec&Msg/sec&";
+            break;
+        case OUT_TIME_RANGE_AND_BW:
+            n_columns = 6;
+            columns = "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&Mbytes/sec&";
+            break;
+        case OUT_TIME_RANGE:
+            n_columns = 5;
+            columns = "&#bytes&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&";
+            break;
+        case OUT_SYNC:
+            if (bmark->RUN_MODES[0].NONBLOCKING && !strstr(bmark->name, "_pure")) {
+                n_columns = 5;
+                columns = "&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&";
+            } else {
+                n_columns = 4;
+                columns = "&#repetitions&t_min[usec]&t_max[usec]&t_avg[usec]&";
+            }
+            break;
+        case OUT_OVERLAP:
+#ifdef RMA
+            n_columns = 4;
+            columns = "&#bytes&#repetitions&t_pure[usec]&t_ovrl[usec]&";
+#else
+            n_columns = 6;
+            columns = "&#bytes&#repetitions&t_ovrl[usec]&t_pure[usec]&t_CPU[usec]& overlap[%]&";
+#endif
+            break;
+    }
+
+    line_len += n_columns;
+    if (columns != NULL) {
+        strcat(aux_string, columns);
+    }
+
+#ifdef CHECK
+    if (bmark->RUN_MODES[0].type != Sync && strcmp(bmark->name, "Window")) {
+        line_len += 1;
+        strcat(aux_string, "&defects&");
+    }
+#endif
+
+    IMB_make_line(line_len);
+    if (c_info->n_groups > 1) {
+        fprintf(unit, "# Benchmarking Multi-%s ", bmark->name);
+    } else {
+        fprintf(unit, "# Benchmarking %s ", bmark->name);
+    }
+    IMB_show_procids(c_info);
+    IMB_make_line(line_len);
+    if (bench_mode->AGGREGATE == 1) {
+        fprintf(unit, "#\n#    MODE: AGGREGATE \n#\n");
+    } else if (bench_mode->AGGREGATE == 0) {
+        fprintf(unit, "#\n#    MODE: NON-AGGREGATE \n#\n");
+    }
+    /* print every '&' separated title in a field of width ow_format */
+    for (token = strtok(aux_string, "&"); token != NULL; token = strtok(NULL, "&")) {
+        sprintf(format, "%%%ds", ow_format);
+        fprintf(unit, format, token);
+    }
+    fprintf(unit, "\n");
+}
+
+
+/*****************************************************************/
+void IMB_edit_format(int n_ints, int n_floats) {
+/*
+
+       Edits the format string for output: n_ints "%<w>d" items followed
+       by n_floats "%<w>.2f" items, <w> being ow_format;
+       format is left untouched if both counts are <= 0
+
+Input variables:
+
+-n_ints               (type int)
+                      # of int items to be printed
+
+-n_floats             (type int)
+                      # of float items to be printed
+
+*/
+    char* tail = format;   /* where the next conversion is written */
+    int k;
+
+    /* the integer conversions come first */
+    for (k = 0; k < n_ints; k++) {
+        tail += sprintf(tail, "%%%dd", ow_format);
+    }
+
+    /* the float conversions are appended behind them */
+    for (k = 0; k < n_floats; k++) {
+        tail += sprintf(tail, "%%%d.2f", ow_format);
+    }
+}
+
+/***************************************************************************/
+void IMB_make_line(int line_len) {
+/*
+
+       Prints an underline: "#" followed by line_len*ow_format - 1 dashes
+
+Input variables:
+
+-line_len               (type int)
+                        Length of underline, in output fields
+
+*/
+    int left;
+
+    fputs("#", unit);
+    /* the leading "#" takes the place of the first dash */
+    for (left = line_len * ow_format; left > 1; left--)
+        fputs("-", unit);
+    fputs("\n", unit);
+}
+
+/***************************************************************************/
+/* New function for IMB_3.0 */
+void IMB_help()
+{
+
+#ifdef MPI1
+    const char *progname = "IMB-MPI1";
+#elif defined(EXT)
+    const char *progname = "IMB-EXT";
+#elif defined (MPIIO)
+    const char *progname = "IMB-IO";
+#elif defined (RMA)
+    const char *progname = "IMB-RMA";
+#elif defined (NBC)
+    const char *progname = "IMB-NBC";
+#endif
+
+    fflush(stderr);
+    fflush(unit);
+
+    fprintf(unit, "\nCalling sequence (command line will be repeated in Output table!):\n\n");
+
+    fprintf(unit, "\n%s         [-h{elp}]\n", progname);
+
+    /* IMB 3.1 << */
+    /* Update calling sequence */
+    /* >> IMB 3.1  */
+    fprintf(unit,
+        "[-npmin        <NPmin>]\n"
+        "[-multi        <outflag>]\n"
+        "[-off_cache    <cache_size[,cache_line_size]>\n"
+        "[-iter         <msgspersample[,overall_vol[,msgs_nonaggr]]>\n"
+        "[-iter_policy  <iter_policy>]\n"
+        "[-time         <max_runtime per sample>]\n"
+        "[-mem          <max. per process memory for overall message buffers>]\n"
+        "[-msglen       <Lengths_file>]\n"
+        "[-map          <PxQ>]\n"
+        "[-input        <filename>]\n"
+        "[benchmark1    [benchmark2 [...]]]\n"
+        "[-include      [benchmark1,[benchmark2,[...]]]\n"
+        "[-exclude      [benchmark1,[benchmark2,[...]]]\n"
+        "[-msglog       <[min_msglog]:max_msglog>]\n"
+#if (defined MPI1 || defined NBC)
+        "[-root_shift   <on or off>]\n"
+        "[-sync         <on or off>]\n"
+#endif            
+        "[-imb_barrier  <on or off>]\n"
+        "\n"
+        "where \n"
+        "\n"
+        "- h ( or help) just provides basic help \n"
+        "(if active, all other arguments are ignored)\n"
+        "\n"
+        "- npmin\n\n"
+        "the argumaent after npmin is NPmin, \n"
+        "the minimum number of processes to run on\n"
+        "(then if IMB is started on NP processes, the process numbers \n"
+        "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+        ">>>\n"
+        "to run on just NP processes, run IMB on NP and select -npmin NP\n"
+        "<<<\n"
+        "default: \n"
+        "NPmin=2\n"
+        "\n");
+#ifdef MPIIO
+    fprintf(unit,
+        "-off_cache \n\n"
+        "no effect for IMB-IO (only IMB-MPI1, IMB-EXT) \n"
+        "\n");
+#else
+    fprintf(unit,
+        "- off_cache \n"
+        "\n"
+        "the argument after off_cache can be either 1 single number (cache_size),  \n"
+        "or 2 comma separated numbers (cache_size,cache_line_size), or just -1 \n"
+        "\n"
+        "By default, without this flag, the communications buffer is  \n"
+        "the same within all repetitions of one message size sample;   \n"
+        "most likely, cache reusage is yielded and thus throughput results   \n"
+        "that might be non realistic.    \n"
+        "\n"
+        "With -off_cache, it is attempted to avoid cache reusage.    \n"
+        "cache_size is a float for an upper bound of the size of the last level cache in MBytes \n"
+        "cache_line_size is assumed to be the size (Bytes) of a last level cache line  \n"
+        "(can be an upper estimate).  \n"
+        "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );  \n"
+        "when repetitively using messages of a particular size, their addresses are advanced within those  \n"
+        "buffers so that a single message is at least 2 cache lines after the end of the previous message.  \n"
+        "Only when those buffers have been marched through (eventually), they will re-used from the beginning.  \n"
+        "\n"
+        "A cache_size and a cache_line_size are assumed as statically defined    \n"
+        "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered  \n"
+        "\n"
+        "remark: -off_cache is effective for IMB-MPI1, IMB-EXT, but not IMB-IO  \n"
+        "\n"
+        "examples  \n"
+        "-off_cache -1 (use defaults of IMB_mem_info.h);  \n"
+        "-off_cache 2.5 (2.5 MB last level cache, default line size);  \n"
+        "-off_cache 16,128 (16 MB last level cache, line size 128);  \n"
+        "\n"
+        "NOTE: the off_cache mode might also be influenced by eventual internal  \n"
+        "caching with the MPI library. This could make the interpretation \n"
+        "intricate.  \n"
+        "\n"
+        "default: \n"
+        "no cache control, data likely to come out of cache most of the time \n"
+        "\n");
+#endif
+    fprintf(unit,
+        "- iter \n\n"
+        "the argument after -iter can contain from 1 to 3 comma separated values\n"
+        "3 integer numbers override the defaults \n"
+        "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of =>IMB_settings.h\n"
+        "examples \n"
+        "-iter 2000        (override MSGSPERSAMPLE by value 2000) \n"
+        "-iter 1000,100    (override OVERALL_VOL by 100) \n"
+        "-iter 1000,40,150 (override MSGS_NONAGGR by 150) \n"
+        "\n"
+        "\n"
+        "default: \n"
+        "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h \n"
+        "\n"
+        "- iter_policy\n"
+        "\n"
+        "the argument after -iter_policy is a one from possible strings,\n"
+        "specifying that policy will be used for auto iteration control:\n"
+        "dynamic,multiple_np,auto,off\n"
+        "example \n"
+        "-iter_policy auto\n"
+        "default:\n"
+        "iteration control through parameter ITER_POLICY => IMB_settings.h \n"
+        "\n"
+        "NOTE: !! New in versions from IMB 3.2 on !!  \n"
+        "the iter selection is overridden by a dynamic selection that is a new default in \n"
+        "IMB 3.2: when a maximum run time (per sample) is expected to be exceeded, the \n"
+        "iteration number will be cut down; see -time flag  \n"
+        "\n"
+        "- time\n"
+        "\n"
+        "the argument after -time is a float, specifying that \n"
+        "a benchmark will run at most that many seconds per message size \n"
+        "the combination with the -iter flag or its defaults is so that always \n"
+        "the maximum number of repetitions is chosen that fulfills all restrictions \n"
+        "example \n"
+        "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, iff\n"
+        "the default (or -iter selected) number of repetitions would take longer than that) \n"
+        "\n"
+        "remark: per sample, the rough number of repetitions to fulfill the -time request \n"
+        "is estimated in preparatory runs that use ~ 1 second overhead \n"
+        "\n"
+        "default: \n"
+        "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10  \n"
+        "(new default in IMB_3.2) \n"
+        "\n"
+        "- mem\n\n"
+        "the argument after -mem is a float, specifying that \n"
+        "at most that many GBytes are allocated per process for the message buffers \n"
+        "if the size is exceeded, a warning will be output, stating how much memory \n"
+        "would have been necessary, but the overall run is not interrupted \n"
+        "example \n"
+        "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process) \n"
+        "\n"
+        "default: \n"
+        "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h \n"
+        "\n"
+        "- map\n\n"
+        "the argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+        "enter PxQ with the 2 numbers separated by letter \"x\" and no blancs\n"
+        "the basic communicator is set up as P by Q process grid\n"
+        "\n"
+        "if, e.g., one runs on N nodes of X processors each, and inserts\n"
+        "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+        "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+        "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+        "first priority) \n"
+        "\n"
+        "default: \n"
+        "Q=1\n"
+        "\n"
+        "- multi\n\n"
+        "the argument after -multi is outflag (0 or 1)\n"
+        "\n"
+        "if -multi is selected, running the N process version of a benchmark\n"
+        "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+        "\n"
+        "outflag only controls default (0) or extensive (1) output charts.\n"
+        "0: only lowest performance groups is output\n"
+        "1: all groups are output\n"
+        "\n"
+        "default: \n"
+        "multi off\n"
+        "\n"
+        "- msglen\n\n"
+        "the argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+        "message lengths, 1 per line\n"
+        "\n"
+        "default: \n"
+        "no lengths_file, lengths defined by settings.h, settings_io.h\n"
+        "\n"
+        "- input\n\n"
+        "the argument after -input is a filename is any text file containing, line by line, benchmark names\n"
+        "facilitates running particular benchmarks as compared to using the\n"
+        "command line.\n"
+        "default: \n"
+        "no input file exists\n"
+        "\n"
+        "- include\n\n"
+        "the argument after -include  is one or more benchmark names separated by comma\n"
+        "\n"
+        "- exclude\n\n"
+        "the argument after -exclude  is one or more benchmark names separated by comma\n"
+        "\n"
+        "\n"
+        "-msglog\n\n"
+        "the argument after -msglog min:max, min and max are positive integer numbers, min<max\n"
+        "where min is power of 2 so that second smallest data transfer size is max(unit,2^min)\n"
+        "(the smallest always being 0), where unit = sizeof(float) for reductions, unit = 1 else\n"
+        "max is power of 2 so that 2^max is largest messages size, max must be less than 31"
+        "\n\n"
+#if (defined MPI1 || defined NBC)
+        "-root_shift\n\n"
+        "controls root change at each iteration step for certain collective benchmarks,\n"
+        "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+        "default:\n"
+        "off\n"
+        "\n"
+        "-sync\n\n"
+        "controls whether all processes are syncronized at each iteration step in collective benchmarks,\n"
+        "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+        "default:\n"
+        "on\n"
+        "\n"
+        "\n"
+#endif            
+        "-imb_barrier\n\n"
+        "use internal MPI-independent barrier syncronization implementation,\n"
+        "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+        "default:\n"
+        "off\n"
+        "\n"
+        "- benchmarkX is (in arbitrary lower/upper case spelling)\n"
+        "\n");
+#ifdef MPI1
+    fprintf(unit,
+        "PingPongSpecificSource\n"
+        "PingPongAnySource\n"
+        "PingPingSpecificSource\n"
+        "PingPingAnySource\n"
+        "PingPing\n"
+        "PingPong\n"
+        "Sendrecv\n"
+        "Exchange\n"
+        "Bcast\n"
+        "Allgather\n"
+        "Allgatherv\n"
+        "Gather\n"
+        "Gatherv\n"
+        "Scatter\n"
+        "Scatterv\n"
+        "Alltoall\n"
+        "Alltoallv\n"
+        "Reduce\n"
+        "Reduce_scatter\n"
+        "Allreduce\n"
+        "Barrier\n"
+        "Uniband\n"
+        "Biband\n"
+        "\n");
+
+#elif defined(EXT)
+    fprintf(unit,
+        "Window\n"
+        "Unidir_Put\n"
+        "Unidir_Get\n"
+        "Bidir_Get\n"
+        "Bidir_Put\n"
+        "Accumulate\n"
+        "\n");
+
+#elif defined(MPIIO)
+    fprintf(unit,
+        "S_Write_indv\n"
+        "S_Read_indv\n"
+        "S_Write_expl\n"
+        "S_Read_expl\n"
+        "P_Write_indv\n"
+        "P_Read_indv\n"
+        "P_Write_expl\n"
+        "P_Read_expl\n"
+        "P_Write_shared\n"
+        "P_Read_shared\n"
+        "P_Write_priv\n"
+        "P_Read_priv\n"
+        "C_Write_indv\n"
+        "C_Read_indv\n"
+        "C_Write_expl\n"
+        "C_Read_expl\n"
+        "C_Write_shared\n"
+        "C_Read_shared\n"
+        "\n");
+
+#elif defined(NBC) 
+    fprintf(unit,
+        "Ibcast\n"
+        "Ibcast_pure\n"
+        "Iallgather\n"
+        "Iallgather_pure\n"
+        "Iallgatherv\n"
+        "Iallgatherv_pure\n"
+        "Igather\n"
+        "Igather_pure\n"
+        "Igatherv\n"
+        "Igatherv_pure\n"
+        "Iscatter\n"
+        "Iscatter_pure\n"
+        "Iscatterv\n"
+        "Iscatterv_pure\n"
+        "Ialltoall\n"
+        "Ialltoall_pure\n"
+        "Ialltoallv\n"
+        "Ialltoallv_pure\n"
+        "Ireduce\n"
+        "Ireduce_pure\n"
+        "Ireduce_scatter\n"
+        "Ireduce_scatter_pure\n"
+        "Iallreduce\n"
+        "Iallreduce_pure\n"
+        "Ibarrier\n"
+        "Ibarrier_pure\n"
+        "\n");
+
+#elif defined(RMA)
+    fprintf(unit,
+        "Unidir_put\n"
+        "Unidir_get\n"
+        "Bidir_put\n"
+        "Bidir_get\n"
+        "One_put_all\n"
+        "One_get_all\n"
+        "All_put_all\n"
+        "All_get_all\n"
+        "Put_local\n"
+        "Get_local\n"
+        "Put_all_local\n"
+        "Get_all_local\n"
+        "Exchange_put\n"
+        "Exchange_get\n"
+        "Accumulate\n"
+        "Get_accumulate\n"
+        "Fetch_and_op\n"
+        "Compare_and_swap\n"
+        "Truly_passive_put\n"
+        "\n");
+#endif
+
+}
diff --git a/src/IMB_parse_name_ext.c b/src_c/IMB_parse_name_ext.c
similarity index 56%
rename from src/IMB_parse_name_ext.c
rename to src_c/IMB_parse_name_ext.c
index 2adc8355..208a4aa3 100644
--- a/src/IMB_parse_name_ext.c
+++ b/src_c/IMB_parse_name_ext.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,98 +80,78 @@ For more documentation than found here, see
 #include "IMB_prototypes.h"
 
 
-int IMB_get_def_cases(char*** defc, char*** Gcmt)
+int IMB_get_def_cases(char*** defc, char*** Gcmt) {
 /*
 
-                      
-                      Initializes default benchmark names (defc) and accompanying
-                      comments (Gcmt)
-                      
+                          Initializes default benchmark names (defc) and accompanying
+                          comments (Gcmt)
 
+In/out variables:
 
-In/out variables: 
-
--defc                 (type char***)                      
-                      List of benchkark names (strings)
-                      
-
--Gcmt                 (type char***)                      
-                      List of general comments (strings)
-                      
+-defc                     (type char***)
+                          List of benchmark names (strings)
 
+-Gcmt                     (type char***)
+                          List of general comments (strings)
 
 */
-{
     *defc = &DEFC[0];
     *Gcmt = &Gral_cmt[0];
-    return (int) (sizeof(DEFC)/sizeof(char*));
+    return (int)(sizeof(DEFC) / sizeof(char*));
 }
 
-int IMB_get_all_cases(char*** allc)
+int IMB_get_all_cases(char*** allc) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
-
 
-In/out variables: 
+In/out variables:
 
--defc                 (type char***)                      
+-defc                 (type char***)
                       List of benchkark names (strings)
-                      
 
--Gcmt                 (type char***)                      
+-Gcmt                 (type char***)
                       List of general comments (strings)
-                      
-
 
 */
-{
     *allc = &ALLC[0];
-    return (int) (sizeof(ALLC)/sizeof(char*));
+    return (int)(sizeof(ALLC) / sizeof(char*));
 }
 
 
-void IMB_set_bmark(struct Bench* Bmark)
+void IMB_set_bmark(struct Bench* Bmark) {
 /*
 
+In/out variables:
 
-
-In/out variables: 
-
--Bmark                (type struct Bench*)                      
+-Bmark                (type struct Bench*)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       On input, only the name of the benchmark is set.
                       On output, all necessary run modes are set accordingly
-                      
-
 
 */
-{
     BTYPES type;
 #if 0
     int index;
 
-    IMB_get_def_index(&index,Bmark->name);
+    IMB_get_def_index(&index, Bmark->name);
 
-    if( index < 0 )
-    {
-	Bmark->RUN_MODES[0].type=BTYPE_INVALID;
-	Bmark->RUN_MODES[1].type=BTYPE_INVALID;
-	return;
+    if (index < 0) {
+        Bmark->RUN_MODES[0].type = BTYPE_INVALID;
+        Bmark->RUN_MODES[1].type = BTYPE_INVALID;
+        return;
     }
 #endif
 
-    Bmark->RUN_MODES[0].AGGREGATE=1;
-    Bmark->RUN_MODES[1].AGGREGATE=0;
+    Bmark->RUN_MODES[0].AGGREGATE = 1;
+    Bmark->RUN_MODES[1].AGGREGATE = 0;
 
-    Bmark->RUN_MODES[0].NONBLOCKING =0;
-    Bmark->RUN_MODES[1].NONBLOCKING =0;
+    Bmark->RUN_MODES[0].NONBLOCKING = 0;
+    Bmark->RUN_MODES[1].NONBLOCKING = 0;
 
     Bmark->N_Modes = 2;
 
@@ -180,63 +159,50 @@ In/out variables:
 
     Bmark->Ntimes = 1;
 
-    if (!strcmp(Bmark->name,"unidir_get"))
-    { 
-	strcpy(Bmark->name,"Unidir_Get");
-	Bmark->Benchmark = IMB_unidir_get;
-	Bmark->bench_comments = &Unidir_Get_cmt[0];
-	type = SingleTransfer;
-	Bmark->access = get;
-    }
-    else if (!strcmp(Bmark->name,"unidir_put"))
-    { 
-	strcpy(Bmark->name,"Unidir_Put");
-	Bmark->Benchmark = IMB_unidir_put;
-	Bmark->bench_comments = &Unidir_Put_cmt[0];
-	type = SingleTransfer;
-	Bmark->access = put;
-    }
-    else if (!strcmp(Bmark->name,"bidir_get"))
-    { 
-	strcpy(Bmark->name,"Bidir_Get");
-	Bmark->Benchmark = IMB_bidir_get;
-	Bmark->bench_comments = &Bidir_Get_cmt[0];
-	type = SingleTransfer;
-	Bmark->access = get;
-    }
-    else if (!strcmp(Bmark->name,"bidir_put"))
-    { 
-	strcpy(Bmark->name,"Bidir_Put");
-	Bmark->Benchmark = IMB_bidir_put;
-	Bmark->bench_comments = &Bidir_Put_cmt[0];
-	type = SingleTransfer;
-	Bmark->access = put;
-    }
-    else if (!strcmp(Bmark->name,"accumulate"))
-    { 
-	strcpy(Bmark->name,"Accumulate");
-	Bmark->Benchmark = IMB_accumulate;
-	Bmark->bench_comments = &Accumulate_cmt[0];
-	type = Collective;
-	Bmark->access = put;
-	Bmark->reduction = 1;
-    }
-    else if (!strcmp(Bmark->name,"window"))
-    { 
-	strcpy(Bmark->name,"Window");
-	Bmark->Benchmark = IMB_window;
-	Bmark->bench_comments = &Window_cmt[0];
-	Bmark->RUN_MODES[0].AGGREGATE=-1;
-	Bmark->N_Modes = 1;
-	type = Collective;
-	Bmark->access = no;
-	Bmark->reduction = 1;
-    }
-    else 
-    {
-	type = BTYPE_INVALID;
+    if (!strcmp(Bmark->name, "unidir_get")) {
+        strcpy(Bmark->name, "Unidir_Get");
+        Bmark->Benchmark = IMB_unidir_get;
+        Bmark->bench_comments = &Unidir_Get_cmt[0];
+        type = SingleTransfer;
+        Bmark->access = get;
+    } else if (!strcmp(Bmark->name, "unidir_put")) {
+        strcpy(Bmark->name, "Unidir_Put");
+        Bmark->Benchmark = IMB_unidir_put;
+        Bmark->bench_comments = &Unidir_Put_cmt[0];
+        type = SingleTransfer;
+        Bmark->access = put;
+    } else if (!strcmp(Bmark->name, "bidir_get")) {
+        strcpy(Bmark->name, "Bidir_Get");
+        Bmark->Benchmark = IMB_bidir_get;
+        Bmark->bench_comments = &Bidir_Get_cmt[0];
+        type = SingleTransfer;
+        Bmark->access = get;
+    } else if (!strcmp(Bmark->name, "bidir_put")) {
+        strcpy(Bmark->name, "Bidir_Put");
+        Bmark->Benchmark = IMB_bidir_put;
+        Bmark->bench_comments = &Bidir_Put_cmt[0];
+        type = SingleTransfer;
+        Bmark->access = put;
+    } else if (!strcmp(Bmark->name, "accumulate")) {
+        strcpy(Bmark->name, "Accumulate");
+        Bmark->Benchmark = IMB_accumulate;
+        Bmark->bench_comments = &Accumulate_cmt[0];
+        type = Collective;
+        Bmark->access = put;
+        Bmark->reduction = 1;
+    } else if (!strcmp(Bmark->name, "window")) {
+        strcpy(Bmark->name, "Window");
+        Bmark->Benchmark = IMB_window;
+        Bmark->bench_comments = &Window_cmt[0];
+        Bmark->RUN_MODES[0].AGGREGATE = -1;
+        Bmark->N_Modes = 1;
+        type = Collective;
+        Bmark->access = no;
+        Bmark->reduction = 1;
+    } else {
+        type = BTYPE_INVALID;
     }
-    Bmark->RUN_MODES[0].type=type;
-    Bmark->RUN_MODES[1].type=type;
+    Bmark->RUN_MODES[0].type = type;
+    Bmark->RUN_MODES[1].type = type;
 
 }
diff --git a/src/IMB_parse_name_io.c b/src_c/IMB_parse_name_io.c
similarity index 50%
rename from src/IMB_parse_name_io.c
rename to src_c/IMB_parse_name_io.c
index 4a770fa4..73fc5840 100644
--- a/src/IMB_parse_name_io.c
+++ b/src_c/IMB_parse_name_io.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -80,76 +79,57 @@ For more documentation than found here, see
 
 #include "IMB_prototypes.h"
 
-int IMB_get_def_cases(char*** defc, char*** Gcmt)
+int IMB_get_def_cases(char*** defc, char*** Gcmt) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
 
+In/out variables:
 
-In/out variables: 
-
--defc                 (type char***)                      
+-defc                 (type char***)
                       List of benchkark names (strings)
-                      
 
--Gcmt                 (type char***)                      
+-Gcmt                 (type char***)
                       List of general comments (strings)
-                      
-
 
 */
-{
     *defc = &DEFC[0];
     *Gcmt = &Gral_cmt[0];
-    return (int) (sizeof(DEFC)/sizeof(char*));
+    return (int)(sizeof(DEFC) / sizeof(char*));
 }
 
-int IMB_get_all_cases(char*** allc)
+int IMB_get_all_cases(char*** allc) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
-
 
-In/out variables: 
+In/out variables:
 
--allc                 (type char***)                      
+-allc                 (type char***)
                       List of benchkark names (strings)
-                      
-
-
 
 */
-{
     *allc = &ALLC[0];
-    return (int) (sizeof(ALLC)/sizeof(char*));
+    return (int)(sizeof(ALLC) / sizeof(char*));
 }
 
 
-void IMB_set_bmark(struct Bench* Bmark)
+void IMB_set_bmark(struct Bench* Bmark) {
 /*
 
+In/out variables:
 
-
-In/out variables: 
-
--Bmark                (type struct Bench*)                      
+-Bmark                (type struct Bench*)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       On input, only the name of the benchmark is set.
                       On output, all necessary run modes are set accordingly
-                      
-
 
 */
-{
     BTYPES type;
     char *tmp_name;
     int md, nam_beg;
@@ -157,171 +137,144 @@ In/out variables:
 #if 0
     int index;
 
-    IMB_get_def_index(&index,Bmark->name);
+    IMB_get_def_index(&index, Bmark->name);
 
-    if( index < 0 )
-    {
-	Bmark->RUN_MODES[0].type=BTYPE_INVALID;
-	Bmark->RUN_MODES[1].type=BTYPE_INVALID;
-	return;
+    if (index < 0) {
+        Bmark->RUN_MODES[0].type = BTYPE_INVALID;
+        Bmark->RUN_MODES[1].type = BTYPE_INVALID;
+        return;
     }
 #endif
 
-    Bmark->RUN_MODES[0].AGGREGATE=1;
-    Bmark->RUN_MODES[1].AGGREGATE=0;
+    Bmark->RUN_MODES[0].AGGREGATE = 1;
+    Bmark->RUN_MODES[1].AGGREGATE = 0;
 
 
-    Bmark->reduction=0;
+    Bmark->reduction = 0;
 
     Bmark->Ntimes = 1;
     md = 0;
     nam_beg = 2;
 
-    if( Bmark->name[0] == 's' )
-    {
-	type = SingleTransfer;
-	Bmark->name[0] = 'S';
-	if( Bmark->name[2] == 'i' )
-	{
-	    md = 1; 
-	    nam_beg++;
-	    Bmark->name[2]='I';
-	}
+    if (Bmark->name[0] == 's') {
+        type = SingleTransfer;
+        Bmark->name[0] = 'S';
+        if (Bmark->name[2] == 'i') {
+            md = 1;
+            nam_beg++;
+            Bmark->name[2] = 'I';
+        }
     }
 
-    if( Bmark->name[0] == 'p' )
-    {
-	type = ParallelTransfer;
-	Bmark->name[0] = 'P';
-	if( Bmark->name[2] == 'i' )
-	{
-	    md = 1;
-	    nam_beg++;
-	    Bmark->name[2]='I';
-	}
+    if (Bmark->name[0] == 'p') {
+        type = ParallelTransfer;
+        Bmark->name[0] = 'P';
+        if (Bmark->name[2] == 'i') {
+            md = 1;
+            nam_beg++;
+            Bmark->name[2] = 'I';
+        }
     }
 
-    if( Bmark->name[0] == 'c' )
-    {
-	type = Collective;
-	Bmark->name[0] = 'C';
-	if( Bmark->name[2] == 'i' )
-	{
-	    md = 1;
-	    nam_beg++;
-	    Bmark->name[2]='I';
-	}
+    if (Bmark->name[0] == 'c') {
+        type = Collective;
+        Bmark->name[0] = 'C';
+        if (Bmark->name[2] == 'i') {
+            md = 1;
+            nam_beg++;
+            Bmark->name[2] = 'I';
+        }
     }
 
-    if( md ) do_nonblocking=1;
+    if (md) do_nonblocking = 1;
     Bmark->Ntimes += md;
 
-    Bmark->RUN_MODES[0].NONBLOCKING =md;
-    Bmark->RUN_MODES[1].NONBLOCKING =md;
-
-    tmp_name=Bmark->name+nam_beg;
-
-    if (!strcmp(tmp_name,"write_indv"))
-    { 
-	strcpy(tmp_name,"Write_Indv");
-	Bmark->Benchmark = IMB_write_indv;
-	Bmark->bench_comments = &Write_Indv_cmt[0];
-
-	Bmark->access = put;
-	Bmark->fpointer = indv_block;
-    }
-    else if (!strcmp(tmp_name,"write_shared"))
-    { 
-	strcpy(tmp_name,"Write_Shared");
-	Bmark->Benchmark = IMB_write_shared;
-	Bmark->bench_comments = &Write_Shared_cmt[0];
-
-	Bmark->access = put;
-	Bmark->fpointer = shared;
-    }
-    else if (!strcmp(tmp_name,"write_priv"))
-    { 
-	strcpy(tmp_name,"Write_Priv");
-	Bmark->Benchmark = IMB_write_indv;
-	Bmark->bench_comments = &Write_Priv_cmt[0];
-
-	Bmark->access = put;
-	Bmark->fpointer = private;
-    }
-    else if (!strcmp(tmp_name,"write_expl"))
-    { 
-	strcpy(tmp_name,"Write_Expl");
-	Bmark->Benchmark = IMB_write_expl;
-	Bmark->bench_comments = &Write_Expl_cmt[0];
-
-	Bmark->access = put;
-	Bmark->fpointer = explicit;
-    }
-    else if (!strcmp(tmp_name,"read_indv"))
-    { 
-	strcpy(tmp_name,"Read_Indv");
-	Bmark->Benchmark = IMB_read_indv;
-	Bmark->bench_comments = &Read_Indv_cmt[0];
-
-	Bmark->access = get;
-	Bmark->fpointer = indv_block;
-    }
-    else if (!strcmp(tmp_name,"read_shared"))
-    { 
-	strcpy(tmp_name,"Read_Shared");
-	Bmark->Benchmark = IMB_read_shared;
-	Bmark->bench_comments = &Read_Shared_cmt[0];
-
-	Bmark->access = get;
-	Bmark->fpointer = shared;
-    }
-    else if (!strcmp(tmp_name,"read_priv"))
-    { 
-	strcpy(tmp_name,"Read_Priv");
-	Bmark->Benchmark = IMB_read_indv;
-	Bmark->bench_comments = &Read_Priv_cmt[0];
-
-	Bmark->access = get;
-	Bmark->fpointer = private;
-    }
-    else if (!strcmp(tmp_name,"read_expl"))
-    {
-	strcpy(tmp_name,"Read_Expl");
-	Bmark->Benchmark = IMB_read_expl;
-	Bmark->bench_comments = &Read_Expl_cmt[0];
-
-	Bmark->access = get;
-	Bmark->fpointer = explicit;
-    }
-    else if (!strcmp(Bmark->name,"open_close"))
-    {
-	strcpy(Bmark->name,"Open_Close");
-	Bmark->Benchmark = IMB_open_close;
-	Bmark->bench_comments = &Open_cmt[0];
-
-	type = Sync;
-	Bmark->RUN_MODES[0].AGGREGATE=-1;
-
-	Bmark->access = no;
-	Bmark->fpointer = explicit;
-    }
-    else 
-    {
-	type = BTYPE_INVALID;
+    Bmark->RUN_MODES[0].NONBLOCKING = md;
+    Bmark->RUN_MODES[1].NONBLOCKING = md;
+
+    tmp_name = Bmark->name + nam_beg;
+
+    if (!strcmp(tmp_name, "write_indv")) {
+        strcpy(tmp_name, "Write_Indv");
+        Bmark->Benchmark = IMB_write_indv;
+        Bmark->bench_comments = &Write_Indv_cmt[0];
+
+        Bmark->access = put;
+        Bmark->fpointer = indv_block;
+    } else if (!strcmp(tmp_name, "write_shared")) {
+        strcpy(tmp_name, "Write_Shared");
+        Bmark->Benchmark = IMB_write_shared;
+        Bmark->bench_comments = &Write_Shared_cmt[0];
+
+        Bmark->access = put;
+        Bmark->fpointer = shared;
+    } else if (!strcmp(tmp_name, "write_priv")) {
+        strcpy(tmp_name, "Write_Priv");
+        Bmark->Benchmark = IMB_write_indv;
+        Bmark->bench_comments = &Write_Priv_cmt[0];
+
+        Bmark->access = put;
+        Bmark->fpointer = priv;
+    } else if (!strcmp(tmp_name, "write_expl")) {
+        strcpy(tmp_name, "Write_Expl");
+        Bmark->Benchmark = IMB_write_expl;
+        Bmark->bench_comments = &Write_Expl_cmt[0];
+
+        Bmark->access = put;
+        Bmark->fpointer = explic;
+    } else if (!strcmp(tmp_name, "read_indv")) {
+        strcpy(tmp_name, "Read_Indv");
+        Bmark->Benchmark = IMB_read_indv;
+        Bmark->bench_comments = &Read_Indv_cmt[0];
+
+        Bmark->access = get;
+        Bmark->fpointer = indv_block;
+    } else if (!strcmp(tmp_name, "read_shared")) {
+        strcpy(tmp_name, "Read_Shared");
+        Bmark->Benchmark = IMB_read_shared;
+        Bmark->bench_comments = &Read_Shared_cmt[0];
+
+        Bmark->access = get;
+        Bmark->fpointer = shared;
+    } else if (!strcmp(tmp_name, "read_priv")) {
+        strcpy(tmp_name, "Read_Priv");
+        Bmark->Benchmark = IMB_read_indv;
+        Bmark->bench_comments = &Read_Priv_cmt[0];
+
+        Bmark->access = get;
+        Bmark->fpointer = priv;
+    } else if (!strcmp(tmp_name, "read_expl")) {
+        strcpy(tmp_name, "Read_Expl");
+        Bmark->Benchmark = IMB_read_expl;
+        Bmark->bench_comments = &Read_Expl_cmt[0];
+
+        Bmark->access = get;
+        Bmark->fpointer = explic;
+    } else if (!strcmp(Bmark->name, "open_close")) {
+        strcpy(Bmark->name, "Open_Close");
+        Bmark->Benchmark = IMB_open_close;
+        Bmark->bench_comments = &Open_cmt[0];
+
+        type = Sync;
+        Bmark->RUN_MODES[0].AGGREGATE = -1;
+
+        Bmark->access = no;
+        Bmark->fpointer = explic;
+    } else {
+        type = BTYPE_INVALID;
     }
 
-    if( Bmark->access == no ) Bmark->Ntimes = 1;
+    if (Bmark->access == no) Bmark->Ntimes = 1;
 
-    Bmark->RUN_MODES[0].type=type;
-    Bmark->RUN_MODES[1].type=type;
+    Bmark->RUN_MODES[0].type = type;
+    Bmark->RUN_MODES[1].type = type;
 
-    if( type == SingleTransfer ) Bmark->fpointer = private;
+    if (type == SingleTransfer) Bmark->fpointer = priv;
 
-    if( Bmark->access == get || Bmark->access == no  ||
-	md )
-	Bmark->N_Modes = 1;
+    if (Bmark->access == get || Bmark->access == no ||
+        md)
+        Bmark->N_Modes = 1;
     else
-	Bmark->N_Modes = 2;
-
+        Bmark->N_Modes = 2;
 }
 
diff --git a/src_c/IMB_parse_name_mpi1.c b/src_c/IMB_parse_name_mpi1.c
new file mode 100644
index 00000000..f010654f
--- /dev/null
+++ b/src_c/IMB_parse_name_mpi1.c
@@ -0,0 +1,297 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_parse_name_mpi1.c 
+
+ Implemented functions: 
+
+ IMB_get_def_cases;
+ IMB_get_all_cases;
+ IMB_set_bmark;
+ ***************************************************************************/
+
+
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_bnames_mpi1.h"
+#include "IMB_comments.h"
+
+#include "IMB_prototypes.h"
+
+
+int IMB_get_def_cases(char*** defc, char*** Gcmt) {
+/*
+
+                      Hands out the default benchmark names (defc) and the
+                      accompanying general comments (Gcmt)
+
+In/out variables:
+
+-defc                 (type char***)
+                      On output, points to DEFC, the list of benchmark names (strings)
+
+-Gcmt                 (type char***)
+                      On output, points to Gral_cmt, the list of general comments (strings)
+
+Return value:         (type int) sizeof(DEFC) / sizeof(char*), i.e. the number of char* entries in DEFC
+*/
+    *defc = &DEFC[0];
+    *Gcmt = &Gral_cmt[0];
+    return (int)(sizeof(DEFC) / sizeof(char*));
+}
+
+int IMB_get_all_cases(char*** allc) {
+/*
+
+                      Hands out the list of all benchmark names (allc)
+
+
+In/out variables:
+
+-allc                 (type char***)
+                      On output, points to ALLC, the list of benchmark names (strings)
+
+
+Return value:         (type int)
+                      sizeof(ALLC) / sizeof(char*), i.e. the number of char* entries in ALLC
+
+*/
+    *allc = &ALLC[0];
+    return (int)(sizeof(ALLC) / sizeof(char*));
+}
+
+
+void IMB_set_bmark(struct Bench* Bmark) {
+/*
+                      Fills in the run settings of the benchmark named in Bmark
+
+In/out variables:
+
+-Bmark                (type struct Bench*)
+                      (struct Bench describes all aspects of modes of a
+                      benchmark; see [1] for more information)
+                      On input, only the (lower case) name of the benchmark is set.
+                      On output, the name is rewritten in its display spelling and the
+                      benchmark function, comments and run mode are set; a name that
+                      matches no case below yields RUN_MODES[0].type = BTYPE_INVALID
+*/
+    BTYPES 	type;
+
+#if 0
+    int 	index;
+
+    IMB_get_def_index(&index, Bmark->name);
+
+    if (index < 0) {
+        Bmark->RUN_MODES[0].type = BTYPE_INVALID;
+        Bmark->RUN_MODES[1].type = BTYPE_INVALID;
+        return;
+    }
+#endif /* 0 */
+
+    Bmark->N_Modes = 1;
+    Bmark->RUN_MODES[0].AGGREGATE = -1;
+    Bmark->RUN_MODES[0].NONBLOCKING = 0;
+
+    Bmark->reduction = 0;
+    Bmark->Ntimes = 1;
+    Bmark->select_source = 0; /* default; some point-to-point cases below set it to 1 */
+
+    if (!strcmp(Bmark->name, "pingpong")) {
+        strcpy(Bmark->name, "PingPong");
+        Bmark->Benchmark = IMB_pingpong;
+        Bmark->select_source = 1;
+        Bmark->bench_comments = &PingPong_cmt[0];
+        type = SingleTransfer;
+        Bmark->scale_time = 0.5;
+    } else if (!strcmp(Bmark->name, "pingping")) {
+        strcpy(Bmark->name, "PingPing");
+        Bmark->Benchmark = IMB_pingping;
+        Bmark->select_source = 1;
+        Bmark->bench_comments = &PingPing_cmt[0];
+        type = SingleTransfer;
+    } else if (!strcmp(Bmark->name, "pingponganysource")) {
+        strcpy(Bmark->name, "PingPongAnySource");
+        Bmark->Benchmark = IMB_pingpong;
+        Bmark->bench_comments = &PingPong_cmt[0];
+        type = SingleTransfer;
+        Bmark->scale_time = 0.5;
+    } else if (!strcmp(Bmark->name, "pingpongspecificsource")) {
+        strcpy(Bmark->name, "PingPongSpecificSource");
+        Bmark->Benchmark = IMB_pingpong;
+        Bmark->select_source = 1;
+        Bmark->bench_comments = &PingPong_cmt[0];
+        type = SingleTransfer;
+        Bmark->scale_time = 0.5;
+    } else if (!strcmp(Bmark->name, "pingpinganysource")) {
+        strcpy(Bmark->name, "PingPingAnySource");
+        Bmark->Benchmark = IMB_pingping;
+        Bmark->bench_comments = &PingPing_cmt[0];
+        type = SingleTransfer;
+    } else if (!strcmp(Bmark->name, "pingpingspecificsource")) {
+        strcpy(Bmark->name, "PingPingSpecificSource");
+        Bmark->Benchmark = IMB_pingping;
+        Bmark->select_source = 1;
+        Bmark->bench_comments = &PingPing_cmt[0];
+        type = SingleTransfer;
+    } else if (!strcmp(Bmark->name, "sendrecv")) {
+        strcpy(Bmark->name, "Sendrecv");
+        Bmark->Benchmark = IMB_sendrecv;
+        Bmark->bench_comments = &Sendrecv_cmt[0];
+        type = ParallelTransfer;
+        Bmark->scale_time = 1.0;
+        Bmark->scale_bw = 2.0;
+    } else if (!strcmp(Bmark->name, "exchange")) {
+        strcpy(Bmark->name, "Exchange");
+        Bmark->Benchmark = IMB_exchange;
+        Bmark->bench_comments = &Exchange_cmt[0];
+        type = ParallelTransfer;
+        Bmark->scale_time = 1.0;
+        Bmark->scale_bw = 4.0;
+    } else if (!strcmp(Bmark->name, "allreduce")) {
+        strcpy(Bmark->name, "Allreduce");
+        Bmark->Benchmark = IMB_allreduce;
+        type = Collective;
+        Bmark->bench_comments = &Allreduce_cmt[0];
+        Bmark->reduction = 1;
+    } else if (!strcmp(Bmark->name, "reduce")) {
+        strcpy(Bmark->name, "Reduce");
+        Bmark->Benchmark = IMB_reduce;
+        type = Collective;
+        Bmark->bench_comments = &Reduce_cmt[0];
+        Bmark->reduction = 1;
+    } else if (!strcmp(Bmark->name, "reduce_scatter")) {
+        strcpy(Bmark->name, "Reduce_scatter");
+        Bmark->Benchmark = IMB_reduce_scatter;
+        type = Collective;
+        Bmark->bench_comments = &Reduce_scatter_cmt[0];
+        Bmark->reduction = 1;
+    } else if (!strcmp(Bmark->name, "bcast")) {
+        strcpy(Bmark->name, "Bcast");
+        Bmark->Benchmark = IMB_bcast;
+        type = Collective;
+        Bmark->bench_comments = &Bcast_cmt[0];
+    } else if (!strcmp(Bmark->name, "barrier")) {
+        strcpy(Bmark->name, "Barrier");
+        Bmark->Benchmark = IMB_barrier;
+        type = Sync;
+        Bmark->bench_comments = &Barrier_cmt[0];
+    } else if (!strcmp(Bmark->name, "allgather")) {
+        strcpy(Bmark->name, "Allgather");
+        Bmark->Benchmark = IMB_allgather;
+        type = Collective;
+        Bmark->bench_comments = &Allgather_cmt[0];
+    } else if (!strcmp(Bmark->name, "allgatherv")) {
+        strcpy(Bmark->name, "Allgatherv");
+        Bmark->Benchmark = IMB_allgatherv;
+        type = Collective;
+        Bmark->bench_comments = &Allgatherv_cmt[0];
+    } else if (!strcmp(Bmark->name, "gather")) {
+        strcpy(Bmark->name, "Gather");
+        Bmark->Benchmark = IMB_gather;
+        type = Collective;
+        Bmark->bench_comments = &Gather_cmt[0];
+    } else if (!strcmp(Bmark->name, "gatherv")) {
+        strcpy(Bmark->name, "Gatherv");
+        Bmark->Benchmark = IMB_gatherv;
+        type = Collective;
+        Bmark->bench_comments = &Gatherv_cmt[0];
+    } else if (!strcmp(Bmark->name, "scatter")) {
+        strcpy(Bmark->name, "Scatter");
+        Bmark->Benchmark = IMB_scatter;
+        type = Collective;
+        Bmark->bench_comments = &Scatter_cmt[0];
+    } else if (!strcmp(Bmark->name, "scatterv")) {
+        strcpy(Bmark->name, "Scatterv");
+        Bmark->Benchmark = IMB_scatterv;
+        type = Collective;
+        Bmark->bench_comments = &Scatterv_cmt[0];
+    } else if (!strcmp(Bmark->name, "alltoall")) {
+        strcpy(Bmark->name, "Alltoall");
+        Bmark->Benchmark = IMB_alltoall;
+        type = Collective;
+        Bmark->bench_comments = &Alltoall_cmt[0];
+    } else if (!strcmp(Bmark->name, "alltoallv")) {
+        strcpy(Bmark->name, "Alltoallv");
+        Bmark->Benchmark = IMB_alltoallv;
+        type = Collective;
+        Bmark->bench_comments = &Alltoallv_cmt[0];
+    } else if (!strcmp(Bmark->name, "uniband")) {
+        strcpy(Bmark->name, "Uniband");
+        Bmark->Benchmark = IMB_uni_bandwidth;
+        Bmark->bench_comments = &Uni_bandwidth_cmt[0];
+        type = ParallelTransferMsgRate;
+        Bmark->scale_time = 1.0;
+        Bmark->scale_bw = 1.0;
+    } else if (!strcmp(Bmark->name, "biband")) {
+        strcpy(Bmark->name, "Biband");
+        Bmark->Benchmark = IMB_bi_bandwidth;
+        Bmark->bench_comments = &Bi_bandwidth_cmt[0];
+        type = ParallelTransferMsgRate;
+        Bmark->scale_time = 1.0;
+        Bmark->scale_bw = 2.0;
+    } else {
+        type = BTYPE_INVALID; /* name matches none of the cases above */
+    }
+
+    Bmark->RUN_MODES[0].type = type;
+}
+
diff --git a/src/IMB_parse_name_nbc.c b/src_c/IMB_parse_name_nbc.c
similarity index 69%
rename from src/IMB_parse_name_nbc.c
rename to src_c/IMB_parse_name_nbc.c
index 773a30a3..c34374a7 100644
--- a/src/IMB_parse_name_nbc.c
+++ b/src_c/IMB_parse_name_nbc.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -78,147 +77,128 @@ For more documentation than found here, see
 
 #include "IMB_prototypes.h"
 
- 
-int IMB_get_def_cases(char*** defc, char*** Gcmt)
+
+int IMB_get_def_cases(char*** defc, char*** Gcmt) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
-
 
-In/out variables: 
+In/out variables:
 
--defc                 (type char***)                      
+-defc                 (type char***)
                       List of benchkark names (strings)
-                      
 
--Gcmt                 (type char***)                      
+-Gcmt                 (type char***)
                       List of general comments (strings)
-                      
-
 
 */
-{
     *defc = &DEFC[0];
     *Gcmt = &Gral_cmt[0];
-    return (int) (sizeof(DEFC)/sizeof(char*));
+    return (int)(sizeof(DEFC) / sizeof(char*));
 }
 
-int IMB_get_all_cases(char*** allc)
+int IMB_get_all_cases(char*** allc) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
 
+In/out variables:
 
-In/out variables: 
-
--defc                 (type char***)                      
+-allc                 (type char***)
                       List of benchkark names (strings)
-                      
 
--Gcmt                 (type char***)                      
+-Gcmt                 (type char***)
                       List of general comments (strings)
-                      
-
 
 */
-{
     *allc = &ALLC[0];
-    return (int) (sizeof(ALLC)/sizeof(char*));
+    return (int)(sizeof(ALLC) / sizeof(char*));
 }
 
 
-void IMB_set_bmark(struct Bench* Bmark)
+void IMB_set_bmark(struct Bench* Bmark) {
 /*
 
+In/out variables:
 
-
-In/out variables: 
-
--Bmark                (type struct Bench*)                      
+-Bmark                (type struct Bench*)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       On input, only the name of the benchmark is set.
                       On output, all necessary run modes are set accordingly
-                      
-
 
 */
-{
-    Bmark->N_Modes                  = 1;
-    Bmark->RUN_MODES[0].AGGREGATE   =-1;
+    Bmark->N_Modes = 1;
+    Bmark->RUN_MODES[0].AGGREGATE = -1;
     Bmark->RUN_MODES[0].NONBLOCKING = strstr(Bmark->name, "_pure") ? 0 : 1;
     Bmark->RUN_MODES[0].type = Collective;
 
-    Bmark->reduction      = 0;
-    Bmark->Ntimes         = Bmark->RUN_MODES[0].NONBLOCKING ? 3 : 1;
+    Bmark->reduction = 0;
+    Bmark->Ntimes = Bmark->RUN_MODES[0].NONBLOCKING ? 3 : 1;
     Bmark->bench_comments = NIL_COMMENT;
 
-    if (!strcmp(Bmark->name,"ibcast")) {
+    if (!strcmp(Bmark->name, "ibcast")) {
         Bmark->Benchmark = IMB_ibcast;
-    } else if (!strcmp(Bmark->name,"ibcast_pure")) {
+    } else if (!strcmp(Bmark->name, "ibcast_pure")) {
         Bmark->Benchmark = IMB_ibcast_pure;
-    } else if (!strcmp(Bmark->name,"iallgather")) {
+    } else if (!strcmp(Bmark->name, "iallgather")) {
         Bmark->Benchmark = IMB_iallgather;
-    } else if (!strcmp(Bmark->name,"iallgather_pure")) {
+    } else if (!strcmp(Bmark->name, "iallgather_pure")) {
         Bmark->Benchmark = IMB_iallgather_pure;
-    } else if (!strcmp(Bmark->name,"iallgatherv")) {
+    } else if (!strcmp(Bmark->name, "iallgatherv")) {
         Bmark->Benchmark = IMB_iallgatherv;
-    } else if (!strcmp(Bmark->name,"iallgatherv_pure")) {
+    } else if (!strcmp(Bmark->name, "iallgatherv_pure")) {
         Bmark->Benchmark = IMB_iallgatherv_pure;
-    } else if (!strcmp(Bmark->name,"igather")) {
+    } else if (!strcmp(Bmark->name, "igather")) {
         Bmark->Benchmark = IMB_igather;
-    } else if (!strcmp(Bmark->name,"igather_pure")) {
+    } else if (!strcmp(Bmark->name, "igather_pure")) {
         Bmark->Benchmark = IMB_igather_pure;
-    } else if (!strcmp(Bmark->name,"igatherv")) {
+    } else if (!strcmp(Bmark->name, "igatherv")) {
         Bmark->Benchmark = IMB_igatherv;
-    } else if (!strcmp(Bmark->name,"igatherv_pure")) {
+    } else if (!strcmp(Bmark->name, "igatherv_pure")) {
         Bmark->Benchmark = IMB_igatherv_pure;
-    } else if (!strcmp(Bmark->name,"iscatter")) {
+    } else if (!strcmp(Bmark->name, "iscatter")) {
         Bmark->Benchmark = IMB_iscatter;
-    } else if (!strcmp(Bmark->name,"iscatter_pure")) {
+    } else if (!strcmp(Bmark->name, "iscatter_pure")) {
         Bmark->Benchmark = IMB_iscatter_pure;
-    } else if (!strcmp(Bmark->name,"iscatterv")) {
+    } else if (!strcmp(Bmark->name, "iscatterv")) {
         Bmark->Benchmark = IMB_iscatterv;
-    } else if (!strcmp(Bmark->name,"iscatterv_pure")) {
+    } else if (!strcmp(Bmark->name, "iscatterv_pure")) {
         Bmark->Benchmark = IMB_iscatterv_pure;
-    }else if (!strcmp(Bmark->name,"ialltoall")) {
+    } else if (!strcmp(Bmark->name, "ialltoall")) {
         Bmark->Benchmark = IMB_ialltoall;
-    } else if (!strcmp(Bmark->name,"ialltoall_pure")) {
+    } else if (!strcmp(Bmark->name, "ialltoall_pure")) {
         Bmark->Benchmark = IMB_ialltoall_pure;
-    } else if (!strcmp(Bmark->name,"ialltoallv")) {
+    } else if (!strcmp(Bmark->name, "ialltoallv")) {
         Bmark->Benchmark = IMB_ialltoallv;
-    } else if (!strcmp(Bmark->name,"ialltoallv_pure")) {
+    } else if (!strcmp(Bmark->name, "ialltoallv_pure")) {
         Bmark->Benchmark = IMB_ialltoallv_pure;
-    } else if (!strcmp(Bmark->name,"ireduce")) {
+    } else if (!strcmp(Bmark->name, "ireduce")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_ireduce;
-    } else if (!strcmp(Bmark->name,"ireduce_pure")) {
+    } else if (!strcmp(Bmark->name, "ireduce_pure")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_ireduce_pure;
-    } else if (!strcmp(Bmark->name,"ireduce_scatter")) {
+    } else if (!strcmp(Bmark->name, "ireduce_scatter")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_ireduce_scatter;
-    } else if (!strcmp(Bmark->name,"ireduce_scatter_pure")) {
+    } else if (!strcmp(Bmark->name, "ireduce_scatter_pure")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_ireduce_scatter_pure;
-    } else if (!strcmp(Bmark->name,"iallreduce")) {
+    } else if (!strcmp(Bmark->name, "iallreduce")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_iallreduce;
-    } else if (!strcmp(Bmark->name,"iallreduce_pure")) {
+    } else if (!strcmp(Bmark->name, "iallreduce_pure")) {
         Bmark->reduction = 1;
         Bmark->Benchmark = IMB_iallreduce_pure;
-    } else if (!strcmp(Bmark->name,"ibarrier")) {
+    } else if (!strcmp(Bmark->name, "ibarrier")) {
         Bmark->Benchmark = IMB_ibarrier;
         Bmark->RUN_MODES[0].type = Sync;
-    } else if (!strcmp(Bmark->name,"ibarrier_pure")) {
+    } else if (!strcmp(Bmark->name, "ibarrier_pure")) {
         Bmark->Benchmark = IMB_ibarrier_pure;
         Bmark->RUN_MODES[0].type = Sync;
     } else {
diff --git a/src/IMB_parse_name_rma.c b/src_c/IMB_parse_name_rma.c
similarity index 72%
rename from src/IMB_parse_name_rma.c
rename to src_c/IMB_parse_name_rma.c
index 23752936..0b102d6a 100644
--- a/src/IMB_parse_name_rma.c
+++ b/src_c/IMB_parse_name_rma.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,211 +80,161 @@ For more documentation than found here, see
 #include "IMB_prototypes.h"
 
 
-int IMB_get_def_cases(char*** defc, char*** Gcmt)
-{
+int IMB_get_def_cases(char*** defc, char*** Gcmt) {
     *defc = &DEFC[0];
     *Gcmt = &Gral_cmt[0];
-    return (int) (sizeof(DEFC)/sizeof(char*));
+    return (int)(sizeof(DEFC) / sizeof(char*));
 }
 
-int IMB_get_all_cases(char*** allc)
+int IMB_get_all_cases(char*** allc) {
 /*
 
-                      
                       Initializes default benchmark names (defc) and accompanying
                       comments (Gcmt)
-                      
 
+In/out variables:
 
-In/out variables: 
-
--defc                 (type char***)                      
+-allc                 (type char***)
                       List of benchkark names (strings)
-                      
 
--Gcmt                 (type char***)                      
+-Gcmt                 (type char***)
                       List of general comments (strings)
-                      
-
 
 */
-{
     *allc = &ALLC[0];
-    return (int) (sizeof(ALLC)/sizeof(char*));
+    return (int)(sizeof(ALLC) / sizeof(char*));
 }
 
 
-void IMB_set_bmark(struct Bench* bmark)
-{
+void IMB_set_bmark(struct Bench* bmark) {
     BTYPES type;
-    
+
     bmark->bench_comments = NIL_COMMENT;
     bmark->N_Modes = 2;
     bmark->Ntimes = 1;
     bmark->reduction = 0;
-    
-    bmark->RUN_MODES[0].AGGREGATE=0;
-    bmark->RUN_MODES[1].AGGREGATE=1;
-    bmark->RUN_MODES[0].NONBLOCKING =0;
-    bmark->RUN_MODES[1].NONBLOCKING =0;
-    bmark->RUN_MODES[0].BIDIR =0;
-    bmark->RUN_MODES[1].BIDIR =0;
-    
-    
-    if (!strcmp(bmark->name,"unidir_put"))
-    { 
+
+    bmark->RUN_MODES[0].AGGREGATE = 0;
+    bmark->RUN_MODES[1].AGGREGATE = 1;
+    bmark->RUN_MODES[0].NONBLOCKING = 0;
+    bmark->RUN_MODES[1].NONBLOCKING = 0;
+    bmark->RUN_MODES[0].BIDIR = 0;
+    bmark->RUN_MODES[1].BIDIR = 0;
+
+
+    if (!strcmp(bmark->name, "unidir_put")) {
         bmark->Benchmark = IMB_rma_single_put;
         type = SingleTransfer;
         bmark->access = put;
-    }
-    else if (!strcmp(bmark->name,"unidir_get"))
-    { 
+    } else if (!strcmp(bmark->name, "unidir_get")) {
         bmark->Benchmark = IMB_rma_single_get;
         type = SingleTransfer;
         bmark->access = get;
-    }
-    else if (!strcmp(bmark->name,"bidir_put"))
-    { 
+    } else if (!strcmp(bmark->name, "bidir_put")) {
         bmark->Benchmark = IMB_rma_single_put;
         type = SingleTransfer;
         bmark->access = put;
         bmark->RUN_MODES[0].BIDIR = 1;
         bmark->RUN_MODES[1].BIDIR = 1;
 
-    }
-    else if (!strcmp(bmark->name,"bidir_get"))
-    { 
+    } else if (!strcmp(bmark->name, "bidir_get")) {
         bmark->Benchmark = IMB_rma_single_get;
         type = SingleTransfer;
         bmark->access = get;
         bmark->RUN_MODES[0].BIDIR = 1;
         bmark->RUN_MODES[1].BIDIR = 1;
 
-    }
-    else if (!strcmp(bmark->name,"put_local"))
-    { 
+    } else if (!strcmp(bmark->name, "put_local")) {
         bmark->Benchmark = IMB_rma_put_local;
         type = SingleTransfer;
         bmark->access = put;
-    }
-    else if (!strcmp(bmark->name,"get_local"))
-    { 
+    } else if (!strcmp(bmark->name, "get_local")) {
         bmark->Benchmark = IMB_rma_get_local;
         type = SingleTransfer;
         bmark->access = get;
-    }
-    else if (!strcmp(bmark->name,"put_all_local"))
-    { 
+    } else if (!strcmp(bmark->name, "put_all_local")) {
         bmark->N_Modes = 1;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
+        bmark->RUN_MODES[0].AGGREGATE = -1;
         bmark->Benchmark = IMB_rma_put_all_local;
         type = MultPassiveTransfer;
         bmark->access = put;
-    }
-    else if (!strcmp(bmark->name,"get_all_local"))
-    { 
+    } else if (!strcmp(bmark->name, "get_all_local")) {
         bmark->N_Modes = 1;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
+        bmark->RUN_MODES[0].AGGREGATE = -1;
         bmark->Benchmark = IMB_rma_get_all_local;
         type = MultPassiveTransfer;
         bmark->access = get;
-    }
-    else if (!strcmp(bmark->name,"one_put_all"))
-    { 
+    } else if (!strcmp(bmark->name, "one_put_all")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_put_all;
         type = MultPassiveTransfer;
         bmark->access = put;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-
-    }
-    else if (!strcmp(bmark->name,"one_get_all"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "one_get_all")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_get_all;
         type = MultPassiveTransfer;
         bmark->access = get;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-
-    }
-    else if (!strcmp(bmark->name,"all_put_all"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "all_put_all")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_put_all;
         type = Collective;
         bmark->access = put;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-    }
-    else if (!strcmp(bmark->name,"all_get_all"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "all_get_all")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_get_all;
         type = Collective;
         bmark->access = get;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-    }
-    else if (!strcmp(bmark->name,"exchange_put"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "exchange_put")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_exchange_put;
         type = Collective;
         bmark->access = put;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-    }
-    else if (!strcmp(bmark->name,"exchange_get"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "exchange_get")) {
         bmark->N_Modes = 1;
         bmark->Benchmark = IMB_rma_exchange_get;
         type = Collective;
         bmark->access = get;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-    }
-    else if (!strcmp(bmark->name,"accumulate"))
-    { 
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+    } else if (!strcmp(bmark->name, "accumulate")) {
         bmark->Benchmark = IMB_rma_accumulate;
         type = SingleTransfer;
         bmark->access = put;
         bmark->reduction = 1;
-    }
-    else if (!strcmp(bmark->name,"get_accumulate"))
-    { 
+    } else if (!strcmp(bmark->name, "get_accumulate")) {
         bmark->Benchmark = IMB_rma_get_accumulate;
         type = SingleTransfer;
         bmark->access = put;
         bmark->reduction = 1;
-    }
-    else if (!strcmp(bmark->name,"fetch_and_op"))
-    { 
+    } else if (!strcmp(bmark->name, "fetch_and_op")) {
         bmark->Benchmark = IMB_rma_fetch_and_op;
         type = SingleElementTransfer;
         bmark->access = put;
         bmark->reduction = 1;
-    }
-    else if (!strcmp(bmark->name,"compare_and_swap"))
-    { 
+    } else if (!strcmp(bmark->name, "compare_and_swap")) {
         bmark->Benchmark = IMB_rma_compare_and_swap;
         type = SingleElementTransfer;
         bmark->access = put;
         bmark->bench_comments = Compare_and_swap_cmt;
-    }
-    else if (!strcmp(bmark->name,"truly_passive_put"))
-    { 
+    } else if (!strcmp(bmark->name, "truly_passive_put")) {
         bmark->Benchmark = IMB_rma_passive_put;
         type = SingleTransfer;
         bmark->access = put;
         bmark->N_Modes = 1;
         bmark->Ntimes = 2;
-        bmark->RUN_MODES[0].AGGREGATE=-1;
-        bmark->RUN_MODES[0].NONBLOCKING=1;
+        bmark->RUN_MODES[0].AGGREGATE = -1;
+        bmark->RUN_MODES[0].NONBLOCKING = 1;
         bmark->bench_comments = Truly_passive_put_cmt;
-    }
-    else 
-    {
+    } else {
         bmark->RUN_MODES[0].type = BTYPE_INVALID;
         bmark->RUN_MODES[1].type = BTYPE_INVALID;
         return;
     }
-    
+
     bmark->RUN_MODES[0].type = type;
     bmark->RUN_MODES[1].type = type;
     bmark->name[0] = toupper(bmark->name[0]);
diff --git a/src/IMB_pingping.c b/src_c/IMB_pingping.c
similarity index 57%
rename from src/IMB_pingping.c
rename to src_c/IMB_pingping.c
index 8691c187..8811adf9 100644
--- a/src/IMB_pingping.c
+++ b/src_c/IMB_pingping.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -93,106 +92,96 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_pingping(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                  MODES RUN_MODE, double* time)
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      2 process exchange; MPI_Isend + MPI_Recv 
-                      
+                          MPI-1 benchmark kernel
+                          2 process exchange; MPI_Isend + MPI_Recv
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)                      
-                      Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  double t1,t2;
-  int    i;
-  
-  Type_Size s_size, r_size;
-  int s_num, r_num;
-  int s_tag, r_tag;
-  int dest, source;
-  MPI_Status stat;
-  MPI_Request request;
+    double t1, t2;
+    int    i;
+
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    int s_tag, r_tag;
+    int dest, source;
+    MPI_Status stat;
+    MPI_Request request;
 
 #ifdef CHECK 
-  defect=0;
+    defect = 0;
 #endif
-  ierr = 0;
-
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-
-  if ((s_size!=0) && (r_size!=0))
-   {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    } 
-  s_tag = 1;
-  r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
-  
-  dest = -1;
-  if (c_info->rank == c_info->pair0)
-      dest = c_info->pair1;
-  else if (c_info->rank == c_info->pair1)
-      dest = c_info->pair0;
-
-  source = c_info->select_source ? dest : MPI_ANY_SOURCE;
-      
-  if( dest != -1 )
-    {
-      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-      t1 = MPI_Wtime();
-      for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-	  ierr= MPI_Isend((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                          s_num,
-			  c_info->s_data_type,dest,s_tag,
-			  c_info->communicator,&request);
-	  MPI_ERRHAND(ierr);
-	  ierr = MPI_Recv((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                          r_num,c_info->r_data_type,source,
-			  r_tag,c_info->communicator,&stat);
-	  MPI_ERRHAND(ierr);
-
-	  ierr = MPI_Wait(&request, &stat);
-	  MPI_ERRHAND(ierr);
-
-          CHK_DIFF("PingPing",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                    0, size, size, asize,
-                    put, 0, ITERATIONS->n_sample, i,
-                    dest, &defect);
-	}
-      t2 = MPI_Wtime();
-      
-      *time=(t2 - t1)/ITERATIONS->n_sample;
+    ierr = 0;
+
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
-  else
-    { 
-      *time = 0.; 
+    s_tag = 1;
+    r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
+
+    size *= c_info->size_scale;
+
+    dest = -1;
+    if (c_info->rank == c_info->pair0)
+        dest = c_info->pair1;
+    else if (c_info->rank == c_info->pair1)
+        dest = c_info->pair0;
+
+    source = c_info->select_source ? dest : MPI_ANY_SOURCE;
+
+    *time = 0.;
+
+    if (dest != -1) {
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time -= MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Isend((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                             s_num,
+                             c_info->s_data_type, dest, s_tag,
+                             c_info->communicator, &request);
+            MPI_ERRHAND(ierr);
+            ierr = MPI_Recv((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                            r_num, c_info->r_data_type, source,
+                            r_tag, c_info->communicator, &stat);
+            MPI_ERRHAND(ierr);
+
+            ierr = MPI_Wait(&request, &stat);
+            MPI_ERRHAND(ierr);
+
+            CHK_DIFF("PingPing", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                     0, size, size, asize,
+                     put, 0, ITERATIONS->n_sample, i,
+                     dest, &defect);
+        }
+        *time += MPI_Wtime();
+        *time /= ITERATIONS->n_sample;
     }
 }
 
diff --git a/src/IMB_pingpong.c b/src_c/IMB_pingpong.c
similarity index 53%
rename from src/IMB_pingpong.c
rename to src_c/IMB_pingpong.c
index 680313bf..09cd9f73 100644
--- a/src/IMB_pingpong.c
+++ b/src_c/IMB_pingpong.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -93,130 +92,115 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_pingpong(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                  MODES RUN_MODE, double* time)
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      2 process MPI_Send + MPI_Recv  pair
-                      
+                          MPI-1 benchmark kernel
+                          2 process MPI_Send + MPI_Recv  pair
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
 
--size                 (type int)                      
-                      Basic message size in bytes
+-size                     (type int)
+                          Basic message size in bytes
 
--ITERATIONS           (type struct iter_schedule *)                      
-                      Repetition scheduling
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-    double t1, t2;
     int    i;
 
-    Type_Size s_size,r_size;
+    Type_Size s_size, r_size;
     int s_num, r_num;
     int s_tag, r_tag;
     int dest, source;
     MPI_Status stat;
 
 #ifdef CHECK 
-    defect=0;
+    defect = 0;
 #endif
     ierr = 0;
 
-    /*  GET SIZE OF DATA TYPE */  
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
 
-    if ((s_size!=0) && (r_size!=0))
-    {
-	s_num=size/s_size;
-	r_num=size/r_size;
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
 
     s_tag = 1;
     r_tag = c_info->select_tag ? s_tag : MPI_ANY_TAG;
 
-    if (c_info->rank == c_info->pair0)
-    {
-	/*  CALCULATE SOURCE AND DESTINATION */ 
-	dest = c_info->pair1;
-	source = c_info->select_source ? dest : MPI_ANY_SOURCE;
-
-	for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-	t1 = MPI_Wtime();
-	for(i=0;i<ITERATIONS->n_sample;i++)
-	{
-	    ierr = MPI_Send((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-			    s_num,c_info->s_data_type,dest,
-			    s_tag,c_info->communicator);
-	    MPI_ERRHAND(ierr);
-
-	    ierr = MPI_Recv((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-			    r_num,c_info->r_data_type,source,
-			    r_tag,c_info->communicator,&stat);
-	    MPI_ERRHAND(ierr);
-
-	    CHK_DIFF("PingPong",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-		     size, size, asize,
-		     put, 0, ITERATIONS->n_sample, i,
-		     dest, &defect);
-	} /*for*/
-
-	t2 = MPI_Wtime();
-	*time=(t2 - t1)/ITERATIONS->n_sample;
-    }
-    else if (c_info->rank == c_info->pair1)
-    {
-	dest =c_info->pair0 ;
-	source = c_info->select_source ? dest : MPI_ANY_SOURCE;
-
-	for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-	t1 = MPI_Wtime();
-	for(i=0;i<ITERATIONS->n_sample;i++)
-	{
-	    ierr = MPI_Recv((char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-			    r_num,c_info->r_data_type,source,
-			    r_tag,c_info->communicator,&stat);
-	    MPI_ERRHAND(ierr);
-
-	    ierr = MPI_Send((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-			    s_num,c_info->s_data_type,dest,
-			    s_tag,c_info->communicator);
-	    MPI_ERRHAND(ierr);
-
-	    CHK_DIFF("PingPong",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-		     size, size, asize,
-		     put, 0, ITERATIONS->n_sample, i,
-		     dest, &defect);
-	} /*for*/
-	t2 = MPI_Wtime();
-
-	*time=(t2 - t1)/ITERATIONS->n_sample;
-    }
-    else 
-    { 
-	*time = 0.;
+    size *= c_info->size_scale;
+
+    *time = 0.;
+    if (c_info->rank == c_info->pair0) {
+        /*  CALCULATE SOURCE AND DESTINATION */
+        dest = c_info->pair1;
+        source = c_info->select_source ? dest : MPI_ANY_SOURCE;
+
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time -= MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Send((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                            s_num, c_info->s_data_type, dest,
+                            s_tag, c_info->communicator);
+            MPI_ERRHAND(ierr);
+
+            ierr = MPI_Recv((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                            r_num, c_info->r_data_type, source,
+                            r_tag, c_info->communicator, &stat);
+            MPI_ERRHAND(ierr);
+
+            CHK_DIFF("PingPong", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
+                     size, size, asize,
+                     put, 0, ITERATIONS->n_sample, i,
+                     dest, &defect);
+        } /*for*/
+        *time += MPI_Wtime();
+    } else if (c_info->rank == c_info->pair1) {
+        dest = c_info->pair0;
+        source = c_info->select_source ? dest : MPI_ANY_SOURCE;
+
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time -= MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Recv((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                            r_num, c_info->r_data_type, source,
+                            r_tag, c_info->communicator, &stat);
+            MPI_ERRHAND(ierr);
+
+            ierr = MPI_Send((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                            s_num, c_info->s_data_type, dest,
+                            s_tag, c_info->communicator);
+            MPI_ERRHAND(ierr);
+
+            CHK_DIFF("PingPong", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
+                     size, size, asize,
+                     put, 0, ITERATIONS->n_sample, i,
+                     dest, &defect);
+        } /*for*/
+        *time += MPI_Wtime();
     }
+    *time /= ITERATIONS->n_sample;
 }
 
 
diff --git a/src/IMB_prototypes.h b/src_c/IMB_prototypes.h
similarity index 74%
rename from src/IMB_prototypes.h
rename to src_c/IMB_prototypes.h
index 6eabdad2..22628d2e 100644
--- a/src/IMB_prototypes.h
+++ b/src_c/IMB_prototypes.h
@@ -3,8 +3,7 @@
 
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -75,22 +74,22 @@ void IMB_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERA
 void IMB_scatterv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time);
 
-void IMB_allreduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_allreduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_alltoall(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_alltoall(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time);
 
-void IMB_alltoallv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_alltoallv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_barrier(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_barrier(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE, double* time);
 
-void IMB_bcast(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_bcast(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                MODES RUN_MODE, double* time);
 
-void IMB_exchange(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_exchange(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time);
 
 void IMB_pingping(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
@@ -99,55 +98,55 @@ void IMB_pingping(struct comm_info* c_info, int size, struct iter_schedule* ITER
 void IMB_pingpong(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time);
 
-void IMB_reduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_reduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                 MODES RUN_MODE, double* time);
 
-void IMB_reduce_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_reduce_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE, double* time);
 
-void IMB_sendrecv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_sendrecv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE, double* time);
 
-void IMB_uni_bandwidth(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-              MODES RUN_MODE, double* time);
+void IMB_uni_bandwidth(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                       MODES RUN_MODE, double* time);
 
-void IMB_bi_bandwidth(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-              MODES RUN_MODE, double* time);
+void IMB_bi_bandwidth(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                      MODES RUN_MODE, double* time);
 #endif // MPI1
 
 /* MPI-2 onesided communications benchmarks */
 
 #ifdef EXT
 
-void IMB_accumulate (struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
-                     MODES RUN_MODE, double* time);
+void IMB_accumulate(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time);
 
-void IMB_bidir_get(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_bidir_get(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_bidir_put(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_bidir_put(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_unidir_put (struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
-                     MODES RUN_MODE, double* time);
+void IMB_unidir_put(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time);
 
-void IMB_unidir_get (struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
-                     MODES RUN_MODE, double* time);
+void IMB_unidir_get(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time);
 
-void IMB_ones_get(struct comm_info* c_info, int s_num, int dest, 
-                  int r_num, int sender, int size, 
+void IMB_ones_get(struct comm_info* c_info, int s_num, int dest,
+                  int r_num, int sender, int size,
                   struct iter_schedule* ITERATIONS, double* time);
 
-void IMB_ones_mget(struct comm_info* c_info, int s_num, int dest, 
-                   int r_num, int sender, int size, 
+void IMB_ones_mget(struct comm_info* c_info, int s_num, int dest,
+                   int r_num, int sender, int size,
                    struct iter_schedule* ITERATIONS, double* time);
 
-void IMB_ones_put(struct comm_info* c_info, int s_num, int dest, 
-                  int r_num, int sender, int size, 
+void IMB_ones_put(struct comm_info* c_info, int s_num, int dest,
+                  int r_num, int sender, int size,
                   struct iter_schedule* ITERATIONS, double* time);
 
-void IMB_ones_mput(struct comm_info* c_info, int s_num, int dest, 
-                   int r_num, int sender, int size, 
+void IMB_ones_mput(struct comm_info* c_info, int s_num, int dest,
+                   int r_num, int sender, int size,
                    struct iter_schedule* ITERATIONS, double* time);
 
 void IMB_window(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
@@ -161,41 +160,41 @@ void IMB_user_set_info(MPI_Info* opt_info);
 
 #ifdef MPIIO
 
-void IMB_open_close(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_open_close(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE, double* time);
 
-void IMB_read_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_read_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                      MODES RUN_MODE, double* time);
 
-void IMB_read_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_read_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_read_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_read_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE, double* time);
 
-void IMB_read_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                 BTYPES type, int i_sample, int j_sample, 
+void IMB_read_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                 BTYPES type, int i_sample, int j_sample,
                  int time_inner, double* time);
 
-void IMB_iread_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                  BTYPES type, int i_sample, int j_sample, 
+void IMB_iread_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                  BTYPES type, int i_sample, int j_sample,
                   int time_inner, int do_ovrlp, double* time);
 
-void IMB_write_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_write_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE, double* time);
 
-void IMB_write_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_write_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE, double* time);
 
-void IMB_write_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, 
+void IMB_write_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE, double* time);
 
-void IMB_write_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                  BTYPES type, int i_sample, int j_sample, 
+void IMB_write_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                  BTYPES type, int i_sample, int j_sample,
                   int time_inner, double* time);
 
-void IMB_iwrite_ij(struct comm_info* c_info, int size, POSITIONING pos, 
-                   BTYPES type, int i_sample, int j_sample, 
+void IMB_iwrite_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                   BTYPES type, int i_sample, int j_sample,
                    int time_inner, int do_ovrlp, double* time);
 
 void IMB_user_set_info(MPI_Info* opt_info);
@@ -269,13 +268,13 @@ void IMB_iscatter(struct comm_info* c_info,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
                   double* time);
-                  
+
 void IMB_iscatter_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
                        double* time);
-                       
+
 void IMB_iscatterv(struct comm_info* c_info,
                    int size,
                    struct iter_schedule* ITERATIONS,
@@ -364,65 +363,65 @@ void IMB_ibarrier_pure(struct comm_info* c_info,
 
 #ifdef RMA
 
-void IMB_rma_single_put (struct comm_info* c_info, int size, 
-                         struct iter_schedule* ITERATIONS, 
-                         MODES RUN_MODE, double* time);
-
-void IMB_rma_single_get (struct comm_info* c_info, int size, 
-                         struct iter_schedule* ITERATIONS, 
-                         MODES RUN_MODE, double* time);
-
-void IMB_rma_put_all (struct comm_info* c_info, int size, 
-                      struct iter_schedule* iterations, 
-                      MODES run_mode, double* time);
+void IMB_rma_single_put(struct comm_info* c_info, int size,
+                        struct iter_schedule* ITERATIONS,
+                        MODES RUN_MODE, double* time);
 
-void IMB_rma_get_all (struct comm_info* c_info, int size, 
-                      struct iter_schedule* iterations, 
-                      MODES run_mode, double* time);
+void IMB_rma_single_get(struct comm_info* c_info, int size,
+                        struct iter_schedule* ITERATIONS,
+                        MODES RUN_MODE, double* time);
 
-void IMB_rma_put_local (struct comm_info* c_info, int size, 
-                        struct iter_schedule* iterations, 
-                        MODES run_mode, double* time);
+void IMB_rma_put_all(struct comm_info* c_info, int size,
+                     struct iter_schedule* iterations,
+                     MODES run_mode, double* time);
 
-void IMB_rma_get_local (struct comm_info* c_info, int size, 
-                        struct iter_schedule* iterations, 
-                        MODES run_mode, double* time);
+void IMB_rma_get_all(struct comm_info* c_info, int size,
+                     struct iter_schedule* iterations,
+                     MODES run_mode, double* time);
 
-void IMB_rma_put_all_local (struct comm_info* c_info, int size, 
-                            struct iter_schedule* iterations, 
-                            MODES run_mode, double* time);
+void IMB_rma_put_local(struct comm_info* c_info, int size,
+                       struct iter_schedule* iterations,
+                       MODES run_mode, double* time);
 
-void IMB_rma_get_all_local (struct comm_info* c_info, int size, 
-                            struct iter_schedule* iterations, 
-                            MODES run_mode, double* time);
+void IMB_rma_get_local(struct comm_info* c_info, int size,
+                       struct iter_schedule* iterations,
+                       MODES run_mode, double* time);
 
-void IMB_rma_exchange_put (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
+void IMB_rma_put_all_local(struct comm_info* c_info, int size,
+                           struct iter_schedule* iterations,
                            MODES run_mode, double* time);
 
-void IMB_rma_exchange_get (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
+void IMB_rma_get_all_local(struct comm_info* c_info, int size,
+                           struct iter_schedule* iterations,
                            MODES run_mode, double* time);
 
-void IMB_rma_accumulate (struct comm_info* c_info, int size,  
-                         struct iter_schedule* iterations,
-                         MODES run_mode, double* time);
+void IMB_rma_exchange_put(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time);
 
-void IMB_rma_get_accumulate (struct comm_info* c_info, int size,  
-                             struct iter_schedule* iterations,
-                             MODES run_mode, double* time);
+void IMB_rma_exchange_get(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time);
 
-void IMB_rma_fetch_and_op (struct comm_info* c_info, int size,  
-                       struct iter_schedule* iterations,
-                       MODES run_mode, double* time);
+void IMB_rma_accumulate(struct comm_info* c_info, int size,
+                        struct iter_schedule* iterations,
+                        MODES run_mode, double* time);
 
-void IMB_rma_compare_and_swap (struct comm_info* c_info, int size,  
-                               struct iter_schedule* iterations,
-                               MODES run_mode, double* time);
+void IMB_rma_get_accumulate(struct comm_info* c_info, int size,
+                            struct iter_schedule* iterations,
+                            MODES run_mode, double* time);
 
-void IMB_rma_passive_put (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
-                           MODES run_mode, double* time);
+void IMB_rma_fetch_and_op(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time);
+
+void IMB_rma_compare_and_swap(struct comm_info* c_info, int size,
+                              struct iter_schedule* iterations,
+                              MODES run_mode, double* time);
+
+void IMB_rma_passive_put(struct comm_info* c_info, int size,
+                         struct iter_schedule* iterations,
+                         MODES run_mode, double* time);
 #endif /*RMA*/
 
 
@@ -433,19 +432,23 @@ void IMB_rma_passive_put (struct comm_info* c_info, int size,
 int main(int argc, char **argv);
 
 /* IMB 3.1 << */
-int IMB_basic_input(struct comm_info* c_info, struct Bench** P_BList, 
+int IMB_basic_input(struct comm_info* c_info, struct Bench** P_BList,
                     struct iter_schedule* ITERATIONS,
                     int *argc, char ***argv, int* NP_min);
+
+#ifdef USE_MPI_INIT_THREAD
+void IMB_chk_arg_level_of_threading(char ***argv, int *argc);
+#endif
 /* >> IMB 3.1  */
 
-void IMB_get_rank_portion(int rank, int NP, size_t size, 
+void IMB_get_rank_portion(int rank, int NP, size_t size,
                           size_t unit_size, size_t* pos1, size_t* pos2);
 
 int IMB_init_communicator(struct comm_info* c_info, int NP);
 
-void IMB_adjust_timings_scale(struct comm_info *c_info, struct Bench *bmark);            
+void IMB_adjust_timings_scale(struct comm_info *c_info, struct Bench *bmark);
 
-void IMB_set_communicator(struct comm_info *c_info );
+void IMB_set_communicator(struct comm_info *c_info);
 
 int IMB_valid(struct comm_info * c_info, struct Bench* Bmark, int NP);
 
@@ -488,31 +491,32 @@ void IMB_destruct_blist(struct Bench ** P_BList);
 
 void IMB_print_blist(struct comm_info * c_info, struct Bench *BList);
 
-void IMB_init_transfer(struct comm_info* c_info, struct Bench* Bmark, int size, MPI_Aint acc_size); 
+void IMB_init_transfer(struct comm_info* c_info, struct Bench* Bmark, int size, MPI_Aint acc_size);
 
-void IMB_close_transfer (struct comm_info* c_info, struct Bench* Bmark, int size);
+void IMB_close_transfer(struct comm_info* c_info, struct Bench* Bmark, int size);
 
-void IMB_warm_up (struct comm_info* c_info, struct Bench* Bmark, struct iter_schedule* ITERATIONS, int iter);
+void IMB_warm_up(struct Bench* Bmark, struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, int iter);
 
 void IMB_cpu_exploit(float target_secs, int initialize);
+double IMB_cpu_exploit_reworked(float target_secs, int initialize);
 
 void IMB_general_info();
 
 void IMB_make_sys_info();
 
-void IMB_end_msg(struct comm_info* c_info );
+void IMB_end_msg(struct comm_info* c_info);
 
 /* IMB 3.1 << */
-void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE, 
-                int header, int size, struct iter_schedule* ITERATIONS,
-                double *time);
+void IMB_output(struct comm_info* c_info, struct Bench* Bmark, MODES BMODE,
+    int header, int size, struct iter_schedule* ITERATIONS,
+    double *time);
 /* >> IMB 3.1  */
 
 /* New in IMB_3.0 */
 void IMB_help();
 
-void IMB_display_times(struct Bench* Bmark, double* tlist, struct comm_info* c_info, 
-                       int group, int n_sample, int size, 
+void IMB_display_times(struct Bench* Bmark, double* tlist, struct comm_info* c_info,
+                       int group, int n_sample, int size,
                        int edit_type);
 
 /* IMB 3.1 << */
@@ -520,26 +524,26 @@ void IMB_show_selections(struct comm_info* c_info, struct Bench* BList, int *arg
 /* >> IMB 3.1  */
 
 
-void IMB_calculate_times (int ntimes, struct comm_info* c_info, int group_id, 
-                          double *tlist, Timing* timing
+void IMB_calculate_times(int ntimes, struct comm_info* c_info, int group_id,
+                         double *tlist, Timing* timing
 #ifdef CHECK
-                          , double *defect
+                         , double *defect
 #endif
-                          );
+    );
 
 void IMB_show_procids(struct comm_info* c_info);
 
-void IMB_print_array(int* Array, int N, int disp_N, 
+void IMB_print_array(int* Array, int N, int disp_N,
                      int M, char* txt, FILE* unit);
 
 void IMB_print_int_row(FILE* unit, int* Array, int M);
 
 void IMB_print_info();
 
-void IMB_print_header(int out_format, struct Bench* bmark, 
+void IMB_print_header(int out_format, struct Bench* bmark,
                       struct comm_info* c_info, MODES bench_mode);
 
-void IMB_edit_format(int n_ints , int n_floats);
+void IMB_edit_format(int n_ints, int n_floats);
 
 void IMB_make_line(int li_len);
 
@@ -547,7 +551,7 @@ void* IMB_v_alloc(size_t Len, char* where);
 
 /*void IMB_i_alloc(int** B, size_t Len, char* where );  ==> define macro*/
 
-void IMB_alloc_buf(struct comm_info* c_info, char* where, size_t s_len, 
+void IMB_alloc_buf(struct comm_info* c_info, char* where, size_t s_len,
                    size_t r_len);
 
 void IMB_alloc_aux(size_t L, char* where);
@@ -558,21 +562,21 @@ void IMB_v_free(void **B);
 
 void IMB_ass_buf(void* buf, int rank, size_t pos1, size_t pos2, int value);
 
-void IMB_set_buf(struct comm_info* c_info, int selected_rank, size_t s_pos1, 
+void IMB_set_buf(struct comm_info* c_info, int selected_rank, size_t s_pos1,
                  size_t s_pos2, size_t r_pos1, size_t r_pos2);
 
-void IMB_init_pointers(struct comm_info *c_info );
+void IMB_init_pointers(struct comm_info *c_info);
 
 /* IMB 3.1 << */
-void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS, 
-                           struct Bench* Bmark, MODES BMODE, int iter, int size);
+void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
+struct Bench* Bmark, MODES BMODE, int iter, int size);
 
 void IMB_free_all(struct comm_info* c_info, struct Bench** P_BList, struct iter_schedule* ITERATIONS);
 /* >> IMB 3.1  */
 
-void IMB_del_s_buf(struct comm_info* c_info );
+void IMB_del_s_buf(struct comm_info* c_info);
 
-void IMB_del_r_buf(struct comm_info* c_info );
+void IMB_del_r_buf(struct comm_info* c_info);
 
 char* IMB_str(const char* Bname);
 
@@ -582,7 +586,7 @@ int IMB_str_atoi(char s[]);
 
 void IMB_str_erase(char* string, int Nblnc);
 
-void IMB_err_hand(int ERR_IS_MPI, int ERR_CODE );
+void IMB_err_hand(int ERR_IS_MPI, int ERR_CODE);
 
 void IMB_errors_mpi(MPI_Comm * comm, int* ierr, ...);
 
@@ -591,7 +595,7 @@ void IMB_errors_win(MPI_Win * WIN, int* ierr, ...);
 #endif
 
 #ifdef MPIIO
-void IMB_errors_io (MPI_File * fh, int* ierr, ...);
+void IMB_errors_io(MPI_File * fh, int* ierr, ...);
 #endif
 
 void IMB_init_errhand(struct comm_info* c_info);
@@ -600,41 +604,41 @@ void IMB_set_errhand(struct comm_info* c_info);
 
 void IMB_del_errhand(struct comm_info* c_info);
 
-void IMB_chk_dadd(void* AUX, int Locsize, size_t buf_pos, 
+void IMB_chk_dadd(void* AUX, int Locsize, size_t buf_pos,
                   int rank0, int rank1);
 
-double IMB_ddiff(assign_type *A, assign_type *B, size_t len, 
+double IMB_ddiff(assign_type *A, assign_type *B, size_t len,
                  size_t *fault_pos);
 
-void IMB_show(char* text, struct comm_info* c_info, void* buf, 
-              size_t loclen, size_t totlen, int j_sample, 
+void IMB_show(char* text, struct comm_info* c_info, void* buf,
+              size_t loclen, size_t totlen, int j_sample,
               POSITIONING fpos);
 
-void IMB_err_msg(struct comm_info* c_info, char* text, size_t totsize, 
+void IMB_err_msg(struct comm_info* c_info, char* text, size_t totsize,
                  int j_sample);
 
-void IMB_chk_diff(char* text, struct comm_info* c_info, void* RECEIVED, 
-                  size_t buf_pos, int Locsize, size_t Totalsize, 
-                  int unit_size, DIRECTION mode, POSITIONING fpos, 
-                  int n_sample, int j_sample, int source, 
-                  double* diff );
+void IMB_chk_diff(char* text, struct comm_info* c_info, void* RECEIVED,
+                  size_t buf_pos, int Locsize, size_t Totalsize,
+                  int unit_size, DIRECTION mode, POSITIONING fpos,
+                  int n_sample, int j_sample, int source,
+                  double* diff);
 
-void IMB_cmp_cat(struct comm_info *c_info, void* RECEIVED, size_t size, 
-                 size_t bufpos, int unit_size, int perm, 
-                 size_t* lengths, int*ranks, int* Npos, 
+void IMB_cmp_cat(struct comm_info *c_info, void* RECEIVED, size_t size,
+                 size_t bufpos, int unit_size, int perm,
+                 size_t* lengths, int*ranks, int* Npos,
                  size_t *faultpos, double* diff);
 
-void IMB_chk_contiguous(struct comm_info *c_info, int* rdispl, int* sizes, 
+void IMB_chk_contiguous(struct comm_info *c_info, int* rdispl, int* sizes,
                         double*diff);
 
-void IMB_chk_distr(struct comm_info *c_info, size_t size, int n_sample, 
-                   size_t* lengths, int* ranks, int Npos, 
+void IMB_chk_distr(struct comm_info *c_info, size_t size, int n_sample,
+                   size_t* lengths, int* ranks, int Npos,
                    double *diff);
 
-void IMB_chk_contained(void* part, size_t p_size, void* whole, 
-                       size_t w_size, size_t* pos, size_t* fpos, 
+void IMB_chk_contained(void* part, size_t p_size, void* whole,
+                       size_t w_size, size_t* pos, size_t* fpos,
                        double* D, char*msg);
 
-long IMB_compute_crc (register char* buf, register size_t size);
+long IMB_compute_crc(register char* buf, register size_t size);
 
 #endif // IMB_PROTOTYPES_H
diff --git a/src_c/IMB_read.c b/src_c/IMB_read.c
new file mode 100644
index 00000000..f5137c36
--- /dev/null
+++ b/src_c/IMB_read.c
@@ -0,0 +1,612 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+    
+ File: IMB_read.c 
+
+ Implemented functions: 
+
+ IMB_read_shared;
+ IMB_read_indv;
+ IMB_read_expl;
+ IMB_read_ij;
+ IMB_iread_ij;
+
+ ***************************************************************************/
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+ 
+/*************************************************************************/
+
+/* ===================================================================== */
+/* 
+IMB 3.1 changes
+July 2007
+Hans-Joachim Plum, Intel GmbH
+
+- replace "int n_sample" by iteration scheduling object "ITERATIONS"
+  (see => IMB_benchmark.h)
+
+*/
+/* ===================================================================== */
+
+
+void IMB_read_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                     MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-IO benchmark kernel
+                          Entry point for reads through shared file pointers
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          nothing is done when File_rank is negative
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling (n_sample is used)
+
+-RUN_MODE                 (type MODES)
+                          Mode (AGGREGATE, NONBLOCKING and type are used)
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample; time goes to
+                          IMB_read_ij, time + 1 to IMB_iread_ij
+
+*/
+    int i_sample, j_sample, time_inner;
+
+    if (c_info->File_rank < 0)
+        return;
+
+    /* aggregate mode swaps the roles of the two sample counts */
+    i_sample   = RUN_MODE->AGGREGATE ? 1 : ITERATIONS->n_sample;
+    j_sample   = RUN_MODE->AGGREGATE ? ITERATIONS->n_sample : 1;
+    time_inner = RUN_MODE->AGGREGATE ? 1 : 0;
+    IMB_read_ij(c_info, size, shared, RUN_MODE->type, i_sample, j_sample, time_inner, time);
+
+    if (RUN_MODE->NONBLOCKING) {
+        MPI_File_close(&c_info->fh);
+        IMB_open_file(c_info);
+        IMB_iread_ij(c_info, size, shared, RUN_MODE->type, i_sample, j_sample, time_inner, 1, time + 1);
+    }
+}
+
+/*************************************************************************/
+
+
+void IMB_read_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                   MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-IO benchmark kernel
+                          Entry point for reads through individual file
+                          pointers
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          nothing is done when File_rank is negative
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling (n_sample is used)
+
+-RUN_MODE                 (type MODES)
+                          Mode (AGGREGATE, NONBLOCKING and type are used)
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample; time goes to
+                          IMB_read_ij, time + 1 to IMB_iread_ij
+
+*/
+    int i_sample, j_sample, time_inner;
+
+    if (c_info->File_rank < 0)
+        return;
+
+    /* aggregate mode swaps the roles of the two sample counts */
+    i_sample   = RUN_MODE->AGGREGATE ? 1 : ITERATIONS->n_sample;
+    j_sample   = RUN_MODE->AGGREGATE ? ITERATIONS->n_sample : 1;
+    time_inner = RUN_MODE->AGGREGATE ? 1 : 0;
+    IMB_read_ij(c_info, size, indv_block, RUN_MODE->type, i_sample, j_sample, time_inner, time);
+
+    if (RUN_MODE->NONBLOCKING) {
+        /* close the file, call IMB_open_file, then rerun with do_ovrlp = 1 */
+        MPI_File_close(&c_info->fh);
+        IMB_open_file(c_info);
+        IMB_iread_ij(c_info, size, indv_block, RUN_MODE->type, i_sample, j_sample, time_inner, 1, time + 1);
+    }
+}
+
+
+
+void IMB_read_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                   MODES RUN_MODE, double* time) {
+/*
+                          MPI-IO benchmark kernel
+                          Read driver, explicit offsets: blocking pass first,
+                          then (nonblocking mode only) a second pass on the reopened file
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling (n_sample is the count used here)
+
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          time[0]: blocking pass; time[1]: nonblocking pass (if run)
+
+*/
+    int n_outer, n_inner, t_inner;
+    /* nothing to do when File_rank is negative */
+    if (c_info->File_rank < 0)
+        return;
+
+    /* aggregate: 1 x n_sample with inner timing; otherwise n_sample x 1 without */
+    t_inner = RUN_MODE->AGGREGATE ? 1 : 0;
+    n_outer = t_inner ? 1 : ITERATIONS->n_sample;
+    n_inner = t_inner ? ITERATIONS->n_sample : 1;
+    IMB_read_ij(c_info, size, explic, RUN_MODE->type, n_outer, n_inner, t_inner, time);
+
+    if (RUN_MODE->NONBLOCKING) {
+        MPI_File_close(&c_info->fh);
+        IMB_open_file(c_info);
+        IMB_iread_ij(c_info, size, explic, RUN_MODE->type, n_outer, n_inner, t_inner, 1, time + 1);
+    }
+}
+
+
+
+
+void IMB_read_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                 BTYPES type, int i_sample, int j_sample,
+                 int time_inner, double* time) {
+/*
+
+                          Calls the proper read functions, blocking case
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          portion size in bytes (not referenced here; see c_info->split)
+
+-pos                      (type POSITIONING)
+                          (see IMB_benchmark.h for definition of this enum type)
+                          descriptor for the file positioning
+
+-type                     (type BTYPES)
+                          (see IMB_benchmark.h for definition of this enum type)
+                          descriptor for the file access synchronism
+
+-i_sample,j_sample        (type int)
+                          aggregate case:
+                          i_sample=1, j_sample=n_sample (set by driving function)
+                          non aggregate case:
+                          i_sample=n_sample, j_sample=1 (set by driving function)
+                          Benchmark logistics then:
+                          for( i=0 .. i_sample-1 )
+                          for( j=0 .. j_sample-1 )
+                          input ...
+                          Synchronize (!)
+
+-time_inner               (type int)
+                          logical flag: should timing be issued for the innermost loop
+                          (and then averaged by outermost count), or for outermost loop
+
+Output variables:
+
+-time                     (type double*)
+                          Timing result per sample: elapsed / (i_sample*j_sample)
+
+*/
+    int i, j, b;
+    int Locsize, Totalsize;
+    MPI_Status stat;
+    MPI_Offset Offset;
+
+    int(*GEN_File_read)(MPI_File fh, void* buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+    int(*GEN_File_read_shared)
+        (MPI_File fh, void* buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+    int(*GEN_File_read_at)
+        (MPI_File fh, MPI_Offset offset, void* buf, int count,
+        MPI_Datatype datatype, MPI_Status *status);
+
+#ifdef CHECK
+    defect = 0.;
+#endif
+    ierr = 0;
+
+    *time = 0.;
+
+    if (c_info->File_rank >= 0) {
+        if (type == Collective) {
+            GEN_File_read = MPI_File_read_all;
+            GEN_File_read_shared = MPI_File_read_ordered;
+            GEN_File_read_at = MPI_File_read_at_all;
+#ifdef DEBUG
+            fprintf(unit, "Collective input\n");
+#endif
+        } else {
+            GEN_File_read = MPI_File_read;
+            GEN_File_read_shared = MPI_File_read_shared;
+            GEN_File_read_at = MPI_File_read_at;
+#ifdef DEBUG
+            fprintf(unit, "Non collective input\n");
+#endif
+        }
+
+
+        Locsize = c_info->split.Locsize;
+        Totalsize = c_info->split.Totalsize;
+
+        /*
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        BLOCKING COLLECTIVE/NON COLLECTIVE INPUT CASES COMBINED
+        (function pointer GEN_File_read_XXX
+        either standard or collective MPI_File_read_XXX
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        */
+
+        if (!time_inner)
+            *time = MPI_Wtime();
+
+        for (i = 0; i < i_sample; i++) {
+            if (time_inner) {
+                /* dedicated counter: reusing i here would clobber the outer loop index */
+                for (b = 0; b < N_BARR; b++)
+                    MPI_Barrier(c_info->File_comm);
+
+                *time = MPI_Wtime();
+            }
+            if (pos == indv_block) {
+                for (j = 0; j < j_sample; j++) {
+
+                    ierr = GEN_File_read(c_info->fh, c_info->r_buffer, Locsize, c_info->etype, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Read standard ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+                    CHK_DIFF("Read_indv", c_info, c_info->r_buffer, 0,
+                             Locsize, Totalsize, asize,
+                             get, pos, i_sample*j_sample, j,
+                             -2, &defect);
+                }
+            } /*if( pos == indv_block )*/
+            else if (pos == explic) {
+                for (j = 0; j < j_sample; j++) {
+                    Offset = c_info->split.Offset + (MPI_Offset)j * (MPI_Offset)Totalsize; /* multiply in MPI_Offset, not int */
+
+                    ierr = GEN_File_read_at
+                        (c_info->fh, Offset, c_info->r_buffer, Locsize, c_info->etype, &stat);
+
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Read explicit ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+
+                    CHK_DIFF("Read_expl", c_info, c_info->r_buffer, 0,
+                             Locsize, Totalsize, asize,
+                             get, pos, i_sample*j_sample, j,
+                             -2, &defect);
+                } /*for( j=0; j<j_sample; j++ )*/
+            } else if (pos == shared) {
+                for (j = 0; j < j_sample; j++) {
+
+                    ierr = GEN_File_read_shared
+                        (c_info->fh, c_info->r_buffer, Locsize, c_info->etype, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Read shared ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+
+#ifdef CHECK
+                    IMB_chk_diff("Read_shared", c_info, c_info->r_buffer, 0,
+                                 Locsize, Totalsize, asize,
+                                 get, pos, i_sample*j_sample, j,
+                                 -3, &defect);
+
+                    MPI_Barrier(c_info->File_comm);
+#endif
+                } /*for*/
+            }
+            if (time_inner)
+                *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+        } /*for ( i=0; i<i_sample; i++ )*/
+
+        if (!time_inner)
+            *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+    } /* end if File_rank >= 0 */
+}
+
+
+void IMB_iread_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                  BTYPES type, int i_sample, int j_sample,
+                  int time_inner, int do_ovrlp, double* time) {
+/*
+                          Non blocking counterpart of IMB_read_ij
+                          Calls the proper read functions, non blocking case
+                          do_ovrlp != 0: IMB_cpu_exploit runs between start and completion
+                          (See IMB_read_ij for documentation of calling sequence)
+                          Explicit offsets are computed in MPI_Offset arithmetic
+*/
+    int i, j;
+    int Locsize, Totalsize;
+    MPI_Status *STAT, stat;
+    MPI_Request *REQUESTS;
+    MPI_Offset Offset;
+
+#ifdef CHECK
+    defect = 0.;
+#endif
+    ierr = 0;
+
+    *time = 0.;
+
+    if (c_info->File_rank >= 0) {
+        Locsize = c_info->split.Locsize;
+        Totalsize = c_info->split.Totalsize;
+
+        if (type == Collective) {
+            /*
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            NON BLOCKING COLLECTIVE INPUT CASES
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            */
+
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->File_comm);
+
+            *time = MPI_Wtime();
+
+            if (pos == indv_block) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    ierr = MPI_File_read_all_begin
+                        (c_info->fh, c_info->r_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_read_all_end
+                        (c_info->fh, c_info->r_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("IRead coll. ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+                    CHK_DIFF("Coll. IRead_indv", c_info, c_info->r_buffer, 0,
+                             Locsize, Totalsize, asize,
+                             get, pos, i_sample*j_sample, j,
+                             -2, &defect);
+                }
+            } else if (pos == explic) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    /* multiply in MPI_Offset: j*Totalsize evaluated as int can overflow */
+                    Offset = c_info->split.Offset + (MPI_Offset)j * (MPI_Offset)Totalsize;
+
+                    ierr = MPI_File_read_at_all_begin
+                        (c_info->fh, Offset, c_info->r_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_read_at_all_end
+                        (c_info->fh, c_info->r_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("IRead expl coll. ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+                    CHK_DIFF("Coll. IRead_expl", c_info, c_info->r_buffer, 0,
+                             Locsize, Totalsize, asize,
+                             get, pos, i_sample*j_sample, j,
+                             -2, &defect);
+                }
+            } else if (pos == shared) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    ierr = MPI_File_read_ordered_begin
+                        (c_info->fh, c_info->r_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_read_ordered_end
+                        (c_info->fh, c_info->r_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("IRead shared coll. ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+                    CHK_DIFF("Coll. IRead_shared", c_info, c_info->r_buffer, 0,
+                             Locsize, Totalsize, asize,
+                             get, pos, i_sample*j_sample, j,
+                             -3, &defect);
+                }
+            }
+            *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+        } else {  /* type non-Collective */
+            /*
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            NON BLOCKING NON COLLECTIVE INPUT CASES
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            */
+            REQUESTS = (MPI_Request*)IMB_v_alloc(j_sample*sizeof(MPI_Request), "IRead_ij");
+            STAT = (MPI_Status *)IMB_v_alloc(j_sample*sizeof(MPI_Status), "IRead_ij");
+
+            for (j = 0; j < j_sample; j++)
+                REQUESTS[j] = MPI_REQUEST_NULL;
+
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->File_comm);
+
+            if (!time_inner)
+                *time = MPI_Wtime();
+
+            for (i = 0; i < i_sample; i++) {
+                if (time_inner) {
+                    MPI_Barrier(c_info->File_comm);
+
+                    *time = MPI_Wtime();
+                }
+                if (pos == indv_block) {
+                    for (j = 0; j < j_sample; j++) {
+                        ierr = MPI_File_iread
+                            (c_info->fh, c_info->r_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+
+
+#if (defined CHECK || defined DEBUG)
+                        MPI_Wait(REQUESTS + j, STAT);
+                        DIAGNOSTICS("IRead standard ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+                        CHK_DIFF("IRead_indv", c_info, c_info->r_buffer, 0,
+                            Locsize, Totalsize, asize,
+                            get, pos, i_sample*j_sample, j,
+                            -2, &defect);
+#endif
+                    }
+                } else if (pos == explic) {
+                    for (j = 0; j < j_sample; j++) {
+                        Offset = c_info->split.Offset + (MPI_Offset)j * (MPI_Offset)Totalsize;
+
+                        ierr = MPI_File_iread_at
+                            (c_info->fh, Offset, c_info->r_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+
+#if (defined CHECK || defined DEBUG)
+                        MPI_Wait(REQUESTS + j, STAT);
+                        DIAGNOSTICS("IRead expl ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+                        CHK_DIFF("IRead_expl", c_info, c_info->r_buffer, 0,
+                            Locsize, Totalsize, asize,
+                            get, pos, i_sample*j_sample, j,
+                            -2, &defect);
+#endif
+                    }
+                } else if (pos == shared) {
+                    for (j = 0; j < j_sample; j++) {
+                        ierr = MPI_File_iread_shared
+                            (c_info->fh, c_info->r_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+
+#if (defined CHECK || defined DEBUG)
+                        MPI_Wait(REQUESTS + j, STAT);
+                        DIAGNOSTICS("IRead shared ", c_info, c_info->r_buffer, Locsize, Totalsize, j, pos);
+
+                        CHK_DIFF("IRead_shared", c_info, c_info->r_buffer, 0,
+                            Locsize, Totalsize, asize,
+                            get, pos, i_sample*j_sample, j,
+                            -3, &defect);
+
+                        MPI_Barrier(c_info->File_comm);
+#endif
+                    }
+                }
+                if (do_ovrlp)
+                    for (j = 0; j < j_sample; j++)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                if (j_sample == 1)
+                    MPI_Wait(REQUESTS, STAT);
+                else
+                    MPI_Waitall(j_sample, REQUESTS, STAT);
+
+                if (time_inner)
+                    *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+            }
+            if (!time_inner)
+                *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+
+            IMB_v_free((void**)&REQUESTS);
+            IMB_v_free((void**)&STAT);
+        } /* end if type */
+    } /* end if File_rank >= 0 */
+}
diff --git a/src/IMB_reduce.c b/src_c/IMB_reduce.c
similarity index 78%
rename from src/IMB_reduce.c
rename to src_c/IMB_reduce.c
index c18d7c41..30cab1f2 100644
--- a/src/IMB_reduce.c
+++ b/src_c/IMB_reduce.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -94,69 +93,64 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_reduce(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                MODES RUN_MODE, double* time)
+                MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      Benchmarks MPI_Reduce
-                      
+                          MPI-1 benchmark kernel
+                          Benchmarks MPI_Reduce
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)                      
-                      Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  int    i;
-  Type_Size s_size;
-  int s_num;
-  double t1, t2;
-  
+    int    i;
+    Type_Size s_size;
+    int s_num;
+    double t1, t2;
+
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
 
-  ierr = 0;
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->red_data_type, &s_size);
+
+    if (s_size != 0)
+        s_num = size / s_size;
+
+    size *= c_info->size_scale;
 
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->red_data_type,&s_size);
-  if (s_size!=0) s_num=size/s_size;
-  
     *time = 0.;
-    if(c_info->rank != -1)
-    {
+    if (c_info->rank != -1) {
         int root = 0;
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0;i< ITERATIONS->n_sample;i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t1 = MPI_Wtime();
-            ierr = MPI_Reduce((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+            ierr = MPI_Reduce((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                              (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                               s_num,
-                              c_info->red_data_type,c_info->op_type,
+                              c_info->red_data_type, c_info->op_type,
                               root,
                               c_info->communicator);
             MPI_ERRHAND(ierr);
@@ -164,12 +158,11 @@ Output variables:
             *time += (t2 - t1);
 
 #ifdef CHECK
-            if( c_info->rank == root )
-            {
-                 CHK_DIFF("Reduce",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
-                          size, size, asize, 
-                          put, 0, ITERATIONS->n_sample, i,
-                          -1, &defect);
+            if (c_info->rank == root) {
+                CHK_DIFF("Reduce", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0,
+                         size, size, asize,
+                         put, 0, ITERATIONS->n_sample, i,
+                         -1, &defect);
             }
 #endif
             /*  CHANGE THE ROOT NODE */
@@ -188,8 +181,7 @@ void IMB_ireduce(struct comm_info* c_info,
                  int size,
                  struct iter_schedule* ITERATIONS,
                  MODES RUN_MODE,
-                 double* time)
-{
+                 double* time) {
     int         i = 0;
     Type_Size   s_size;
     int         s_num = 0;
@@ -200,17 +192,17 @@ void IMB_ireduce(struct comm_info* c_info,
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
-    MPI_Type_size(c_info->red_data_type,&s_size);
+    MPI_Type_size(c_info->red_data_type, &s_size);
     if (s_size != 0) {
         s_num = size / s_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_ireduce_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
@@ -219,7 +211,7 @@ void IMB_ireduce(struct comm_info* c_info,
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Ireduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
@@ -248,7 +240,7 @@ void IMB_ireduce(struct comm_info* c_info,
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
 
     time[0] = t_pure;
@@ -262,8 +254,7 @@ void IMB_ireduce_pure(struct comm_info* c_info,
                       int size,
                       struct iter_schedule* ITERATIONS,
                       MODES RUN_MODE,
-                      double* time)
-{
+                      double* time) {
     int         i = 0;
     Type_Size   s_size;
     int         s_num = 0;
@@ -271,22 +262,22 @@ void IMB_ireduce_pure(struct comm_info* c_info,
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
     /* GET SIZE OF DATA TYPE */
-    MPI_Type_size(c_info->red_data_type,&s_size);
+    MPI_Type_size(c_info->red_data_type, &s_size);
     if (s_size != 0) {
         s_num = size / s_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Ireduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
diff --git a/src/IMB_reduce_scatter.c b/src_c/IMB_reduce_scatter.c
similarity index 72%
rename from src/IMB_reduce_scatter.c
rename to src_c/IMB_reduce_scatter.c
index 066f72c7..6dc4f632 100644
--- a/src/IMB_reduce_scatter.c
+++ b/src_c/IMB_reduce_scatter.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -94,100 +93,98 @@ Hans-Joachim Plum, Intel GmbH
 /* ===================================================================== */
 
 void IMB_reduce_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                        MODES RUN_MODE, double* time)
+                        MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      Benchmarks MPI_Reduce_scatter
-                      
+                          MPI-1 benchmark kernel
+                          Benchmarks MPI_Reduce_scatter
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
 
--size                 (type int)                      
-                      Basic message size in bytes
+-size                     (type int)
+                          Basic message size in bytes
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
+Output variables:
 
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
-
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
     double t1, t2;
-    int    i;
-    size_t pos1,pos2;
+    int    i, s_buff_size = 0;
+    size_t pos1, pos2;
 #ifdef CHECK
     size_t pos;
     int    Locsize;
 #endif
 
     Type_Size s_size;
-  
+
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
-    /*  GET SIZE OF DATA TYPE */  
-    MPI_Type_size(c_info->red_data_type,&s_size);
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->red_data_type, &s_size);
 
-    for (i=0;i<c_info->num_procs ;i++)
-    {
-        if( size > 0)
-        {
+    for (i = 0; i < c_info->num_procs; i++) {
+        if (size > 0) {
             IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
-            c_info->reccnt[i] = (pos2-pos1+1)/s_size;
+            c_info->reccnt[i] = (pos2 - pos1 + 1) / s_size;
+            s_buff_size += c_info->reccnt[i] * s_size;
 #ifdef CHECK
-            if( i==c_info->rank ) {pos=pos1; Locsize= s_size*c_info->reccnt[i];}
+            if (i == c_info->rank) {
+                pos = pos1;
+                Locsize = s_size * c_info->reccnt[i];
+            }
 #endif
-        } 
-        else
-        {
+        } else {
             c_info->reccnt[i] = 0;
 #ifdef CHECK
-            if( i==c_info->rank ) {pos=0; Locsize= 0;}
+            if (i == c_info->rank) {
+                pos = 0;
+                Locsize = 0;
+            }
 #endif
         }
     }
-    
-    *time = 0.; 
 
-    if(c_info->rank!=-1)
-    {
-       IMB_do_n_barriers (c_info->communicator, N_BARR);
+    *time = 0.;
 
-       for(i=0;i< ITERATIONS->n_sample;i++)
-       {
+    size *= c_info->size_scale;
+    s_buff_size *= c_info->size_scale;
+
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t1 = MPI_Wtime();
-            ierr = MPI_Reduce_scatter ((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                                       (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                                       c_info->reccnt, c_info->red_data_type,c_info->op_type, c_info->communicator);
+            ierr = MPI_Reduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
+                                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                      c_info->reccnt, c_info->red_data_type, c_info->op_type, c_info->communicator);
             MPI_ERRHAND(ierr);
             t2 = MPI_Wtime();
             *time += (t2 - t1);
-            
-            CHK_DIFF("Reduce_scatter",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+
+            CHK_DIFF("Reduce_scatter", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
                      pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
-            
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
-       }
-       *time /= ITERATIONS->n_sample;
-    } 
+
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -198,8 +195,7 @@ void IMB_ireduce_scatter(struct comm_info* c_info,
                          int size,
                          struct iter_schedule* ITERATIONS,
                          MODES RUN_MODE,
-                         double* time)
-{
+                         double* time) {
     int         i = 0;
     Type_Size   s_size;
     MPI_Request request;
@@ -207,11 +203,10 @@ void IMB_ireduce_scatter(struct comm_info* c_info,
     double      t_pure = 0.,
                 t_comp = 0.,
                 t_ovrlp = 0.;
+    size_t pos1, pos2;
 
 #ifdef CHECK
-    size_t      pos     = 0,
-                pos1    = 0,
-                pos2    = 0;
+    size_t      pos = 0;
     int         Locsize = 0;
 
     defect = 0.;
@@ -221,27 +216,38 @@ void IMB_ireduce_scatter(struct comm_info* c_info,
     /* GET SIZE OF DATA TYPE */
     MPI_Type_size(c_info->red_data_type, &s_size);
 
-#ifdef CHECK
-    if(size > 0) {
+    if (size > 0) {
         for (i = 0; i < c_info->num_procs; i++) {
             IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
+            c_info->reccnt[i] = (pos2 - pos1 + 1) / s_size;
+#ifdef CHECK
             if (i == c_info->rank) {
                 pos = pos1;
                 Locsize = s_size * c_info->reccnt[i];
             }
+#endif // CHECK
         }
-    }
+    } else {
+        for (i = 0; i < c_info->num_procs; i++) {
+            c_info->reccnt[i] = 0;
+#ifdef CHECK
+            if (i == c_info->rank) {
+                pos = 0;
+                Locsize = 0;
+            }
 #endif // CHECK
+        }
+    }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         IMB_ireduce_scatter_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
         /* INITIALIZATION CALL */
         IMB_cpu_exploit(t_pure, 1);
 
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Ireduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                        (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
@@ -251,21 +257,21 @@ void IMB_ireduce_scatter(struct comm_info* c_info,
                                        c_info->communicator,
                                        &request);
             MPI_ERRHAND(ierr);
-            
+
             t_comp -= MPI_Wtime();
             IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
-            
+
             MPI_Wait(&request, &status);
             t_ovrlp += MPI_Wtime();
-            
+
             CHK_DIFF("Ireduce_scatter", c_info,
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
 
     time[0] = t_pure;
@@ -279,8 +285,7 @@ void IMB_ireduce_scatter_pure(struct comm_info* c_info,
                               int size,
                               struct iter_schedule* ITERATIONS,
                               MODES RUN_MODE,
-                              double* time)
-{
+                              double* time) {
     int         i = 0;
     Type_Size   s_size;
     size_t      pos1 = 0,
@@ -301,7 +306,7 @@ void IMB_ireduce_scatter_pure(struct comm_info* c_info,
     MPI_Type_size(c_info->red_data_type, &s_size);
 
     for (i = 0; i < c_info->num_procs; i++) {
-        if( size > 0) {
+        if (size > 0) {
             IMB_get_rank_portion(i, c_info->num_procs, size, s_size, &pos1, &pos2);
             c_info->reccnt[i] = (pos2 - pos1 + 1) / s_size;
 #ifdef CHECK
@@ -314,18 +319,17 @@ void IMB_ireduce_scatter_pure(struct comm_info* c_info,
             c_info->reccnt[i] = 0;
 #ifdef CHECK
             if (i == c_info->rank) {
-                pos=0;
+                pos = 0;
                 Locsize = 0;
             }
 #endif
         }
     }
 
-    if(c_info->rank != -1) {
-        IMB_do_n_barriers (c_info->communicator, N_BARR);
+    if (c_info->rank != -1) {
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        t_pure = MPI_Wtime();
-        for(i = 0; i < ITERATIONS->n_sample; i++) {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Ireduce_scatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                        (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
@@ -341,12 +345,11 @@ void IMB_ireduce_scatter_pure(struct comm_info* c_info,
             CHK_DIFF("Ireduce_scatter_pure", c_info,
                      (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                      pos, Locsize, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
-            
-            IMB_do_n_barriers (c_info->communicator, c_info->sync);
+
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_pure /= ITERATIONS->n_sample;
     }
-
     time[0] = t_pure;
 }
 
diff --git a/src_c/IMB_rma_atomic.c b/src_c/IMB_rma_atomic.c
new file mode 100644
index 00000000..4a130336
--- /dev/null
+++ b/src_c/IMB_rma_atomic.c
@@ -0,0 +1,321 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ File: IMB_rma_atomic.c 
+
+ Implemented functions: 
+
+ IMB_rma_accumulate;
+ IMB_rma_get_accumulate;
+ IMB_rma_fetch_and_op;
+ IMB_rma_compare_and_swap;
+ 
+ ***************************************************************************/
+
+/*-----------------------------------------------------------*/
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+#include "IMB_prototypes.h"
+
+/* Accumulate benchmark: rank pair0 times MPI_Accumulate calls targeting rank
+ * pair1 under a shared window lock. AGGREGATE mode flushes once after the
+ * whole batch, otherwise after every call. *time gets the mean seconds per
+ * sample on pair0 and -1 on every other rank. */
+void IMB_rma_accumulate(struct comm_info* c_info, int size,
+                        struct iter_schedule* iterations,
+                        MODES run_mode, double* time) {
+    double res_time = -1.; /* reported unchanged by ranks that do not measure */
+    Type_Size s_size, r_size;
+    int s_num, r_num, r_off, i;
+    int root = c_info->pair1; /* target of the lock and of every call */
+    ierr = 0;
+
+    if (c_info->rank < 0) {
+        *time = res_time;
+        return;
+    }
+
+    MPI_Type_size(c_info->red_data_type, &s_size);
+    s_num = size / s_size;
+    r_size = s_size; /* origin and target both use red_data_type */
+    r_num = s_num;
+    r_off = iterations->r_offs / r_size; /* r_offs / element size: target displacement step */
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
+        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
+        if (run_mode->AGGREGATE) {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Accumulate((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                      s_num, c_info->red_data_type, root,
+                                      i%iterations->r_cache_iter*r_off, r_num,
+                                      c_info->red_data_type, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+            ierr = MPI_Win_flush(root, c_info->WIN);
+            MPI_ERRHAND(ierr); /* a failed batch flush must not pass silently */
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Accumulate((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                      s_num, c_info->red_data_type, root,
+                                      i%iterations->r_cache_iter*r_off, r_num,
+                                      c_info->red_data_type, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_flush(root, c_info->WIN); /* complete each call before the next */
+                MPI_ERRHAND(ierr);
+            }
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        }
+        MPI_Win_unlock(root, c_info->WIN);
+    }
+    MPI_Barrier(c_info->communicator);
+
+    *time = res_time;
+}
+
+
+/* Get_accumulate benchmark: rank pair0 times MPI_Get_accumulate calls on the
+ * window of rank pair1 (shared lock), flushing once per batch in AGGREGATE
+ * mode and per call otherwise. *time: mean seconds per sample, -1 elsewhere. */
+void IMB_rma_get_accumulate(struct comm_info* c_info, int size,
+                            struct iter_schedule* iterations,
+                            MODES run_mode, double* time) {
+    double res_time = -1.; /* reported unchanged by ranks that do not measure */
+    Type_Size s_size, r_size;
+    int s_num, r_num, r_off, i;
+    int root = c_info->pair1; /* target of the lock and of every call */
+    ierr = 0;
+
+    if (c_info->rank < 0) {
+        *time = res_time;
+        return;
+    }
+
+    MPI_Type_size(c_info->red_data_type, &s_size);
+    s_num = size / s_size;
+    r_size = s_size; /* origin, result and target all use red_data_type */
+    r_num = s_num;
+    r_off = iterations->r_offs / r_size; /* r_offs / element size: target displacement step */
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
+        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
+        if (run_mode->AGGREGATE) {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get_accumulate((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                          s_num, c_info->red_data_type,
+                                          (char*)c_info->r_buffer + i%iterations->r_cache_iter*iterations->r_offs,
+                                          r_num, c_info->red_data_type,
+                                          root, i%iterations->r_cache_iter*r_off, r_num,
+                                          c_info->red_data_type, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+            ierr = MPI_Win_flush(root, c_info->WIN);
+            MPI_ERRHAND(ierr); /* a failed batch flush must not pass silently */
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get_accumulate((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                          s_num, c_info->red_data_type,
+                                          (char*)c_info->r_buffer + i%iterations->r_cache_iter*iterations->r_offs,
+                                          r_num, c_info->red_data_type,
+                                          root, i%iterations->r_cache_iter*r_off, r_num,
+                                          c_info->red_data_type, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_flush(root, c_info->WIN); /* complete each call before the next */
+                MPI_ERRHAND(ierr);
+            }
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        }
+        MPI_Win_unlock(root, c_info->WIN);
+    }
+    MPI_Barrier(c_info->communicator);
+
+    *time = res_time;
+}
+
+/* Fetch_and_op benchmark: rank pair0 times MPI_Fetch_and_op calls on the window
+ * of rank pair1. `size` is unused; *time is mean seconds/sample, -1 off pair0. */
+void IMB_rma_fetch_and_op(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time) {
+    double res_time = -1.; /* reported unchanged by ranks that do not measure */
+    Type_Size r_size;
+    int r_off, i;
+    int root = c_info->pair1; /* target of the lock and of every call */
+    ierr = 0;
+
+    if (c_info->rank < 0) {
+        *time = res_time;
+        return;
+    }
+
+    MPI_Type_size(c_info->red_data_type, &r_size);
+    r_off = iterations->r_offs / r_size; /* r_offs / element size: target displacement step */
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
+        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
+        if (run_mode->AGGREGATE) {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Fetch_and_op((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                        (char*)c_info->r_buffer + i%iterations->r_cache_iter*iterations->r_offs,
+                                        c_info->red_data_type, root,
+                                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+            ierr = MPI_Win_flush(root, c_info->WIN);
+            MPI_ERRHAND(ierr); /* a failed batch flush must not pass silently */
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Fetch_and_op((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                                        (char*)c_info->r_buffer + i%iterations->r_cache_iter*iterations->r_offs,
+                                        c_info->red_data_type, root,
+                                        i%iterations->r_cache_iter*r_off, c_info->op_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_flush(root, c_info->WIN); /* complete each call before the next */
+                MPI_ERRHAND(ierr);
+            }
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        }
+        MPI_Win_unlock(root, c_info->WIN);
+    }
+    MPI_Barrier(c_info->communicator);
+
+    *time = res_time;
+}
+
+/* Compare_and_swap benchmark: rank pair0 times MPI_Compare_and_swap calls on
+ * MPI_INT values in the window of rank pair1 (shared lock). `size` is unused.
+ * *time gets mean seconds per sample on pair0, -1 on every other rank. */
+void IMB_rma_compare_and_swap(struct comm_info* c_info, int size,
+                              struct iter_schedule* iterations,
+                              MODES run_mode, double* time) {
+    double res_time = -1.; /* reported unchanged by ranks that do not measure */
+    int root = c_info->pair1; /* target of the lock and of every call */
+    int s_size, i;
+    void *comp_b, *orig_b, *res_b;
+    MPI_Datatype data_type = MPI_INT;
+    ierr = 0;
+
+    if (c_info->rank < 0) {
+        *time = res_time;
+        return;
+    }
+
+    MPI_Type_size(data_type, &s_size);
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
+        /* origin, compare and result values all live in r_buffer (zero-initialized on all
+         * ranks in IMB_set_buf), so every per-sample offset uses r_cache_iter/r_offs */
+        orig_b = (char*)c_info->r_buffer + s_size * 2;
+        comp_b = (char*)c_info->r_buffer + s_size;
+        res_b = c_info->r_buffer;
+
+        MPI_Win_lock(MPI_LOCK_SHARED, root, 0, c_info->WIN);
+        if (run_mode->AGGREGATE) {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Compare_and_swap((char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            (char*)res_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            data_type, root, i%iterations->r_cache_iter*iterations->r_offs,
+                                            c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+            ierr = MPI_Win_flush(root, c_info->WIN);
+            MPI_ERRHAND(ierr); /* a failed batch flush must not pass silently */
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else {
+            res_time = MPI_Wtime();
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Compare_and_swap((char*)orig_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            (char*)comp_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            (char*)res_b + i%iterations->r_cache_iter*iterations->r_offs,
+                                            data_type, root, i%iterations->r_cache_iter*iterations->r_offs,
+                                            c_info->WIN);
+                MPI_ERRHAND(ierr);
+                ierr = MPI_Win_flush(root, c_info->WIN); /* complete each call before the next */
+                MPI_ERRHAND(ierr);
+            }
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        }
+        MPI_Win_unlock(root, c_info->WIN);
+    }
+    MPI_Barrier(c_info->communicator);
+
+    *time = res_time;
+}
+
diff --git a/src/IMB_rma_get.c b/src_c/IMB_rma_get.c
similarity index 50%
rename from src/IMB_rma_get.c
rename to src_c/IMB_rma_get.c
index 3010b272..b39f5590 100644
--- a/src/IMB_rma_get.c
+++ b/src_c/IMB_rma_get.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -76,10 +75,9 @@ For more documentation than found here, see
 
 /* Unidirectional and bidirectional get: communication is done 
  * between two processes only. */ 
-void IMB_rma_single_get (struct comm_info* c_info, int size, 
-                         struct iter_schedule* iterations, 
-                         MODES run_mode, double* time)
-{
+void IMB_rma_single_get(struct comm_info* c_info, int size,
+                        struct iter_schedule* iterations,
+                        MODES run_mode, double* time) {
     double res_time = -1.;
     int target = -1;
     int receiver = 0;
@@ -92,89 +90,73 @@ void IMB_rma_single_get (struct comm_info* c_info, int size,
 #endif
     ierr = 0;
 
-    if (c_info->rank == c_info->pair0)
-    {
+    if (c_info->rank == c_info->pair0) {
         target = c_info->pair1;
         receiver = 1;
-    } 
-    else if (c_info->rank == c_info->pair1)
-    {
+    } else if (c_info->rank == c_info->pair1) {
         target = c_info->pair0;
-        if (run_mode->BIDIR) 
-        {
-            /* pair1 acts as origin  
+        if (run_mode->BIDIR) {
+            /* pair1 acts as origin
              * in bidirectional mode only */
             receiver = 1;
-        }    
-    }    
-    else if (c_info->rank < 0)
-    {
+        }
+    } else if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
+    }
+
     /* Get size: recv and send sizes are equial, so just use one set of vars*/
     MPI_Type_size(c_info->s_data_type, &r_size);
-    r_num = size/r_size;
+    r_num = size / r_size;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
 
-    for(i = 0; i < N_BARR; i++) MPI_Barrier(c_info->communicator);
-     
-    /* in case of MPI_Get sender is target */     
-    if (receiver)
-    {    
+    /* in case of MPI_Get sender is target */
+    if (receiver) {
         MPI_Win_lock(MPI_LOCK_SHARED, target, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
+        if (run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, target, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
-                
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                               r_num, c_info->r_data_type, target,
+                               i%iterations->s_cache_iter*iterations->s_offs,
+                               r_num, c_info->s_data_type, c_info->WIN);
+
                 MPI_ERRHAND(ierr);
             }
             ierr = MPI_Win_flush(target, c_info->WIN);
             MPI_ERRHAND(ierr);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else if (!run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, target, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                               r_num, c_info->r_data_type, target,
+                               i%iterations->s_cache_iter*iterations->s_offs,
+                               r_num, c_info->s_data_type, c_info->WIN);
 
                 MPI_ERRHAND(ierr);
                 ierr = MPI_Win_flush(target, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
         }
-
         MPI_Win_unlock(target, c_info->WIN);
-    }     
-   
+    }
+
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
 
 #ifdef CHECK
-    if (receiver || run_mode->BIDIR)
-    {
-         for(i = 0; i < ITER_MIN(iterations); i++)
-         {
-             CHK_DIFF("MPI_Get",c_info, (void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                      0, size, size, asize, get, 0, iterations->n_sample, i, target , &defect);
-             
-         }
-    }     
+    if (receiver || run_mode->BIDIR) {
+        for (i = 0; i < ITER_MIN(iterations); i++) {
+            CHK_DIFF("MPI_Get", c_info, (void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                     0, size, size, asize, get, 0, iterations->n_sample, i, target, &defect);
+        }
+    }
 #endif     
-    
-    *time = res_time; 
+    *time = res_time;
     return;
 }
 
@@ -183,10 +165,9 @@ void IMB_rma_single_get (struct comm_info* c_info, int size,
  * run_mode Collective corresponds to "All_get_all",
  * run_mode MultPassiveTransfer corresponds to "One_get_all"
  * */
-void IMB_rma_get_all (struct comm_info* c_info, int size, 
-                      struct iter_schedule* iterations, 
-                      MODES run_mode, double* time)
-{
+void IMB_rma_get_all(struct comm_info* c_info, int size,
+                     struct iter_schedule* iterations,
+                     MODES run_mode, double* time) {
     double res_time = -1.;
     int target = 0;
     int peer = 0;
@@ -197,67 +178,59 @@ void IMB_rma_get_all (struct comm_info* c_info, int size,
     char *recv = (char *)c_info->r_buffer;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
+    }
+
     if (c_info->rank == 0 || run_mode->type == Collective)
-    {
         receiver = 1;
-    }    
 
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    r_num=size/r_size;
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    r_num = size / r_size;
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (receiver)
-    {    
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (receiver) {
         MPI_Win_lock_all(0, c_info->WIN);
-        
+
         res_time = MPI_Wtime();
-        for (i = 0; i < iterations->n_sample; i++)
-        {
-            for (peer = 0; peer < c_info->num_procs; peer++)
-            {
+        for (i = 0; i < iterations->n_sample; i++) {
+            for (peer = 0; peer < c_info->num_procs; peer++) {
                 /* choose different target for each process to avoid congestion */
-                target = (peer + c_info->rank)%c_info->num_procs;
+                target = (peer + c_info->rank) % c_info->num_procs;
                 if (target == c_info->rank)
-                {
                     continue; /* do not get from itself*/
-                }    
-                
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, target, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
+
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                                       r_num, c_info->r_data_type, target,
+                                       i%iterations->s_cache_iter*iterations->s_offs,
+                                       r_num, c_info->s_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-        }    
+        }
         ierr = MPI_Win_flush_all(c_info->WIN);
         MPI_ERRHAND(ierr);
-        res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    
+        res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+
         MPI_Win_unlock_all(c_info->WIN);
-    }     
-   
+    }
+
     /* Synchronize origin and target processes */
     MPI_Barrier(c_info->communicator);
-    
-    *time = res_time; 
+
+    *time = res_time;
     return;
 }
 
 
 /* Implements "Get_local" benchmark. One process gets some data
- * from the other and make sure of completion by MPI_Win_flush_local call 
+ * from the other and make sure of completion by MPI_Win_flush_local call
  * */
-void IMB_rma_get_local (struct comm_info* c_info, int size, 
-                        struct iter_schedule* iterations, 
-                        MODES run_mode, double* time)
-{
+void IMB_rma_get_local(struct comm_info* c_info, int size,
+                       struct iter_schedule* iterations,
+                       MODES run_mode, double* time) {
     double res_time = -1.;
     Type_Size r_size;
     int r_num;
@@ -265,84 +238,70 @@ void IMB_rma_get_local (struct comm_info* c_info, int size,
     char *recv = (char *)c_info->r_buffer;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    r_num=size/r_size;
+    }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (c_info->rank == c_info->pair0)
-    {    
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    r_num = size / r_size;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
         MPI_Win_lock(MPI_LOCK_SHARED, c_info->pair1, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
+        if (run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, c_info->pair1, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                               r_num, c_info->r_data_type, c_info->pair1,
+                               i%iterations->s_cache_iter*iterations->s_offs,
+                               r_num, c_info->s_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
             ierr = MPI_Win_flush_local(c_info->pair1, c_info->WIN);
             MPI_ERRHAND(ierr);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if (!run_mode->AGGREGATE)    
-        {
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else if (!run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, c_info->pair1, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                               r_num, c_info->r_data_type, c_info->pair1,
+                               i%iterations->s_cache_iter*iterations->s_offs,
+                               r_num, c_info->s_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
                 ierr = MPI_Win_flush_local(c_info->pair1, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
         }
-
         MPI_Win_unlock(c_info->pair1, c_info->WIN);
-    }     
-   
+    }
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
-
 #ifdef CHECK
-    if (c_info->rank == c_info->pair0)
-    {
-         /* Local completion of MPI_Get guarantees that recv buffer already contains target data,
-          * so let's check the result */
-         for(i = 0; i < ITER_MIN(iterations); i++)
-         {
-             CHK_DIFF("MPI_Get",c_info, (void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                      0, size, size, asize, get, 0, iterations->n_sample, i, c_info->pair1, &defect);
-             
-         }
-    }     
+    if (c_info->rank == c_info->pair0) {
+        /* Local completion of MPI_Get guarantees that recv buffer already contains target data,
+         * so let's check the result */
+        for (i = 0; i < ITER_MIN(iterations); i++) {
+            CHK_DIFF("MPI_Get", c_info, (void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                     0, size, size, asize, get, 0, iterations->n_sample, i, c_info->pair1, &defect);
+        }
+    }
 #endif     
-    
-    *time = res_time; 
+
+    *time = res_time;
     return;
 }
 
 /* Implements "get_all_local" benchmark. One process gets some data
- * from all other processes and make sure of completion by 
- * MPI_Win_flush_local_all call 
+ * from all other processes and make sure of completion by
+ * MPI_Win_flush_local_all call
  * */
-void IMB_rma_get_all_local (struct comm_info* c_info, int size, 
-                            struct iter_schedule* iterations, 
-                            MODES run_mode, double* time)
-{
+void IMB_rma_get_all_local(struct comm_info* c_info, int size,
+                           struct iter_schedule* iterations,
+                           MODES run_mode, double* time) {
     double res_time = -1.;
     int target = 0;
     int peer = 0;
@@ -352,63 +311,56 @@ void IMB_rma_get_all_local (struct comm_info* c_info, int size,
     char *recv = (char *)c_info->r_buffer;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
+    }
 
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    r_num=size/r_size;
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    r_num = size / r_size;
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (c_info->rank == 0)
-    {    
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == 0) {
         MPI_Win_lock_all(0, c_info->WIN);
-        
+
         res_time = MPI_Wtime();
-        for (i = 0; i < iterations->n_sample; i++)
-        {
-            for (peer = 0; peer < c_info->num_procs; peer++)
-            {
+        for (i = 0; i < iterations->n_sample; i++) {
+            for (peer = 0; peer < c_info->num_procs; peer++) {
                 /* choose different target for each process to avoid congestion */
-                target = (peer + c_info->rank)%c_info->num_procs;
+                target = (peer + c_info->rank) % c_info->num_procs;
                 if (target == c_info->rank)
-                {
                     continue; /* do not get from itself*/
-                }    
-                
-                ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                                r_num, c_info->r_data_type, target, 
-                                i%iterations->s_cache_iter*iterations->s_offs,
-                                r_num, c_info->s_data_type, c_info->WIN);
+
+                ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                               r_num, c_info->r_data_type, target,
+                               i%iterations->s_cache_iter*iterations->s_offs,
+                               r_num, c_info->s_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
-            } 
-        }    
+            }
+        }
         ierr = MPI_Win_flush_local_all(c_info->WIN);
         MPI_ERRHAND(ierr);
-        res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    
+        res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+
         MPI_Win_unlock_all(c_info->WIN);
-    }     
-   
+    }
     /* Synchronize origin and target processes */
     MPI_Barrier(c_info->communicator);
-    
-    *time = res_time; 
+
+    *time = res_time;
     return;
 }
 
 /* Implements "Exchange_get" benchmark. Each process gets some data
  * from two neighbor processes
  * */
-void IMB_rma_exchange_get (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
-                           MODES run_mode, double* time)
-{
+void IMB_rma_exchange_get(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time) {
     double res_time = -1.;
-    int left  = -1;
+    int left = -1;
     int right = -1;
     Type_Size r_size;
     int r_num;
@@ -418,87 +370,75 @@ void IMB_rma_exchange_get (struct comm_info* c_info, int size,
     defect = 0;
 #endif
     ierr = 0;
-    
-    if (c_info->rank < 0)
-    {
+
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    r_num=size/r_size;
-   
-    left  = (c_info->rank == 0) ? c_info->num_procs - 1 : c_info->rank - 1;
-    right = (c_info->rank + 1)%c_info->num_procs;     
+    }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (left != right)
-    {    
-        MPI_Win_lock(MPI_LOCK_SHARED, left,  0, c_info->WIN);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    r_num = size / r_size;
+
+    left = (c_info->rank == 0) ? c_info->num_procs - 1 : c_info->rank - 1;
+    right = (c_info->rank + 1) % c_info->num_procs;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (left != right) {
+        MPI_Win_lock(MPI_LOCK_SHARED, left, 0, c_info->WIN);
         MPI_Win_lock(MPI_LOCK_SHARED, right, 0, c_info->WIN);
-    }
-    else
-    {
+    } else {
         /* Just two processes in the chain. Both messages will come
          * to the one peer */
-        MPI_Win_lock(MPI_LOCK_SHARED, left,  0, c_info->WIN);
-    }    
+        MPI_Win_lock(MPI_LOCK_SHARED, left, 0, c_info->WIN);
+    }
 
     res_time = MPI_Wtime();
-    for (i = 0; i < iterations->n_sample; i++)
-    {
-        ierr = MPI_Get((void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                               r_num, c_info->r_data_type, right, 
-                               i%iterations->s_cache_iter*iterations->s_offs,
-                               r_num, c_info->s_data_type, c_info->WIN);
+    for (i = 0; i < iterations->n_sample; i++) {
+        ierr = MPI_Get((void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                       r_num, c_info->r_data_type, right,
+                       i%iterations->s_cache_iter*iterations->s_offs,
+                       r_num, c_info->s_data_type, c_info->WIN);
         MPI_ERRHAND(ierr);
-        
-        ierr = MPI_Get((void*)(recv + size + i%iterations->r_cache_iter*iterations->r_offs), 
-                               r_num, c_info->r_data_type, left, 
-                               size + i%iterations->s_cache_iter*iterations->s_offs,
-                               r_num, c_info->s_data_type, c_info->WIN);
+
+        ierr = MPI_Get((void*)(recv + size + i%iterations->r_cache_iter*iterations->r_offs),
+                       r_num, c_info->r_data_type, left,
+                       size + i%iterations->s_cache_iter*iterations->s_offs,
+                       r_num, c_info->s_data_type, c_info->WIN);
         MPI_ERRHAND(ierr);
-       
-        if (left != right)
-        {    
+
+        if (left != right) {
             ierr = MPI_Win_flush(left, c_info->WIN);
             MPI_ERRHAND(ierr);
-        
+
             ierr = MPI_Win_flush(right, c_info->WIN);
             MPI_ERRHAND(ierr);
-        }
-        else
-        {
+        } else {
             ierr = MPI_Win_flush(left, c_info->WIN);
             MPI_ERRHAND(ierr);
-        }    
+        }
     }
-    res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    if (left != right)
-    {    
+    res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+    if (left != right) {
         MPI_Win_unlock(left, c_info->WIN);
         MPI_Win_unlock(right, c_info->WIN);
-    }
-    else
-    {
+    } else
         MPI_Win_unlock(left, c_info->WIN);
-    }    
-      
+
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
 
 #ifdef CHECK
-    for(i = 0; i < ITER_MIN(iterations); i++)
-    {
-        CHK_DIFF("MPI_Get",c_info, (void*)(recv+size+i%iterations->r_cache_iter*iterations->r_offs), 
-                  size, size, size, asize, get, 0, iterations->n_sample, i, left , &defect);
-        
-        CHK_DIFF("MPI_Get",c_info, (void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                  0, size, size, asize, get, 0, iterations->n_sample, i, right , &defect);
+    for (i = 0; i < ITER_MIN(iterations); i++) {
+        CHK_DIFF("MPI_Get", c_info, (void*)(recv + size + i%iterations->r_cache_iter*iterations->r_offs),
+                 size, size, size, asize, get, 0, iterations->n_sample, i, left, &defect);
+
+        CHK_DIFF("MPI_Get", c_info, (void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                 0, size, size, asize, get, 0, iterations->n_sample, i, right, &defect);
     }
 #endif     
 
-    *time = res_time; 
+    *time = res_time;
     return;
 }
diff --git a/src/IMB_rma_put.c b/src_c/IMB_rma_put.c
similarity index 56%
rename from src/IMB_rma_put.c
rename to src_c/IMB_rma_put.c
index ac794347..bb5bb1ad 100644
--- a/src/IMB_rma_put.c
+++ b/src_c/IMB_rma_put.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -75,12 +74,11 @@ For more documentation than found here, see
 #include "IMB_prototypes.h"
 
 
-/* Unidirectional and bidirectional put: communication is done 
- * between two processes only. */ 
-void IMB_rma_single_put (struct comm_info* c_info, int size, 
-                         struct iter_schedule* iterations, 
-                         MODES run_mode, double* time)
-{
+/* Unidirectional and bidirectional put: communication is done
+ * between two processes only. */
+void IMB_rma_single_put(struct comm_info* c_info, int size,
+                        struct iter_schedule* iterations,
+                        MODES run_mode, double* time) {
     double res_time = -1.;
     int target = -1;
     int sender = 0;
@@ -93,85 +91,67 @@ void IMB_rma_single_put (struct comm_info* c_info, int size,
 #endif
     ierr = 0;
 
-    if (c_info->rank == c_info->pair0)
-    {
+    if (c_info->rank == c_info->pair0) {
         target = c_info->pair1;
         sender = 1;
-    } 
-    else if (c_info->rank == c_info->pair1)
-    {
+    } else if (c_info->rank == c_info->pair1) {
         target = c_info->pair0;
-        if (run_mode->BIDIR) 
-        {
+        if (run_mode->BIDIR) {
             /* pair1 acts as origin in bidirectional mode only */
             sender = 1;
-        }    
-    }    
-    else if (c_info->rank < 0)
-    {
+        }
+    } else if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    s_num=size/s_size;
+    }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (sender)
-    {    
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    s_num = size / s_size;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (sender) {
         MPI_Win_lock(MPI_LOCK_SHARED, target, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
+        if (run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, target, 
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, target,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
             ierr = MPI_Win_flush(target, c_info->WIN);
             MPI_ERRHAND(ierr);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else if (!run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, target, 
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, target,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
                 ierr = MPI_Win_flush(target, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
         }
-
         MPI_Win_unlock(target, c_info->WIN);
-    }     
-   
+    }
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
 
 #ifdef CHECK
-    if ( !sender || run_mode->BIDIR )
-    {
-         for(i = 0; i < ITER_MIN(iterations); i++)
-         {
-             CHK_DIFF("MPI_Put",c_info, (void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                      0, size, size, asize, put, 0, iterations->n_sample, i, target , &defect);
-             
-         }
-    }     
+    if (!sender || run_mode->BIDIR) {
+        for (i = 0; i < ITER_MIN(iterations); i++) {
+            CHK_DIFF("MPI_Put", c_info, (void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                     0, size, size, asize, put, 0, iterations->n_sample, i, target, &defect);
+        }
+    }
 #endif     
-    
-    *time = res_time; 
+    *time = res_time;
     return;
 }
 
@@ -179,10 +159,9 @@ void IMB_rma_single_put (struct comm_info* c_info, int size,
  * run_mode Collective corresponds to "All_put_all",
  * run_mode MultPassiveTransfer corresponds to "One_put_all"
  * */
-void IMB_rma_put_all (struct comm_info* c_info, int size, 
-                      struct iter_schedule* iterations, 
-                      MODES run_mode, double* time)
-{
+void IMB_rma_put_all(struct comm_info* c_info, int size,
+                     struct iter_schedule* iterations,
+                     MODES run_mode, double* time) {
     double res_time = -1.;
     int target = 0;
     int peer = 0;
@@ -192,138 +171,116 @@ void IMB_rma_put_all (struct comm_info* c_info, int size,
     int i;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
+    }
+
     if (c_info->rank == 0 || run_mode->type == Collective)
-    {
         sender = 1;
-    }    
 
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    s_num=size/s_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    s_num = size / s_size;
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (sender)
-    {    
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (sender) {
         MPI_Win_lock_all(0, c_info->WIN);
-        
+
         res_time = MPI_Wtime();
-        for (i = 0; i < iterations->n_sample; i++)
-        {
-            for (peer = 0; peer < c_info->num_procs; peer++)
-            {
-                
+        for (i = 0; i < iterations->n_sample; i++) {
+            for (peer = 0; peer < c_info->num_procs; peer++) {
                 /* choose different target for each process to avoid congestion */
-                target = (peer + c_info->rank)%c_info->num_procs;
+                target = (peer + c_info->rank) % c_info->num_procs;
                 if (target == c_info->rank)
-                {
                     continue; /* do not put to itself*/
-                }    
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, target, 
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, target,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-        }    
+        }
         ierr = MPI_Win_flush_all(c_info->WIN);
         MPI_ERRHAND(ierr);
-        res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    
+        res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+
         MPI_Win_unlock_all(c_info->WIN);
-    }     
-   
+    }
     /* Synchronize origin and target processes */
     MPI_Barrier(c_info->communicator);
 
-    
-    *time = res_time; 
+    *time = res_time;
     return;
 }
 
-
 /* Implements "Put_local" benchmark. One process puts some data
- * to the other and make sure of completion by MPI_Win_flush_local call 
+ * to the other and makes sure of completion by MPI_Win_flush_local call
  * */
-void IMB_rma_put_local (struct comm_info* c_info, int size, 
-                        struct iter_schedule* iterations, 
-                        MODES run_mode, double* time)
-{
+void IMB_rma_put_local(struct comm_info* c_info, int size,
+                       struct iter_schedule* iterations,
+                       MODES run_mode, double* time) {
     double res_time = -1.;
     Type_Size s_size;
     int s_num;
     int i;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    s_num=size/s_size;
+    }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (c_info->rank == c_info->pair0)
-    {    
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    s_num = size / s_size;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == c_info->pair0) {
         MPI_Win_lock(MPI_LOCK_SHARED, c_info->pair1, 0, c_info->WIN);
-        if (run_mode->AGGREGATE)
-        {
+        if (run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, c_info->pair1, 
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, c_info->pair1,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
             ierr = MPI_Win_flush_local(c_info->pair1, c_info->WIN);
             MPI_ERRHAND(ierr);
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-        }    
-        else if ( !run_mode->AGGREGATE )    
-        {
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+        } else if (!run_mode->AGGREGATE) {
             res_time = MPI_Wtime();
-            for (i = 0; i < iterations->n_sample; i++)
-            {
-
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, c_info->pair1, 
+            for (i = 0; i < iterations->n_sample; i++) {
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, c_info->pair1,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
                 ierr = MPI_Win_flush_local(c_info->pair1, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-            res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
+            res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
         }
-
         MPI_Win_unlock(c_info->pair1, c_info->WIN);
-    }     
-   
+    }
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
-    
-    *time = res_time; 
+
+    *time = res_time;
     return;
 }
 
 /* Implements "put_all_local" benchmark. One process puts some data
- * to all other processes and make sure of completion by 
- * MPI_Win_flush_local_all call 
+ * to all other processes and makes sure of completion by
+ * MPI_Win_flush_local_all call
  * */
-void IMB_rma_put_all_local (struct comm_info* c_info, int size, 
-                            struct iter_schedule* iterations, 
-                            MODES run_mode, double* time)
-{
+void IMB_rma_put_all_local(struct comm_info* c_info, int size,
+                           struct iter_schedule* iterations,
+                           MODES run_mode, double* time) {
     double res_time = -1.;
     int target = 0;
     int peer = 0;
@@ -332,64 +289,56 @@ void IMB_rma_put_all_local (struct comm_info* c_info, int size,
     int i;
     ierr = 0;
 
-    if (c_info->rank < 0)
-    {
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
+    }
 
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    s_num=size/s_size;
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    s_num = size / s_size;
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (c_info->rank == 0)
-    {    
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (c_info->rank == 0) {
         MPI_Win_lock_all(0, c_info->WIN);
-        
+
         res_time = MPI_Wtime();
-        for (i = 0; i < iterations->n_sample; i++)
-        {
-            for (peer = 0; peer < c_info->num_procs; peer++)
-            {
+        for (i = 0; i < iterations->n_sample; i++) {
+            for (peer = 0; peer < c_info->num_procs; peer++) {
                 /* choose different target for each process to avoid congestion */
-                target = (peer + c_info->rank)%c_info->num_procs;
+                target = (peer + c_info->rank) % c_info->num_procs;
                 if (target == c_info->rank)
-                {
                     continue; /* do not put to itself*/
-                }    
-                
-                ierr = MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-                               s_num, c_info->s_data_type, target, 
+
+                ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                               s_num, c_info->s_data_type, target,
                                i%iterations->r_cache_iter*iterations->r_offs,
                                s_num, c_info->r_data_type, c_info->WIN);
                 MPI_ERRHAND(ierr);
             }
-        }    
+        }
         ierr = MPI_Win_flush_local_all(c_info->WIN);
         MPI_ERRHAND(ierr);
-        res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    
+        res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+
         MPI_Win_unlock_all(c_info->WIN);
-    }     
-   
+    }
     /* Synchronize origin and target processes */
     MPI_Barrier(c_info->communicator);
 
-    
-    *time = res_time; 
+    *time = res_time;
     return;
 }
 
 /* Implements "Exchange_put" benchmark. Each process puts some data
  * to two neighbor processes
  * */
-void IMB_rma_exchange_put (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
-                           MODES run_mode, double* time)
-{
+void IMB_rma_exchange_put(struct comm_info* c_info, int size,
+                          struct iter_schedule* iterations,
+                          MODES run_mode, double* time) {
     double res_time = -1.;
-    int left  = -1;
+    int left = -1;
     int right = -1;
     Type_Size s_size;
     int s_num;
@@ -399,191 +348,163 @@ void IMB_rma_exchange_put (struct comm_info* c_info, int size,
     defect = 0;
 #endif
     ierr = 0;
-    
-    if (c_info->rank < 0)
-    {
+
+    if (c_info->rank < 0) {
         *time = res_time;
         return;
-    } 
-    
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    s_num=size/s_size;
-   
-    left  = (c_info->rank == 0) ? c_info->num_procs - 1 : c_info->rank - 1;
-    right = (c_info->rank + 1)%c_info->num_procs;     
+    }
 
-    for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-    
-    if (left != right)
-    {    
-        MPI_Win_lock(MPI_LOCK_SHARED, left,  0, c_info->WIN);
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    s_num = size / s_size;
+
+    left = (c_info->rank == 0) ? c_info->num_procs - 1 : c_info->rank - 1;
+    right = (c_info->rank + 1) % c_info->num_procs;
+
+    for (i = 0; i < N_BARR; i++)
+        MPI_Barrier(c_info->communicator);
+
+    if (left != right) {
+        MPI_Win_lock(MPI_LOCK_SHARED, left, 0, c_info->WIN);
         MPI_Win_lock(MPI_LOCK_SHARED, right, 0, c_info->WIN);
-    }
-    else
-    {
+    } else {
         /* Just two processes in the chain. Both messages will come
          * to the one peer */
-        MPI_Win_lock(MPI_LOCK_SHARED, left,  0, c_info->WIN);
-    }    
+        MPI_Win_lock(MPI_LOCK_SHARED, left, 0, c_info->WIN);
+    }
 
     res_time = MPI_Wtime();
-    for (i = 0; i < iterations->n_sample; i++)
-    {
+    for (i = 0; i < iterations->n_sample; i++) {
         ierr = MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->s_data_type, left, 
-                        i%iterations->r_cache_iter*iterations->r_offs,
-                        s_num, c_info->r_data_type, c_info->WIN);
+                       s_num, c_info->s_data_type, left, 
+                       i%iterations->r_cache_iter*iterations->r_offs,
+                       s_num, c_info->r_data_type, c_info->WIN);
         MPI_ERRHAND(ierr);
-        
+
         ierr = MPI_Put((char*)c_info->s_buffer + size + i%iterations->s_cache_iter*iterations->s_offs,
-                        s_num, c_info->s_data_type, right, 
-                        i%iterations->r_cache_iter*iterations->r_offs + size,
-                        s_num, c_info->r_data_type, c_info->WIN);
+                       s_num, c_info->s_data_type, right,
+                       i%iterations->r_cache_iter*iterations->r_offs + size,
+                       s_num, c_info->r_data_type, c_info->WIN);
         MPI_ERRHAND(ierr);
-       
-        if (left != right)
-        {    
+
+        if (left != right) {
             ierr = MPI_Win_flush(left, c_info->WIN);
             MPI_ERRHAND(ierr);
-        
+
             ierr = MPI_Win_flush(right, c_info->WIN);
             MPI_ERRHAND(ierr);
-        }
-        else
-        {
+        } else {
             ierr = MPI_Win_flush(left, c_info->WIN);
             MPI_ERRHAND(ierr);
-        }    
+        }
     }
-    res_time = (MPI_Wtime() - res_time)/iterations->n_sample;
-    if (left != right)
-    {    
+    res_time = (MPI_Wtime() - res_time) / iterations->n_sample;
+    if (left != right) {
         MPI_Win_unlock(left, c_info->WIN);
         MPI_Win_unlock(right, c_info->WIN);
-    }
-    else
-    {
+    } else
         MPI_Win_unlock(left, c_info->WIN);
-    }    
-      
+
     /* Synchronize target and origin processes */
     MPI_Barrier(c_info->communicator);
 
 #ifdef CHECK
-    for(i = 0; i < ITER_MIN(iterations); i++)
-    {
-        CHK_DIFF("MPI_Put",c_info, (void*)(recv+size+i%iterations->r_cache_iter*iterations->r_offs), 
-                  size, size, size, asize, put, 0, iterations->n_sample, i, left , &defect);
-        
-        CHK_DIFF("MPI_Put",c_info, (void*)(recv+i%iterations->r_cache_iter*iterations->r_offs), 
-                  0, size, size, asize, put, 0, iterations->n_sample, i, right , &defect);
+    for (i = 0; i < ITER_MIN(iterations); i++) {
+        CHK_DIFF("MPI_Put", c_info, (void*)(recv + size + i%iterations->r_cache_iter*iterations->r_offs),
+                 size, size, size, asize, put, 0, iterations->n_sample, i, left, &defect);
+
+        CHK_DIFF("MPI_Put", c_info, (void*)(recv + i%iterations->r_cache_iter*iterations->r_offs),
+                 0, size, size, asize, put, 0, iterations->n_sample, i, right, &defect);
     }
 #endif     
 
-    *time = res_time; 
+    *time = res_time;
     return;
 }
 
 
-static double IMB_put_target(struct iter_schedule* iterations, struct comm_info* c_info, 
-        double time_to_compute)
-{
+static double IMB_put_target(struct iter_schedule* iterations, struct comm_info* c_info,
+                             double time_to_compute) {
     double time = 0.;
     int i;
     time = MPI_Wtime();
-    for(i=0; i < iterations->n_sample; i++)
-    {
+    for (i = 0; i < iterations->n_sample; i++) {
         if (time_to_compute)
-        {
             IMB_cpu_exploit(time_to_compute, 0);
-        }    
+
         MPI_Barrier(c_info->communicator);
     }
-    time = (MPI_Wtime() - time)/iterations->n_sample;
+    time = (MPI_Wtime() - time) / iterations->n_sample;
     return time;
-}    
+}
 
 static double IMB_put_origin(int msg_size, int dst, struct comm_info* c_info,
-                      struct iter_schedule* iterations)
+                             struct iter_schedule* iterations)
 {
     double time = 0.;
     int i;
     time = MPI_Wtime();
-    for(i=0; i < iterations->n_sample; i++)
-    {
-       MPI_Put((char*)c_info->s_buffer+i%iterations->s_cache_iter*iterations->s_offs,
-               msg_size, c_info->s_data_type, dst, i%iterations->r_cache_iter*iterations->r_offs,
-               msg_size, c_info->r_data_type, c_info->WIN);
-       MPI_Win_flush(dst, c_info->WIN);
-       MPI_Barrier(c_info->communicator);
-    }    
-    time = (MPI_Wtime() - time)/iterations->n_sample;
+    for (i = 0; i < iterations->n_sample; i++) {
+        MPI_Put((char*)c_info->s_buffer + i%iterations->s_cache_iter*iterations->s_offs,
+                msg_size, c_info->s_data_type, dst, i%iterations->r_cache_iter*iterations->r_offs,
+                msg_size, c_info->r_data_type, c_info->WIN);
+        MPI_Win_flush(dst, c_info->WIN);
+        MPI_Barrier(c_info->communicator);
+    }
+    time = (MPI_Wtime() - time) / iterations->n_sample;
     return time;
-}    
+}
 
-/* Implements "Truly_passive_put" benchmark. It allows to check whether 
- * MPI implementation supports truly one sided mode. 
+/* Implements "Truly_passive_put" benchmark. It allows checking whether
+ * the MPI implementation supports truly one-sided mode.
  * The flow is:
- * 1) Execution time of MPI_Put() is measured for the case when the target is 
+ * 1) Execution time of MPI_Put() is measured for the case when the target is
  *    waiting in MPI_Barrier() call.
  * 2) This time is sent to the target
  * 3) Execution time of MPI_Put is measured for the case when the target performs
- *    some computation for about the time measued at step 1) and then enters 
+ *    some computation for about the time measured at step 1) and then enters
  *    MPI_Barrier() call */
-void IMB_rma_passive_put (struct comm_info* c_info, int size, 
-                           struct iter_schedule* iterations, 
-                           MODES run_mode, double* time)
-{
-    
-    double t_pure = 0.; 
+void IMB_rma_passive_put(struct comm_info* c_info, int size,
+                         struct iter_schedule* iterations,
+                         MODES run_mode, double* time) {
+    double t_pure = 0.;
     double t_with_comp = 0.;
 
-    Type_Size s_size,r_size;
+    Type_Size s_size, r_size;
     int s_num, r_num;
 
     time[0] = 0.;
     time[1] = 0.;
     if (c_info->rank < 0)
-    {
         return;
-    }    
-    
-    /*  GET SIZE OF DATA TYPE */  
-    MPI_Type_size(c_info->s_data_type,&s_size);
-    MPI_Type_size(c_info->r_data_type,&r_size);
-    if ((s_size!=0) && (r_size!=0))
-    {
-        s_num=size/s_size;
-        r_num=size/r_size;
-    }
-    else
-    {
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    } else
         return;
-    }
 
     MPI_Barrier(c_info->communicator);
-    
-    if (c_info->rank == c_info->pair0)
-    {
-        MPI_Win_lock(MPI_LOCK_SHARED, c_info->pair1, 0 , c_info->WIN);
+
+    if (c_info->rank == c_info->pair0) {
+        MPI_Win_lock(MPI_LOCK_SHARED, c_info->pair1, 0, c_info->WIN);
         t_pure = IMB_put_origin(s_num, c_info->pair1, c_info, iterations);
-        MPI_Send(&t_pure, 1, MPI_DOUBLE, c_info->pair1, 1 ,c_info->communicator);
+        MPI_Send(&t_pure, 1, MPI_DOUBLE, c_info->pair1, 1, c_info->communicator);
 
         MPI_Barrier(c_info->communicator);
 
-        t_with_comp = IMB_put_origin(s_num, c_info->pair1,c_info, iterations);
+        t_with_comp = IMB_put_origin(s_num, c_info->pair1, c_info, iterations);
         time[0] = t_pure;
         time[1] = t_with_comp;
 
-
         MPI_Win_unlock(c_info->pair1, c_info->WIN);
-    }    
-    else if (c_info->rank == c_info->pair1)
-    {
+    } else if (c_info->rank == c_info->pair1) {
         IMB_put_target(iterations, c_info, 0.);
-        MPI_Recv(&t_pure, 1, MPI_DOUBLE, c_info->pair0, 1 ,c_info->communicator,
-                MPI_STATUS_IGNORE);
-        
+        MPI_Recv(&t_pure, 1, MPI_DOUBLE, c_info->pair0, 1, c_info->communicator,
+            MPI_STATUS_IGNORE);
+
         /*initialize IMB_cpu_exploit with received time*/
         IMB_cpu_exploit(t_pure, 1);
 
@@ -591,6 +512,6 @@ void IMB_rma_passive_put (struct comm_info* c_info, int size,
 
         IMB_put_target(iterations, c_info, t_pure);
     }
-    return;    
-}    
-    
+    return;
+}
+
diff --git a/src/IMB_scatter.c b/src_c/IMB_scatter.c
similarity index 66%
rename from src/IMB_scatter.c
rename to src_c/IMB_scatter.c
index 3bd35559..24f5b336 100644
--- a/src/IMB_scatter.c
+++ b/src_c/IMB_scatter.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -89,92 +88,83 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                 MODES RUN_MODE, double* time)
+                 MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      Benchmarks MPI_Scatter
-                      
+                          MPI-1 benchmark kernel
+                          Benchmarks MPI_Scatter
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
+-size                     (type int)
+                          Basic message size in bytes
 
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
-Output variables: 
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  int    i;
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  double t1, t2;
+    int    i;
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    double t1, t2;
 
 #ifdef CHECK
-defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
-
-  *time = 0.;
-
-  if(c_info->rank!=-1)
-  {
-      int root = 0;
-
-      IMB_do_n_barriers(c_info->communicator, N_BARR);
-
-      for(i=0;i<ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Scatter((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              s_num, c_info->s_data_type,
-                              (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                              r_num, c_info->r_data_type,
-                              root,
-                              c_info->communicator);
-          MPI_ERRHAND(ierr);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-          
-          CHK_DIFF("Scatter",c_info,
-                 (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                 (size_t) s_num* (size_t) c_info->rank, size, size, 1, 
-                 put, 0, ITERATIONS->n_sample, i,
-                 root, &defect);
-          
-          root = (root + c_info->root_shift) % c_info->num_procs;
-          IMB_do_n_barriers(c_info->communicator, c_info->sync);
-      }
-
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    size *= c_info->size_scale;
+
+    *time = 0.;
+
+    if (c_info->rank != -1) {
+        int root = 0;
+
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            t1 = MPI_Wtime();
+            ierr = MPI_Scatter((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                               s_num, c_info->s_data_type,
+                               (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                               r_num, c_info->r_data_type,
+                               root,
+                               c_info->communicator);
+            MPI_ERRHAND(ierr);
+            t2 = MPI_Wtime();
+            *time += (t2 - t1);
+
+            CHK_DIFF("Scatter", c_info,
+                     (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                     (size_t)s_num* (size_t)c_info->rank, size, size, 1,
+                     put, 0, ITERATIONS->n_sample, i,
+                     root, &defect);
+
+            root = (root + c_info->root_shift) % c_info->num_procs;
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -185,39 +175,32 @@ void IMB_iscatter(struct comm_info* c_info,
                   int size,
                   struct iter_schedule* ITERATIONS,
                   MODES RUN_MODE,
-                  double* time)
+                  double* time) {
 /*
 
-
-                      MPI-NBC benchmark kernel
-                      Benchmarks MPI_Iscatter
-
-
+                          MPI-NBC benchmark kernel
+                          Benchmarks MPI_Iscatter
 
 Input variables:
 
--c_info               (type struct comm_info*)
-                      Collection of all base data for MPI;
-                      see [1] for more information
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
+-RUN_MODE                 (type MODES)
 
 Output variables:
 
--time                 (type double*)
-                      Timing result per sample
-
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -230,7 +213,7 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -242,7 +225,7 @@ Output variables:
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_iscatter_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
 
@@ -251,8 +234,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Iscatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
@@ -267,7 +249,7 @@ Output variables:
             MPI_ERRHAND(ierr);
 
             t_comp -= MPI_Wtime();
-                IMB_cpu_exploit(t_pure, 0);
+            IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
 
             MPI_Wait(&request, &status);
@@ -281,7 +263,7 @@ Output variables:
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
 
     time[0] = t_pure;
@@ -295,39 +277,32 @@ void IMB_iscatter_pure(struct comm_info* c_info,
                        int size,
                        struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE,
-                       double* time)
+                       double* time) {
 /*
 
-
-                      MPI-NBC benchmark kernel
-                      Benchmarks IMB_Iscatter_pure
-
-
+                          MPI-NBC benchmark kernel
+                          Benchmarks IMB_Iscatter_pure
 
 Input variables:
 
--c_info               (type struct comm_info*)
-                      Collection of all base data for MPI;
-                      see [1] for more information
-
-
--size                 (type int)
-                      Basic message size in bytes
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-size                     (type int)
+                          Basic message size in bytes
 
--RUN_MODE             (type MODES)
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
+-RUN_MODE                 (type MODES)
 
 Output variables:
 
--time                 (type double*)
-                      Timing result per sample
-
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -338,7 +313,7 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -350,13 +325,12 @@ Output variables:
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Iscatter((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                 s_num,
@@ -380,7 +354,6 @@ Output variables:
         }
         t_pure /= ITERATIONS->n_sample;
     }
-
     time[0] = t_pure;
 }
 
diff --git a/src/IMB_scatterv.c b/src_c/IMB_scatterv.c
similarity index 65%
rename from src/IMB_scatterv.c
rename to src_c/IMB_scatterv.c
index 41cf0a6c..74642dab 100644
--- a/src/IMB_scatterv.c
+++ b/src_c/IMB_scatterv.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -89,100 +88,91 @@ Hans-Joachim Plum, Intel GmbH
 
 
 void IMB_scatterv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                 MODES RUN_MODE, double* time)
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      Benchmarks MPI_Scatterv
-                      
+                          MPI-1 benchmark kernel
+                          Benchmarks MPI_Scatterv
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
-
--size                 (type int)                      
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
+-size                     (type int)
+                          Basic message size in bytes
 
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
-Output variables: 
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  int    i;
-  Type_Size s_size,r_size;
-  int s_num, r_num;
-  double t1, t2;
+    int    i;
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    double t1, t2;
 
 #ifdef CHECK
-  defect=0.;
+    defect = 0.;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-  {
-      s_num=size/s_size;
-      r_num=size/r_size;
-  } 
-
-  /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
-
-  for (i=0;i<c_info->num_procs ;i++)
-  {
-    c_info->sdispl[i] = s_num*i;
-    c_info->sndcnt[i] = s_num;
-  }
-
-  *time = 0.;
-
-  if(c_info->rank!=-1)
-  {
-      int root = 0;
-
-      IMB_do_n_barriers(c_info->communicator, N_BARR);
-
-      for(i=0;i<ITERATIONS->n_sample;i++)
-      {
-          t1 = MPI_Wtime();
-          ierr = MPI_Scatterv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                              c_info->sndcnt,c_info->sdispl,
-                              c_info->s_data_type,
-                              (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
-                              r_num,
-                              c_info->r_data_type,
-                              root,
-                              c_info->communicator);
-          MPI_ERRHAND(ierr);
-          t2 = MPI_Wtime();
-          *time += (t2 - t1);
-          
-          CHK_DIFF("Scatterv",c_info,
-                   (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                   c_info->sdispl[c_info->rank], size, size, 1,
-                   put, 0, ITERATIONS->n_sample, i,
-                   root, &defect);
-          root = (root + c_info->root_shift) % c_info->num_procs;
-          IMB_do_n_barriers(c_info->communicator, c_info->sync);
-      }
-      *time /= ITERATIONS->n_sample;
-  }
+    ierr = 0;
+
+    /*  GET SIZE OF DATA TYPE */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
+    }
+
+    /* INITIALIZATION OF SEND DISPLACEMENTS AND SEND COUNTS */
+
+    for (i = 0; i < c_info->num_procs; i++) {
+        c_info->sdispl[i] = s_num*i;
+        c_info->sndcnt[i] = s_num;
+    }
+
+    size *= c_info->size_scale;
+
+    *time = 0.;
+
+    if (c_info->rank != -1) {
+        int root = 0;
+
+        IMB_do_n_barriers(c_info->communicator, N_BARR);
+
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            t1 = MPI_Wtime();
+            ierr = MPI_Scatterv((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                                c_info->sndcnt, c_info->sdispl,
+                                c_info->s_data_type,
+                                (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
+                                r_num,
+                                c_info->r_data_type,
+                                root,
+                                c_info->communicator);
+            MPI_ERRHAND(ierr);
+            t2 = MPI_Wtime();
+            *time += (t2 - t1);
+
+            CHK_DIFF("Scatterv", c_info,
+                     (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                     c_info->sdispl[c_info->rank], size, size, 1,
+                     put, 0, ITERATIONS->n_sample, i,
+                     root, &defect);
+            root = (root + c_info->root_shift) % c_info->num_procs;
+            IMB_do_n_barriers(c_info->communicator, c_info->sync);
+        }
+        *time /= ITERATIONS->n_sample;
+    }
 }
 
 #elif defined NBC // MPI1
@@ -193,39 +183,33 @@ void IMB_iscatterv(struct comm_info* c_info,
                    int size,
                    struct iter_schedule* ITERATIONS,
                    MODES RUN_MODE,
-                   double* time)
+                   double* time) {
 /*
 
-
-                      MPI-NBC benchmark kernel
-                      Benchmarks MPI_Iscatterv
-
-
+                          MPI-NBC benchmark kernel
+                          Benchmarks MPI_Iscatterv
 
 Input variables:
 
--c_info               (type struct comm_info*)
-                      Collection of all base data for MPI;
-                      see [1] for more information
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)
-                      Basic message size in bytes
-
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
-
--RUN_MODE             (type MODES)
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
+-RUN_MODE                 (type MODES)
 
 Output variables:
 
--time                 (type double*)
-                      Timing result per sample
+-time                     (type double*)
+                          Timing result per sample
 
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -238,7 +222,7 @@ Output variables:
                 t_ovrlp = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -250,7 +234,7 @@ Output variables:
         r_num = size / r_size;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
         IMB_iscatterv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);
@@ -260,8 +244,7 @@ Output variables:
 
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i=0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_ovrlp -= MPI_Wtime();
             ierr = MPI_Iscatterv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  c_info->sndcnt,
@@ -277,7 +260,7 @@ Output variables:
             MPI_ERRHAND(ierr);
 
             t_comp -= MPI_Wtime();
-                IMB_cpu_exploit(t_pure, 0);
+            IMB_cpu_exploit(t_pure, 0);
             t_comp += MPI_Wtime();
 
             MPI_Wait(&request, &status);
@@ -291,9 +274,8 @@ Output variables:
             IMB_do_n_barriers(c_info->communicator, c_info->sync);
         }
         t_ovrlp /= ITERATIONS->n_sample;
-        t_comp  /= ITERATIONS->n_sample;
+        t_comp /= ITERATIONS->n_sample;
     }
-
     time[0] = t_pure;
     time[1] = t_ovrlp;
     time[2] = t_comp;
@@ -302,42 +284,35 @@ Output variables:
 /*************************************************************************/
 
 void IMB_iscatterv_pure(struct comm_info* c_info,
-                       int size,
-                       struct iter_schedule* ITERATIONS,
-                       MODES RUN_MODE,
-                       double* time)
+                        int size,
+                        struct iter_schedule* ITERATIONS,
+                        MODES RUN_MODE,
+                        double* time) {
 /*
 
-
-                      MPI-NBC benchmark kernel
-                      Benchmarks IMB_Iscatterv_pure
-
-
+                          MPI-NBC benchmark kernel
+                          Benchmarks IMB_Iscatterv_pure
 
 Input variables:
 
--c_info               (type struct comm_info*)
-                      Collection of all base data for MPI;
-                      see [1] for more information
-
-
--size                 (type int)
-                      Basic message size in bytes
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-size                     (type int)
+                          Basic message size in bytes
 
--RUN_MODE             (type MODES)
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
+-RUN_MODE                 (type MODES)
 
 Output variables:
 
--time                 (type double*)
-                      Timing result per sample
-
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
     int         i = 0;
     Type_Size   s_size,
                 r_size;
@@ -348,7 +323,7 @@ Output variables:
     double      t_pure = 0.;
 
 #ifdef CHECK
-    defect=0.;
+    defect = 0.;
 #endif
     ierr = 0;
 
@@ -361,17 +336,16 @@ Output variables:
     }
 
     /* INITIALIZATION OF DISPLACEMENT and RECEIVE COUNTS */
-    for (i= 0; i < c_info->num_procs; i++) {
+    for (i = 0; i < c_info->num_procs; i++) {
         c_info->sdispl[i] = s_num * i;
         c_info->sndcnt[i] = s_num;
     }
 
-    if(c_info->rank != -1) {
+    if (c_info->rank != -1) {
         int root = 0;
         IMB_do_n_barriers(c_info->communicator, N_BARR);
 
-        for(i = 0; i < ITERATIONS->n_sample; i++)
-        {
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
             t_pure -= MPI_Wtime();
             ierr = MPI_Iscatterv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  c_info->sndcnt,
@@ -396,7 +370,6 @@ Output variables:
         }
         t_pure /= ITERATIONS->n_sample;
     }
-
     time[0] = t_pure;
 }
 
diff --git a/src/IMB_sendrecv.c b/src_c/IMB_sendrecv.c
similarity index 57%
rename from src/IMB_sendrecv.c
rename to src_c/IMB_sendrecv.c
index a5cb559a..f757afa4 100644
--- a/src/IMB_sendrecv.c
+++ b/src_c/IMB_sendrecv.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -91,94 +90,84 @@ Hans-Joachim Plum, Intel GmbH
 */
 /* ===================================================================== */
 
-void IMB_sendrecv(struct comm_info* c_info, int size,  struct iter_schedule* ITERATIONS,
-                  MODES RUN_MODE, double* time)
+void IMB_sendrecv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                  MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-1 benchmark kernel
-                      Benchmarks MPI_Sendrecv
-                      
+                          MPI-1 benchmark kernel
+                          Benchmarks MPI_Sendrecv
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)                      
-                      Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling
 
--ITERATIONS           (type struct iter_schedule *)
-                      Repetition scheduling
+-RUN_MODE                 (type MODES)
+                          (only MPI-2 case: see [1])
 
--RUN_MODE             (type MODES)                      
-                      (only MPI-2 case: see [1])
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  double t1,t2;
-  int i;
-  Type_Size s_size, r_size;
-  int s_num,r_num;
-  int s_tag, r_tag;
-  int dest, source;
-  MPI_Status stat;
-
-#ifdef CHECK 
-  defect=0;
+    int i;
+    Type_Size s_size, r_size;
+    int s_num, r_num;
+    int s_tag, r_tag;
+    int dest, source;
+    MPI_Status stat;
+    *time = 0.;
+
+#ifdef CHECK
+    defect = 0;
 #endif
-  ierr = 0;
-
-  /*  GET SIZE OF DATA TYPE's in s_size and r_size */  
-  MPI_Type_size(c_info->s_data_type,&s_size);
-  MPI_Type_size(c_info->r_data_type,&r_size);
-  if ((s_size!=0) && (r_size!=0))
-    {
-      s_num=size/s_size;
-      r_num=size/r_size;
-    }   
-  s_tag = 1;
-  r_tag = MPI_ANY_TAG;
-  
-  if(c_info->rank!=-1)
-    {  
-      /*  CALCULATE SOURCE AND DESTINATION */  
-      dest   = (c_info->rank + 1)                   % (c_info->num_procs);
-      source = (c_info->rank + c_info->num_procs-1) % (c_info->num_procs);
-
-      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-      
-      t1 = MPI_Wtime();
-      for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-	  ierr= MPI_Sendrecv((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
-                             s_num,c_info->s_data_type, dest,s_tag,
-                             (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                             r_num,c_info->r_data_type,source,r_tag,
-			     c_info->communicator,&stat);
-	  MPI_ERRHAND(ierr);
-
-          CHK_DIFF("Sendrecv",c_info,(char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
-                    0, size, size, asize,
-                    put, 0, ITERATIONS->n_sample, i,
-                    source, &defect);
-	}
-      t2 = MPI_Wtime();
-      *time=(t2 - t1)/ITERATIONS->n_sample;
+    ierr = 0;
+
+    /*  GET SIZES OF THE DATA TYPES in s_size and r_size */
+    MPI_Type_size(c_info->s_data_type, &s_size);
+    MPI_Type_size(c_info->r_data_type, &r_size);
+    if ((s_size != 0) && (r_size != 0)) {
+        s_num = size / s_size;
+        r_num = size / r_size;
     }
-  else
-    { 
-      *time = 0.;
+
+    size *= c_info->size_scale;
+
+    s_tag = 1;
+    r_tag = MPI_ANY_TAG;
+
+    if (c_info->rank != -1) {
+        /*  CALCULATE SOURCE AND DESTINATION */
+        dest = (c_info->rank + 1) % (c_info->num_procs);
+        source = (c_info->rank + c_info->num_procs - 1) % (c_info->num_procs);
+
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        *time -= MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Sendrecv((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
+                                s_num, c_info->s_data_type, dest, s_tag,
+                                (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                                r_num, c_info->r_data_type, source, r_tag,
+                                c_info->communicator, &stat);
+            MPI_ERRHAND(ierr);
+
+            CHK_DIFF("Sendrecv", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
+                     0, size, size, asize,
+                     put, 0, ITERATIONS->n_sample, i,
+                     source, &defect);
+        }
+        *time += MPI_Wtime();
     }
+    *time /= ITERATIONS->n_sample;
 }
 
diff --git a/src/IMB_settings.h b/src_c/IMB_settings.h
similarity index 96%
rename from src/IMB_settings.h
rename to src_c/IMB_settings.h
index d9b0ad0a..e12934d7 100644
--- a/src/IMB_settings.h
+++ b/src_c/IMB_settings.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -157,14 +156,14 @@ DON'T change anything below here !!
 
 #ifdef BUFFERS_INT
 
-typedef int assign_type ;
+typedef int assign_type;
 #define BUF_VALUE(rank,i)  10000000*(1+rank)+i%10000000
 
 #endif
 
 #ifdef BUFFERS_FLOAT
 
-typedef float assign_type ;
+typedef float assign_type;
 #define BUF_VALUE(rank,i)  (0.1*((rank)+1)+(float)((i)))
 
 #endif
diff --git a/src/IMB_settings_io.h b/src_c/IMB_settings_io.h
similarity index 95%
rename from src/IMB_settings_io.h
rename to src_c/IMB_settings_io.h
index 732318b5..65fb9cb5 100644
--- a/src/IMB_settings_io.h
+++ b/src_c/IMB_settings_io.h
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -157,14 +156,14 @@ DON'T change anything below here !!
 /* How to set communications buffers for process rank, index i */
 #ifdef BUFFERS_INT
 
-typedef int assign_type ;
+typedef int assign_type;
 #define BUF_VALUE(rank,i)  10000000*(1+rank)+i%10000000
 
 #endif
 
 #ifdef BUFFERS_FLOAT
 
-typedef float assign_type ;
+typedef float assign_type;
 #define BUF_VALUE(rank,i)  (0.1*((rank)+1)+(float)((i)))
 
 #endif
diff --git a/src/IMB_strgs.c b/src_c/IMB_strgs.c
similarity index 70%
rename from src/IMB_strgs.c
rename to src_c/IMB_strgs.c
index b61192bd..8f2825ba 100644
--- a/src/IMB_strgs.c
+++ b/src_c/IMB_strgs.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,57 +80,43 @@ For more documentation than found here, see
 
 
 
-char* IMB_str(const char* Bname)
+char* IMB_str(const char* Bname) {
 /*
 
-                      
                       Copies string Bname to new memory which is returned
-                      
 
+Input variables:
 
-Input variables: 
-
--Bname                (type char*)                      
+-Bname                (type char*)
                       String to be copied
-                      
-
 
-Return value          (type char*)                      
+Return value          (type char*)
                       Copy of Bname with newly allocated memory
-                      
-
 
 */
-{
-    char* strg = (char*) IMB_v_alloc( 1+strlen(Bname),"str" );
+    char* strg = (char*)IMB_v_alloc(1 + strlen(Bname), "str");
 
-    if(strg)
-	return strcpy(strg,Bname);
-    else 
-	return NULL;
+    if (strg)
+        return strcpy(strg, Bname);
+    else
+        return NULL;
 }
 
 
 /***************************************************************************/
-void IMB_lwr(char* Bname)
+void IMB_lwr(char* Bname) {
 /*
 
+In/out variables:
 
-
-In/out variables: 
-
--Bname                (type char*)                      
+-Bname                (type char*)
                       Uper case alphabetic characters are converted to lower case
-                      
-
 
 */
-{
     int i;
-
-    for(i=0; i<strlen(Bname); i++)
-	if ( Bname[i] >= 'A' && Bname[i] <= 'Z' )
-	    Bname[i] = Bname[i] - ('A'-'a');
+    for (i = 0; i < strlen(Bname); i++)
+        if (Bname[i] >= 'A' && Bname[i] <= 'Z')
+            Bname[i] = Bname[i] - ('A' - 'a');
 }
 
 
@@ -140,81 +125,61 @@ In/out variables:
 
 
 
-int IMB_str_atoi(char s[])
+int IMB_str_atoi(char s[]) {
 /*
 
-                      
                       Evaluates int value of a numeric string
-                      
 
+Input variables:
 
-Input variables: 
-
--s                    (type char [])                      
+-s                    (type char [])
                       String with only numeric characters
-                      
-
 
-Return value          (type int)                      
+Return value          (type int)
                       Numeric value
-                      
-
 
 */
-{
-    int i,n,len ;
-
-    n=0;
-    len=strlen(s);
-
-    for(i=0; s[i] >= '0' && s[i] <= '9' || s[i]==' '; ++i)
-    {
-	if (s[i]!=' ') 
-	{
-	    n=10*n+(s[i]-'0');
-	}
-    }
-/* IMB_3.0 */
-    if (len==0 || i<len)
-    {
-	n=-1;
+
+    int i, n, len;
+
+    n = 0;
+    len = strlen(s);
+
+    for (i = 0; s[i] >= '0' && s[i] <= '9' || s[i] == ' '; ++i) {
+        if (s[i] != ' ')
+            n = 10 * n + (s[i] - '0');
     }
+    /* IMB_3.0 */
+    if (len == 0 || i < len)
+        n = -1;
     return n;
 }
 
 
 /* str_erase   */
-void IMB_str_erase(char* string, int Nblnc)
+void IMB_str_erase(char* string, int Nblnc) {
 /*
 
-                      
                       Fills blancs into a string
-                      
 
+Input variables:
 
-Input variables: 
-
--Nblnc                (type int)                      
+-Nblnc                (type int)
                       #blancs to fill
-                      
-
 
-In/out variables: 
+In/out variables:
 
--string               (type char*)                      
+-string               (type char*)
                       Null terminated string with Nblnc many blancs
-                      
-
 
 */
-{
-    if( Nblnc > 0 )
-    {
-	int i;
-	for(i=0;i<Nblnc; i++ ) string[i]=' ';
-
-	i=max(0,Nblnc);
-	string[i]='\0';
+    if (Nblnc > 0) {
+        int i;
+        for (i = 0; i < Nblnc; i++)
+            string[i] = ' ';
+
+        i = max(0, Nblnc);
+        string[i] = '\0';
     }
 }
 
diff --git a/src/IMB_user_set_info.c b/src_c/IMB_user_set_info.c
similarity index 87%
rename from src/IMB_user_set_info.c
rename to src_c/IMB_user_set_info.c
index 7df6b6cc..a7e37605 100644
--- a/src/IMB_user_set_info.c
+++ b/src_c/IMB_user_set_info.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -78,43 +77,41 @@ For more documentation than found here, see
 
 
 
-void IMB_user_set_info(MPI_Info* opt_info)
+void IMB_user_set_info(MPI_Info* opt_info) {
 /*
 
 
 
-In/out variables: 
+In/out variables:
 
--opt_info             (type MPI_Info*)                      
+-opt_info             (type MPI_Info*)
                       Is set. Default is MPI_INFO_NULL, everything beyond
                       this is user decision and system dependent.
-                      
 
 
-*/
-{
 
-#ifdef MPIIO
-/*
-Set info for all MPI I/O functions
 */
+#ifdef MPIIO
+    /*
+    Set info for all MPI I/O functions
+    */
 
-*opt_info = MPI_INFO_NULL;
+    *opt_info = MPI_INFO_NULL;
 
 #endif
 
 #ifdef EXT
-/*
-Set info for all MPI_Win_create calls
-*/
+    /*
+    Set info for all MPI_Win_create calls
+    */
 
-*opt_info = MPI_INFO_NULL;
+    *opt_info = MPI_INFO_NULL;
 
 #endif
 
 #ifdef RMA
 
-*opt_info = MPI_INFO_NULL;
+    *opt_info = MPI_INFO_NULL;
 
 #endif
 
diff --git a/src/IMB_utils.c b/src_c/IMB_utils.c
similarity index 87%
rename from src/IMB_utils.c
rename to src_c/IMB_utils.c
index 8c1e9de7..5c7fe137 100644
--- a/src/IMB_utils.c
+++ b/src_c/IMB_utils.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In
     doc/IMB_Users_Guide.pdf
@@ -71,11 +70,9 @@ For more documentation than found here, see
 int IMB_internal_barrier = 0;
 /*
 
-
                       MPI-independent implementation of barrier syncronization.
                       Implements  Dissemination barrier algorithm.
 
-
 Input variables:
 
 -comm                 (type MPI_Comm)
@@ -83,25 +80,24 @@ Input variables:
 
 */
 
-void IMB_Barrier ( MPI_Comm comm )
-{
+void IMB_Barrier(MPI_Comm comm) {
     int size = 0;
     int rank = 0;
 
     int mask = 0x1;
-    int dst,src;
+    int dst, src;
 
     int tmp = 0;
 
-    MPI_Comm_size( comm, &size );
-    MPI_Comm_rank( comm, &rank );
+    MPI_Comm_size(comm, &size);
+    MPI_Comm_rank(comm, &rank);
 
-    for( ; mask < size; mask <<=1 ) {
+    for (; mask < size; mask <<= 1) {
         dst = (rank + mask) % size;
         src = (rank - mask + size) % size;
-        MPI_Sendrecv( &tmp, 0, MPI_BYTE, dst, IMB_BARRIER_TAG,
-                      &tmp, 0, MPI_BYTE, src, IMB_BARRIER_TAG,
-                      comm, MPI_STATUS_IGNORE);
+        MPI_Sendrecv(&tmp, 0, MPI_BYTE, dst, IMB_BARRIER_TAG,
+                     &tmp, 0, MPI_BYTE, src, IMB_BARRIER_TAG,
+                     comm, MPI_STATUS_IGNORE);
     }
 }
 
diff --git a/src/IMB_warm_up.c b/src_c/IMB_warm_up.c
similarity index 85%
rename from src/IMB_warm_up.c
rename to src_c/IMB_warm_up.c
index c35eaf5f..9cd1640b 100644
--- a/src/IMB_warm_up.c
+++ b/src_c/IMB_warm_up.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -83,56 +82,45 @@ For more documentation than found here, see
 Use ITERATIONS object;
 perform warmup with the minimum message size, no longer with the maximum one;
 */
-void IMB_warm_up (struct comm_info* c_info, struct Bench* Bmark, struct iter_schedule* ITERATIONS, int iter)
+void IMB_warm_up(struct Bench* Bmark, struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, int iter) {
 /* >> IMB 3.1  */
 /*
 
-                      
                       'Warm up' run of the particular benchmark, so the
                       system can eventually set up internal structures before
                       the actual benchmark
-                      
 
+Input variables:
 
-Input variables: 
-
--c_info               (type struct comm_info*)                      
+-c_info               (type struct comm_info*)
                       Collection of all base data for MPI;
                       see [1] for more information
-                      
 
--Bmark                (type struct Bench*)                      
+-Bmark                (type struct Bench*)
                       (For explanation of struct Bench type:
                       describes all aspects of modes of a benchmark;
                       see [1] for more information)
-                      
+
                       The actual benchmark
-                      
+
 IMB 3.1 <<
 -ITERATIONS           (type struct iter_schedule *)
                       Repetition scheduling
 >> IMB 3.1
 
-
--iter                 (type int)                      
+-iter                 (type int)
                       Number of the outer iteration of the benchmark. Only
                       for iter==0, the WamrUp is carried out
-                      
-
 
 */
-{
     struct cmode MD;
 
     MD.AGGREGATE = 1;
 
-    if( c_info->rank >= 0 )
-    {
+    if (c_info->rank >= 0) {
 #ifndef MPIIO
-        if( iter == 0 )
-        {
-/* IMB 3.1: other warm up settings */
-            int size = asize;
+        if (iter == 0) {
+            /* IMB 3.1: other warm up settings */
             double t[MAX_TIME_ID];
             int n_sample = ITERATIONS->n_sample;
 
@@ -145,9 +133,9 @@ IMB 3.1 <<
             Bmark->Benchmark(c_info, size, ITERATIONS, Bmark->RUN_MODES, t);
 
 #else    
-            /* It is erroneous to pass unitialized MD to the bench. it may 
+            /* It is erroneous to pass uninitialized MD to the bench. It may
              * depend on the particular mode values! Keep it for existing benchmarks
-             * to save their bahvior */        
+             * to save their behavior */
             Bmark->Benchmark(c_info, size, ITERATIONS, &MD, t);
 #endif            
 
diff --git a/src/IMB_window.c b/src_c/IMB_window.c
similarity index 60%
rename from src/IMB_window.c
rename to src_c/IMB_window.c
index 009c97ca..0ad4e9be 100644
--- a/src/IMB_window.c
+++ b/src_c/IMB_window.c
@@ -1,7 +1,6 @@
 /*****************************************************************************
  *                                                                           *
- * Copyright (c) 2003-2016 Intel Corporation.                                *
- * All rights reserved.                                                      *
+ * Copyright 2003-2018 Intel Corporation.                                    *
  *                                                                           *
  *****************************************************************************
 
@@ -52,7 +51,7 @@ For more documentation than found here, see
 
 [1] doc/ReadMe_IMB.txt 
 
-[2] Intel (R) MPI Benchmarks
+[2] Intel(R) MPI Benchmarks
     Users Guide and Methodology Description
     In 
     doc/IMB_Users_Guide.pdf
@@ -81,78 +80,67 @@ For more documentation than found here, see
 
 
 void IMB_window(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
-                MODES RUN_MODE, double* time)
+                MODES RUN_MODE, double* time) {
 /*
 
-                      
-                      MPI-2 benchmark kernel
-                      MPI_Win_create + MPI_Win_fence + MPI_Win_free
-                      
+                          MPI-2 benchmark kernel
+                          MPI_Win_create + MPI_Win_fence + MPI_Win_free
 
+Input variables:
 
-Input variables: 
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
 
--c_info               (type struct comm_info*)                      
-                      Collection of all base data for MPI;
-                      see [1] for more information
-                      
+-size                     (type int)
+                          Basic message size in bytes
 
--size                 (type int)                      
-                      Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule)
+                          Repetition scheduling
 
--ITERATIONS           (type struct iter_schedule)                      
-                      Repetition scheduling
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
 
--RUN_MODE             (type MODES)                      
-                      Mode (aggregate/non aggregate; blocking/nonblocking);
-                      see "IMB_benchmark.h" for definition
-
-
-Output variables: 
-
--time                 (type double*)                      
-                      Timing result per sample
+Output variables:
 
+-time                     (type double*)
+                          Timing result per sample
 
 */
-{
-  double t1, t2;
-  int    i, dum;
-
-  ierr = 0;
-
-  if(c_info->rank!=-1)
-    {
-      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);
-
-      t1 = MPI_Wtime();
-      for(i=0;i< ITERATIONS->n_sample;i++)
-	{
-          ierr = MPI_Win_create(c_info->r_buffer,size,1,MPI_INFO_NULL,
-                                c_info->communicator, &c_info->WIN);
-          MPI_ERRHAND(ierr);
-          ierr = MPI_Win_fence(0, c_info->WIN);
-          MPI_ERRHAND(ierr);
-/* July 2002 fix V2.2.1, empty window case */
-          if(size>0)
-          {
-          ierr = MPI_Put(c_info->s_buffer, 1, c_info->s_data_type,
-                         c_info->rank, 0, 1, c_info->r_data_type, c_info->WIN);
-          MPI_ERRHAND(ierr);
-          }
-
-          ierr = MPI_Win_fence(0, c_info->WIN);
-          MPI_ERRHAND(ierr);
-	  
-          ierr = MPI_Win_free(&c_info->WIN);
-          MPI_ERRHAND(ierr);
-	}
-      t2 = MPI_Wtime();
-      *time=(t2 - t1)/(ITERATIONS->n_sample);
-    }
-  else
-    { 
-      *time = 0.; 
+    double t1, t2;
+    int    i, dum;
+
+    ierr = 0;
+
+    if (c_info->rank != -1) {
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->communicator);
+
+        t1 = MPI_Wtime();
+        for (i = 0; i < ITERATIONS->n_sample; i++) {
+            ierr = MPI_Win_create(c_info->r_buffer, size, 1, MPI_INFO_NULL,
+                                  c_info->communicator, &c_info->WIN);
+            MPI_ERRHAND(ierr);
+            ierr = MPI_Win_fence(0, c_info->WIN);
+            MPI_ERRHAND(ierr);
+            /* July 2002 fix V2.2.1, empty window case */
+            if (size > 0) {
+                ierr = MPI_Put(c_info->s_buffer, 1, c_info->s_data_type,
+                               c_info->rank, 0, 1, c_info->r_data_type, c_info->WIN);
+                MPI_ERRHAND(ierr);
+            }
+
+            ierr = MPI_Win_fence(0, c_info->WIN);
+            MPI_ERRHAND(ierr);
+
+            ierr = MPI_Win_free(&c_info->WIN);
+            MPI_ERRHAND(ierr);
+        }
+        t2 = MPI_Wtime();
+        *time = (t2 - t1) / (ITERATIONS->n_sample);
     }
+    else
+        *time = 0.;
 }
 
diff --git a/src_c/IMB_write.c b/src_c/IMB_write.c
new file mode 100644
index 00000000..c7f0a0bc
--- /dev/null
+++ b/src_c/IMB_write.c
@@ -0,0 +1,556 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2003-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory. 
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+
+For more documentation than found here, see
+
+[1] doc/ReadMe_IMB.txt 
+
+[2] Intel(R) MPI Benchmarks
+    Users Guide and Methodology Description
+    In 
+    doc/IMB_Users_Guide.pdf
+
+ File: IMB_write.c 
+
+ Implemented functions: 
+
+ IMB_write_shared;
+ IMB_write_indv;
+ IMB_write_expl;
+ IMB_write_ij;
+ IMB_iwrite_ij;
+
+ ***************************************************************************/
+
+
+
+
+#include "IMB_declare.h"
+#include "IMB_benchmark.h"
+
+#include "IMB_prototypes.h"
+
+
+/*************************************************************************/
+
+
+/*************************************************************************/
+
+/* ===================================================================== */
+/* 
+IMB 3.1 changes
+July 2007
+Hans-Joachim Plum, Intel GmbH
+
+- replace "int n_sample" by iteration scheduling object "ITERATIONS"
+  (see => IMB_benchmark.h)
+
+*/
+/* ===================================================================== */
+
+void IMB_write_shared(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                      MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-IO benchmark kernel
+                          Driver for write benchmarks with shared file pointers.
+                          Does nothing unless c_info->File_rank >= 0.
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+-size                     (type int)
+                          Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling; n_sample is the sample count
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          time[0]: result of the blocking run (IMB_write_ij);
+                          time[1]: result of the nonblocking run (IMB_iwrite_ij),
+                          written only when RUN_MODE->NONBLOCKING is set
+*/
+    if (c_info->File_rank >= 0) {
+        if (RUN_MODE->AGGREGATE)
+            IMB_write_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
+        else
+            IMB_write_ij(c_info, size, shared, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
+
+        if (RUN_MODE->NONBLOCKING) {
+            MPI_File_close(&c_info->fh);
+            IMB_open_file(c_info);
+
+            if (RUN_MODE->AGGREGATE)
+                IMB_iwrite_ij(c_info, size, shared, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time + 1);
+            else
+                IMB_iwrite_ij(c_info, size, shared, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, 1, time + 1);
+        }
+    }
+}
+
+/*************************************************************************/
+
+void IMB_write_indv(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-IO benchmark kernel
+                          Driver for write benchmarks with individual file pointers.
+                          Does nothing unless c_info->File_rank >= 0.
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling; n_sample is the sample count
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          time[0]: result of the blocking run (IMB_write_ij);
+                          time[1]: result of the nonblocking run (IMB_iwrite_ij),
+                          written only when RUN_MODE->NONBLOCKING is set
+*/
+    if (c_info->File_rank >= 0) {
+        if (RUN_MODE->AGGREGATE)
+            IMB_write_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
+        else
+            IMB_write_ij(c_info, size, indv_block, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
+
+        if (RUN_MODE->NONBLOCKING) {
+            MPI_File_close(&c_info->fh);
+            IMB_open_file(c_info);
+
+            if (RUN_MODE->AGGREGATE)
+                IMB_iwrite_ij(c_info, size, indv_block, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time + 1);
+            else
+                IMB_iwrite_ij(c_info, size, indv_block, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, 1, time + 1);
+        }
+    }
+}
+
+/*************************************************************************/
+
+void IMB_write_expl(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
+                    MODES RUN_MODE, double* time) {
+/*
+
+                          MPI-IO benchmark kernel
+                          Driver for write benchmarks with explicit offsets.
+                          Does nothing unless c_info->File_rank >= 0.
+
+Input variables:
+
+-c_info                   (type struct comm_info*)
+                          Collection of all base data for MPI;
+                          see [1] for more information
+
+-size                     (type int)
+                          Basic message size in bytes
+-ITERATIONS               (type struct iter_schedule *)
+                          Repetition scheduling; n_sample is the sample count
+-RUN_MODE                 (type MODES)
+                          Mode (aggregate/non aggregate; blocking/nonblocking);
+                          see "IMB_benchmark.h" for definition
+
+Output variables:
+
+-time                     (type double*)
+                          time[0]: result of the blocking run (IMB_write_ij);
+                          time[1]: result of the nonblocking run (IMB_iwrite_ij),
+                          written only when RUN_MODE->NONBLOCKING is set
+*/
+    if (c_info->File_rank >= 0) {
+        if (RUN_MODE->AGGREGATE)
+            IMB_write_ij(c_info, size, explic, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, time);
+        else
+            IMB_write_ij(c_info, size, explic, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, time);
+
+        if (RUN_MODE->NONBLOCKING) {
+            MPI_File_close(&c_info->fh);
+            IMB_open_file(c_info);
+
+            if (RUN_MODE->AGGREGATE)
+                IMB_iwrite_ij(c_info, size, explic, RUN_MODE->type, 1, ITERATIONS->n_sample, 1, 1, time + 1);
+            else
+                IMB_iwrite_ij(c_info, size, explic, RUN_MODE->type, ITERATIONS->n_sample, 1, 0, 1, time + 1);
+        }
+    }
+}
+
+void IMB_write_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                  BTYPES type, int i_sample, int j_sample,
+                  int time_inner, double* time) {
+/*
+                          Calls the proper write functions, blocking case.
+                          Runs i_sample outer passes of j_sample writes each; *time
+                          receives the elapsed time divided by i_sample*j_sample.
+                          time_inner != 0 restarts the timer in every outer pass.
+                          (See IMB_read_ij for documentation of calling sequence)
+*/
+    int i, j;
+    int Locsize, Totalsize, Ioffs;
+    MPI_Status stat;
+    MPI_Offset Offset;
+
+    ierr = 0;
+
+    *time = 0.;
+    if (c_info->File_rank >= 0) {
+        int(*GEN_File_write)(MPI_File fh, const void* buf, int count,
+            MPI_Datatype datatype, MPI_Status *status);
+        int(*GEN_File_write_shared)
+            (MPI_File fh, const void* buf, int count,
+            MPI_Datatype datatype, MPI_Status *status);
+        int(*GEN_File_write_at)
+            (MPI_File fh, MPI_Offset offset, const void* buf, int count,
+            MPI_Datatype datatype, MPI_Status *status);
+
+#ifdef CHECK
+        int chk_mode;
+
+        if (pos == shared && type != Collective)
+            chk_mode = -3;
+        else
+            chk_mode = -2;
+
+        defect = 0.;
+#endif
+        if (type == Collective) {
+            GEN_File_write = MPI_File_write_all;
+            GEN_File_write_shared = MPI_File_write_ordered;
+            GEN_File_write_at = MPI_File_write_at_all;
+#ifdef DEBUG
+            fprintf(unit, "Collective output\n");
+#endif
+        } else {
+            GEN_File_write = MPI_File_write;
+            GEN_File_write_shared = MPI_File_write_shared;
+            GEN_File_write_at = MPI_File_write_at;
+#ifdef DEBUG
+            fprintf(unit, "Non collective output\n");
+#endif
+        }
+
+        Locsize = c_info->split.Locsize;
+        Totalsize = c_info->split.Totalsize;
+        Offset = (MPI_Offset)c_info->split.Offset;
+
+        /*
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        BLOCKING COLLECTIVE/NON COLLECTIVE OUTPUT CASES COMBINED
+        (function pointer GEN_File_write_XXX
+        either standard or collective MPI_File_write_XXX
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        */
+
+        for (i = 0; i < N_BARR; i++)
+            MPI_Barrier(c_info->File_comm);
+
+        if (!time_inner)
+            *time = MPI_Wtime();
+
+        for (i = 0; i < i_sample; i++) {
+            if (time_inner)
+                *time = MPI_Wtime();
+
+            if (pos == indv_block) {
+                for (j = 0; j < j_sample; j++) {
+                    ierr = GEN_File_write(c_info->fh, c_info->s_buffer, Locsize, c_info->etype, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Write standard ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+                }
+            } else if (pos == explic) {
+                for (j = 0; j < j_sample; j++) {
+                    Offset = (MPI_Offset)c_info->split.Offset + (MPI_Offset)(i + j) * (MPI_Offset)Totalsize;
+                    /* Offset is computed in MPI_Offset width: an int product overflows at 2 GiB */
+                    ierr = GEN_File_write_at
+                        (c_info->fh, Offset, c_info->s_buffer, Locsize, c_info->etype, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Write explicit ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+                }
+            } else if (pos == shared) {
+                for (j = 0; j < j_sample; j++)
+                {
+
+                    ierr = GEN_File_write_shared
+                        (c_info->fh, c_info->s_buffer, Locsize, c_info->etype, &stat);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("Write shared ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+
+                }
+            }
+            /* IMB_3.1 fix: use the following triple operation to assure write completion */
+            MPI_File_sync(c_info->fh);
+            MPI_Barrier(c_info->File_comm);
+            MPI_File_sync(c_info->fh);
+
+            if (time_inner)
+                *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+
+            CHK_DIFF("Write_xxx", c_info, c_info->r_buffer, 0,
+                     Locsize, Totalsize, asize,
+                     put, pos, j_sample, time_inner ? -1 : i,
+                     chk_mode, &defect);
+            CHK_STOP;
+
+        }
+
+        if (!time_inner)
+            *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+    }  /* end if (File_comm ) */
+}
+
+
+
+void IMB_iwrite_ij(struct comm_info* c_info, int size, POSITIONING pos,
+                   BTYPES type, int i_sample, int j_sample,
+                   int time_inner, int do_ovrlp, double* time) {
+/*
+   Non blocking MPI-IO write kernel (see IMB_read_ij for documentation of calling sequence).
+   Collective: i_sample*j_sample split-collective begin/end pairs, timed as one batch.
+   Non collective: i_sample rounds of j_sample MPI_File_iwrite* calls, each completed by
+   MPI_Wait/MPI_Waitall plus sync/barrier/sync; do_ovrlp adds IMB_cpu_exploit() in between.
+   *time receives measured seconds / (i_sample*j_sample); stays 0 when File_rank < 0.
+*/
+    int i, j;
+    int Locsize, Totalsize, Ioffs;
+    MPI_Offset Offset;
+
+    MPI_Status*  STAT, stat;
+    MPI_Request* REQUESTS;
+
+    ierr = 0;
+
+    *time = 0;
+
+    if (c_info->File_rank >= 0) {
+#ifdef CHECK
+        int chk_mode;
+
+        if (pos == shared && type != Collective)
+            chk_mode = -3;
+        else
+            chk_mode = -2;
+
+        defect = 0.;
+#endif
+        Locsize = c_info->split.Locsize;
+        Totalsize = c_info->split.Totalsize;
+        Offset = (MPI_Offset)c_info->split.Offset;
+
+
+        if (type == Collective) {
+            /*
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            NON BLOCKING COLLECTIVE OUTPUT CASES
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            */
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->File_comm);
+
+            *time = MPI_Wtime();
+
+            if (pos == indv_block) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    ierr = MPI_File_write_all_begin
+                        (c_info->fh, c_info->s_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+                    DIAGNOSTICS("IWrite coll. ", c_info, c_info->s_buffer, Locsize, Totalsize, j, pos);
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_write_all_end
+                        (c_info->fh, c_info->s_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+                }
+            } else if (pos == explic) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    Offset = c_info->split.Offset + (MPI_Offset)j*Totalsize; /* widen before multiplying: j*Totalsize may not fit in int */
+
+                    ierr = MPI_File_write_at_all_begin
+                        (c_info->fh, Offset, c_info->s_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("IWrite expl coll. ", c_info, c_info->s_buffer, Locsize, Totalsize, j, pos);
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_write_at_all_end
+                        (c_info->fh, c_info->s_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+                }
+            } else if (pos == shared) {
+                for (j = 0; j < i_sample*j_sample; j++) {
+                    ierr = MPI_File_write_ordered_begin
+                        (c_info->fh, c_info->s_buffer, Locsize, c_info->etype);
+                    MPI_ERRHAND(ierr);
+
+                    DIAGNOSTICS("IWrite shared coll. ", c_info, c_info->s_buffer, Locsize, Totalsize, j, pos);
+
+                    if (do_ovrlp)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                    ierr = MPI_File_write_ordered_end
+                        (c_info->fh, c_info->s_buffer, &stat);
+                    MPI_ERRHAND(ierr);
+                }
+            }
+            // IMB_3.1 fix: use the following triple operation to assure write completion
+            MPI_File_sync(c_info->fh);
+            MPI_Barrier(c_info->File_comm);
+            MPI_File_sync(c_info->fh);
+
+            *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+
+            CHK_DIFF("Coll. IWrite_xxx", c_info, c_info->r_buffer, 0,
+                     Locsize, Totalsize, asize,
+                     put, pos, i_sample*j_sample, -1,
+                     chk_mode, &defect);
+            CHK_STOP;
+        } else { /* type non Collective */
+            /*
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            NON BLOCKING NON COLLECTIVE OUTPUT CASES
+            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+            */
+            REQUESTS = (MPI_Request*)IMB_v_alloc(j_sample*sizeof(MPI_Request), "IWrite_ij");
+            STAT = (MPI_Status *)IMB_v_alloc(j_sample*sizeof(MPI_Status), "IWrite_ij");
+
+            for (j = 0; j < j_sample; j++)
+                REQUESTS[j] = MPI_REQUEST_NULL;
+
+            for (i = 0; i < N_BARR; i++)
+                MPI_Barrier(c_info->File_comm);
+
+            if (!time_inner)
+                *time = MPI_Wtime();
+
+            for (i = 0; i < i_sample; i++) {
+                if (time_inner)
+                    *time = MPI_Wtime();
+                if (pos == indv_block) {
+                    for (j = 0; j < j_sample; j++)
+                    {
+                        ierr = MPI_File_iwrite(c_info->fh, c_info->s_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+                        DIAGNOSTICS("IWrite standard ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+                    }
+                } else if (pos == explic) {
+                    for (j = 0; j < j_sample; j++)
+                    {
+                        Offset = c_info->split.Offset + (MPI_Offset)(i + j)*Totalsize; /* widen before multiplying: avoids int overflow */
+
+                        ierr = MPI_File_iwrite_at
+                            (c_info->fh, Offset, c_info->s_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+
+                        DIAGNOSTICS("IWrite expl ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+                    }
+                } else if (pos == shared) {
+                    for (j = 0; j < j_sample; j++) {
+                        ierr = MPI_File_iwrite_shared
+                            (c_info->fh, c_info->s_buffer, Locsize, c_info->etype, &REQUESTS[j]);
+                        MPI_ERRHAND(ierr);
+
+                        DIAGNOSTICS("IWrite shared ", c_info, c_info->s_buffer, Locsize, Totalsize, i + j, pos);
+                    }
+                }
+
+                if (do_ovrlp)
+                    for (j = 0; j < j_sample; j++)
+                        IMB_cpu_exploit(TARGET_CPU_SECS, 0);
+
+                if (j_sample == 1)
+                    MPI_Wait(REQUESTS, STAT);
+                else
+                    MPI_Waitall(j_sample, REQUESTS, STAT);
+
+
+                // IMB_3.1 fix: use the following triple operation to assure write completion
+                MPI_File_sync(c_info->fh);
+                MPI_Barrier(c_info->File_comm);
+                MPI_File_sync(c_info->fh);
+
+                if (time_inner)
+                    *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+
+                CHK_DIFF("IWrite_xxx", c_info, c_info->r_buffer, 0,
+                         Locsize, Totalsize, asize,
+                         put, pos, j_sample, time_inner ? -1 : i,
+                         chk_mode, &defect);
+                CHK_STOP;
+            }
+            if (!time_inner)
+                *time = (MPI_Wtime() - *time) / (i_sample*j_sample);
+
+            IMB_v_free((void**)&REQUESTS);
+            IMB_v_free((void**)&STAT);
+        }
+    }  /* end if (File_comm ) */
+}
diff --git a/src_c/Makefile b/src_c/Makefile
new file mode 100644
index 00000000..8179b417
--- /dev/null
+++ b/src_c/Makefile
@@ -0,0 +1,175 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+ifeq ($(origin CC),default)
+CC=mpicc
+endif # a CC given on the command line or in the environment is left untouched
+# Output names derive from TARGET: executable IMB-$(TARGET), object directory build_$(TARGET).
+BINARY=IMB-$(TARGET)
+BUILDDIR=build_$(TARGET)
+# SRC is the source list of the selected suite; only the IO suite also adds -DMPIIO to CFLAGS.
+ifeq "$(TARGET)" "MPI1"
+SRC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c IMB_mem_manager.c IMB_parse_name_mpi1.c  IMB_benchlist.c IMB_strgs.c \
+IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
+IMB_pingpong.c IMB_pingping.c IMB_allreduce.c IMB_reduce_scatter.c IMB_reduce.c IMB_exchange.c IMB_bcast.c IMB_barrier.c IMB_allgather.c IMB_allgatherv.c IMB_gather.c IMB_gatherv.c IMB_scatter.c IMB_scatterv.c IMB_alltoall.c IMB_alltoallv.c IMB_sendrecv.c IMB_init_transfer.c IMB_chk_diff.c IMB_cpu_exploit.c IMB_bandwidth.c
+endif # MPI1
+ifeq "$(TARGET)" "EXT"
+SRC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_benchlist.c IMB_parse_name_ext.c IMB_strgs.c \
+IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
+IMB_window.c IMB_ones_unidir.c IMB_ones_bidir.c IMB_ones_accu.c IMB_init_transfer.c IMB_user_set_info.c IMB_chk_diff.c
+endif # EXT
+ifeq "$(TARGET)" "IO"
+SRC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_init_file.c IMB_user_set_info.c\
+IMB_benchlist.c IMB_parse_name_io.c IMB_strgs.c \
+IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c IMB_cpu_exploit.c\
+IMB_open_close.c IMB_write.c IMB_read.c IMB_init_transfer.c IMB_chk_diff.c
+CFLAGS += -DMPIIO
+endif # IO
+ifeq "$(TARGET)" "NBC"
+SRC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c IMB_mem_manager.c IMB_parse_name_nbc.c  IMB_benchlist.c IMB_strgs.c \
+IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c \
+IMB_allreduce.c IMB_reduce_scatter.c IMB_reduce.c IMB_bcast.c IMB_barrier.c IMB_allgather.c IMB_allgatherv.c IMB_gather.c IMB_gatherv.c IMB_scatter.c IMB_scatterv.c IMB_alltoall.c IMB_alltoallv.c IMB_sendrecv.c IMB_init_transfer.c IMB_chk_diff.c IMB_cpu_exploit.c
+endif # NBC
+ifeq "$(TARGET)" "RMA"
+SRC = IMB.c IMB_utils.c IMB_declare.c  IMB_init.c  IMB_mem_manager.c IMB_benchlist.c IMB_parse_name_rma.c IMB_strgs.c \
+IMB_err_handler.c IMB_g_info.c  IMB_warm_up.c IMB_output.c  IMB_init_transfer.c IMB_user_set_info.c IMB_chk_diff.c \
+IMB_rma_put.c IMB_cpu_exploit.c IMB_rma_get.c IMB_rma_atomic.c
+endif # RMA
+
+ifeq "$(TARGET)" "" # no TARGET given: build or clean all five suites, one sub-make per TARGET
+all:
+	$(MAKE) -f Makefile TARGET=MPI1
+	$(MAKE) -f Makefile TARGET=NBC
+	$(MAKE) -f Makefile TARGET=RMA
+	$(MAKE) -f Makefile TARGET=EXT
+	$(MAKE) -f Makefile TARGET=IO
+.PHONY: all clean
+clean:
+	$(MAKE) -f Makefile TARGET=MPI1 clean
+	$(MAKE) -f Makefile TARGET=NBC clean
+	$(MAKE) -f Makefile TARGET=RMA clean
+	$(MAKE) -f Makefile TARGET=EXT clean
+	$(MAKE) -f Makefile TARGET=IO clean
+else # TARGET given: build or clean that single suite
+
+OBJ=$(SRC:%.c=$(BUILDDIR)/%.o)
+.PHONY: all clean
+all: $(BUILDDIR) $(BINARY)
+
+$(BUILDDIR):
+	-mkdir -p $@
+
+$(BINARY): $(OBJ)
+	$(CC) $(LDFLAGS) $^ -o $@
+
+$(BUILDDIR)/%.o: %.c
+	$(CC) $(CFLAGS) -D$(TARGET) -c $< -o $@
+
+clean:
+	rm -rf $(BUILDDIR) $(OBJ) $(BINARY)
+
+# DEPENDENCIES
+# NOTE(review): the rules below name X.o, but objects are built as $(BUILDDIR)/X.o (see OBJ), so they look inert -- confirm.
+IMB_declare.h:  IMB_settings.h IMB_comm_info.h IMB_settings_io.h IMB_bnames_mpi1.h \
+                IMB_bnames_ext.h IMB_bnames_io.h IMB_err_check.h IMB_appl_errors.h IMB_mem_info.h
+	touch IMB_declare.h
+
+IMB.o:              IMB_declare.h IMB_benchmark.h
+IMB_init.o:         IMB_declare.h IMB_benchmark.h
+IMB_mem_manager.o:          IMB_declare.h IMB_benchmark.h
+IMB_benchlist.o:        IMB_declare.h IMB_benchmark.h
+IMB_output.o:       IMB_declare.h IMB_benchmark.h
+IMB_warm_up.o:          IMB_declare.h IMB_benchmark.h
+IMB_chk_diff.o:         IMB_declare.h IMB_chk_diff.c
+IMB_declare.o:      IMB_declare.h
+IMB_g_info.o:       IMB_declare.h IMB_benchmark.h
+IMB_err_handler.o:          IMB_declare.h IMB_appl_errors.h
+IMB_init_transfer.o:    IMB_declare.h IMB_benchmark.h IMB_comments.h
+
+# IMB-MPI1:
+IMB_parse_name_mpi1.o:  IMB_declare.h IMB_benchmark.h IMB_comments.h
+IMB_pingping.o:     IMB_declare.h
+IMB_pingpong.o:     IMB_declare.h
+IMB_sendrecv.o:     IMB_declare.h
+IMB_exchange.o:     IMB_declare.h
+IMB_reduce.o:       IMB_declare.h
+IMB_reduce_scatter.o:   IMB_declare.h
+IMB_allreduce.o:            IMB_declare.h
+IMB_bcast.o:        IMB_declare.h
+IMB_allgather.o:        IMB_declare.h
+IMB_allgatherv.o:       IMB_declare.h
+IMB_alltoall.o:        IMB_declare.h
+IMB_alltoallv.o:       IMB_declare.h
+IMB_barrier.o:      IMB_declare.h
+IMB_bandwidth.o:    IMB_declare.h
+
+# IMB-NBC:
+IMB_parse_name_nbc.o:  IMB_declare.h IMB_benchmark.h IMB_comments.h
+
+# IMB-EXT:
+IMB_parse_name_ext.o:   IMB_declare.h IMB_benchmark.h IMB_comments.h
+IMB_window.o:           IMB_declare.h
+IMB_ones_unidir.o:      IMB_declare.h
+IMB_ones_bidir.o:       IMB_declare.h
+IMB_ones_accu.o:        IMB_declare.h
+
+# IMB-IO:
+IMB_parse_name_io.o:     IMB_declare.h IMB_benchmark.h IMB_comments.h
+IMB_init_file.o:         IMB_declare.h IMB_benchmark.h IMB_comments.h
+IMB_open_close.o:        IMB_declare.h IMB_benchmark.h
+IMB_write.o:             IMB_declare.h IMB_benchmark.h
+IMB_read.o:              IMB_declare.h IMB_benchmark.h
+
+#IMB-RMA:
+IMB_parse_name_rma.o:     IMB_declare.h IMB_benchmark.h IMB_comments.h
+IMB_rma_put.o:            IMB_declare.h IMB_benchmark.h
+IMB_rma_get.o:            IMB_declare.h IMB_benchmark.h
+IMB_rma_atomic.o:         IMB_declare.h IMB_benchmark.h
+
+endif # TARGET
diff --git a/src/make_ict_win b/src_c/Makefile_win
old mode 100755
new mode 100644
similarity index 70%
rename from src/make_ict_win
rename to src_c/Makefile_win
index bb9dbd37..56d98c76
--- a/src/make_ict_win
+++ b/src_c/Makefile_win
@@ -1,10 +1,58 @@
-# Enter root directory of MPI for Windows install
-MPI_HOME    =
-
-MPI_INCLUDE =
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+MPI_HOME    =%I_MPI_ROOT%
+# MPI headers and the import library are located under %I_MPI_ROOT% (no machine-specific paths).
+MPI_INCLUDE =%I_MPI_ROOT%\intel64\include
 LIB_PATH    =
-LIBS        =
-CC          =
+LIBS        =%I_MPI_ROOT%\intel64\lib\release\impi.lib
+CC          =cl.exe
 OPTFLAGS    = /Ox /DWIN_IMB /D_CRT_SECURE_NO_DEPRECATE
 CLINKER     = $(CC)
 LDFLAGS     =
@@ -64,15 +112,15 @@ default: all
 all: IMB-IO  IMB-EXT IMB-MPI1 IMB-NBC IMB-RMA
 
 IMB-MPI1: $(SRC1) IMB_declare.h exe_mpi1
-	$(MAKE) /f make_ict_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=MPI1 OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" MPI1
+	$(MAKE) /f Makefile_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=MPI1 OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" MPI1
 IMB-EXT:$(SRCEXT) IMB_declare.h  exe_ext
-	$(MAKE) /f make_ict_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=EXT OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" EXT
+	$(MAKE) /f Makefile_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=EXT OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" EXT
 IMB-IO: $(SRCIO) IMB_declare.h exe_io
-	$(MAKE) /f make_ict_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=MPIIO OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" IO
+	$(MAKE) /f Makefile_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=MPIIO OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" IO
 IMB-NBC: $(SRCNBC) IMB_declare.h exe_nbc
-	$(MAKE) /f make_ict_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=NBC OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" NBC
+	$(MAKE) /f Makefile_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=NBC OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" NBC
 IMB-RMA: $(SRCRMA) IMB_declare.h exe_rma
-	$(MAKE) /f make_ict_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=RMA OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" RMA
+	$(MAKE) /f Makefile_win MPI_HOME="$(MPI_HOME)" MPI_INCLUDE="$(MPI_INCLUDE)" CPPFLAGS="$(CPPFLAGS)" CC=$(CC) CPP=RMA OPTFLAGS="$(OPTFLAGS)" LDFLAGS="$(LDFLAGS)" LIB_PATH="$(LIB_PATH)" LIBS="$(LIBS)" RMA
 
 exe_io:
 	del /f /q $(OBJIO)
diff --git a/src_cpp/EXT/EXT_benchmark.cpp b/src_cpp/EXT/EXT_benchmark.cpp
new file mode 100644
index 00000000..fcf4b566
--- /dev/null
+++ b/src_cpp/EXT/EXT_benchmark.cpp
@@ -0,0 +1,140 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include "smart_ptr.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "original_benchmark.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+#define BENCHMARK(BMRK_FN, BMRK_NAME) template class OriginalBenchmark<BenchmarkSuite<BS_EXT>, BMRK_FN>; \
+DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME(OriginalBenchmark<BenchmarkSuite<BS_EXT>, BMRK_FN>), BMRK_NAME) \
+template<> smart_ptr<Bmark_descr> OriginalBenchmark<BenchmarkSuite<BS_EXT>, BMRK_FN>::descr = NULL; \
+template<> bool OriginalBenchmark<BenchmarkSuite<BS_EXT>, BMRK_FN>::init_description() /* left open on purpose: the { ... } block written after each BENCHMARK(fn, name) use becomes the body of this init_description() specialization */
+
+BENCHMARK(IMB_window, Window) // init_description() for benchmark name Window, bound to function IMB_window
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(NO);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    return true; // unconditionally reports success
+}
+
+BENCHMARK(IMB_unidir_get, Unidir_Get) // init_description() for benchmark name Unidir_Get, bound to function IMB_unidir_get
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(GET);
+    return true; // unconditionally reports success
+}
+
+BENCHMARK(IMB_unidir_put, Unidir_Put) // init_description() for benchmark name Unidir_Put, bound to function IMB_unidir_put
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PUT);
+    return true; // unconditionally reports success
+}
+
+BENCHMARK(IMB_bidir_get, Bidir_Get) // init_description() for benchmark name Bidir_Get, bound to function IMB_bidir_get
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(GET);
+    return true; // unconditionally reports success
+}
+
+BENCHMARK(IMB_bidir_put, Bidir_Put) // init_description() for benchmark name Bidir_Put, bound to function IMB_bidir_put
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PUT);
+    return true; // unconditionally reports success
+}
+
+BENCHMARK(IMB_accumulate, Accumulate) // init_description() for benchmark name Accumulate, bound to function IMB_accumulate
+{
+    descr->flags.insert(DEFAULT); // each insert adds one flag to this benchmark's description
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(PUT);
+    return true; // unconditionally reports success
+}
diff --git a/src_cpp/EXT/EXT_suite.cpp b/src_cpp/EXT/EXT_suite.cpp
new file mode 100644
index 00000000..cff073b5
--- /dev/null
+++ b/src_cpp/EXT/EXT_suite.cpp
@@ -0,0 +1,566 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#if defined MPI1 || defined NBC || defined MPIIO || defined RMA
+#error Legacy benchmark components can't be linked together
+#endif
+
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <algorithm>
+#include <mpi.h>
+#include "args_parser.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "any.h"
+#include "benchmark_suite.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_EXT, IMB-EXT) // NOTE(review): macro is defined elsewhere; presumably sets up the BS_EXT suite under the name IMB-EXT - confirm
+
+namespace NS_EXT {                // suite-wide state used by prepare(), finalize() and get_parameter()
+    comm_info c_info;             // run configuration filled in prepare() (ranks, msglen, group_mode, ...)
+    iter_schedule ITERATIONS;     // repetition / off-cache settings filled in prepare()
+    GLOBALS glob;                 // carries NP_min
+    bool prepared = false;        // becomes true in prepare() once a requested benchmark belongs to this suite
+}
+
+bool load_msg_sizes(const char *filename)
+{
+    /* Loads user-defined message lengths from `filename` into NS_EXT::c_info
+     * (msglen and n_lens). Lines starting with '#' and lines holding at most
+     * one character are skipped; every other line must be a non-negative
+     * integer, optionally followed by k/K (x 1024) or m/M (x 1024*1024).
+     * Returns true when at least one length was read. On any failure the
+     * file is closed, the temporary array is freed and c_info is untouched. */
+    using namespace NS_EXT;
+
+    FILE *t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+
+    /* Pass 1: count the candidate lines so the array is allocated once. */
+    int n_lens = 0;
+    char inp_line[72];
+    while (fgets(inp_line, 72, t)) {
+        if (inp_line[0] != '#' && strlen(inp_line) > 1)
+            n_lens++;
+    }
+
+    int *lens = NULL;
+    if (n_lens > 0)
+        lens = (int *)malloc(n_lens * sizeof(int));
+    if (lens == NULL) {
+        fclose(t);
+        return false;
+    }
+
+    /* Pass 2: re-read the same stream and parse each candidate line. */
+    rewind(t);
+    char S[32];
+    int isz = 0;
+    bool ok = true;
+    while (ok && fgets(inp_line, 72, t)) {
+        if (inp_line[0] == '#' || strlen(inp_line) <= 1)
+            continue;
+        int sz = 0;
+        S[0] = '\0';
+        /* %31s keeps the suffix inside S; a bare %s could write past it. */
+        int ierr = sscanf(inp_line, "%d%31s", &sz, S);
+        if (ierr <= 0 || sz < 0 || isz >= n_lens) {
+            ok = false;
+        } else if (ierr == 2) {
+            if (S[0] == 'k' || S[0] == 'K')
+                sz = sz * 1024;
+            else if (S[0] == 'm' || S[0] == 'M')
+                sz = sz * 1024 * 1024;
+            else
+                ok = false;
+        }
+        if (ok)
+            lens[isz++] = sz;
+    }
+    fclose(t);
+
+    if (!ok || isz == 0) {
+        free(lens);
+        return false;
+    }
+    c_info.msglen = lens;
+    c_info.n_lens = isz;
+    return true;
+}
+
+template <> bool BenchmarkSuite<BS_EXT>::declare_args(args_parser &parser, std::ostream &output) const { // registers this suite's options in its own parser group; always returns true
+    UNUSED(output);
+    parser.set_current_group(get_name()); // the options below are filed under the suite's name
+    parser.add<int>("npmin", 2).set_caption("NPmin"). // second argument is presumably the default (help text: NPmin=2)
+        set_description(
+            "The argument after npmin is NPmin,\n"
+            "the minimum number of processes to run on\n"
+            "(then if IMB is started on NP processes, the process numbers\n"
+            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+            "To run on just NP processes, run IMB on NP and select -npmin NP\n"
+            "\n"
+            "Default:\n"
+            "NPmin=2\n");
+    parser.add<int>("multi", -1).set_caption("MultiMode"). // -1 corresponds to "multi off" in the help text
+        set_description(
+            "The argument after -multi is MultiMode (0 or 1)\n"
+            "\n"
+            "If -multi is selected, running the N process version of a benchmark\n"
+            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+            "\n"
+            "MultiMode only controls default (0) or extensive (1) output charts.\n"
+            "0: only lowest performance groups is output\n"
+            "1: all groups are output\n"
+            "\n"
+            "Default:\n"
+            "multi off\n");
+    parser.add_vector<float>("off_cache", "-1.0,0.0", ',', 1, 2). // ',' separates elements; 1 and 2 are presumably min/max element counts - confirm in args_parser.h
+           set_caption("cache_size[,cache_line_size]").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING). // prepare() branches on 1 vs 2 supplied values; presumably this mode keeps defaults from padding a partial list
+           set_description(
+                "the argument after off_cache can be either 1 single number (cache_size),\n"
+                "or 2 comma separated numbers (cache_size,cache_line_size), or just -1\n"
+                "\n"
+                "By default, without this flag, the communications buffer is\n"
+                "the same within all repetitions of one message size sample;\n"
+                "most likely, cache reusage is yielded and thus throughput results\n"
+                "that might be non realistic.\n"
+                "\n"
+                "With -off_cache, it is attempted to avoid cache reusage.\n"
+                "cache_size is a float for an upper bound of the size of the last level cache in MBytes\n"
+                "cache_line_size is assumed to be the size (Bytes) of a last level cache line\n"
+                "(can be an upper estimate).\n"
+                "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );\n"
+                "when repetitively using messages of a particular size, their addresses are advanced within those\n"
+                "buffers so that a single message is at least 2 cache lines after the end of the previous message.\n"
+                "Only when those buffers have been marched through (eventually), they will re-used from the beginning.\n"
+                "\n"
+                "A cache_size and a cache_line_size are assumed as statically defined\n"
+                "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered\n"
+                "\n"
+                "remark: -off_cache is effective for IMB-RMA, IMB-EXT, but not IMB-IO\n"
+                "\n"
+                "Examples:\n"
+                "-off_cache -1 (use defaults of IMB_mem_info.h);\n"
+                "-off_cache 2.5 (2.5 MB last level cache, default line size);\n"
+                "-off_cache 16,128 (16 MB last level cache, line size 128);\n"
+                "\n"
+                "NOTE: the off_cache mode might also be influenced by eventual internal\n"
+                "caching with the MPI library. This could make the interpretation\n"
+                "intricate.\n"
+                "\n"
+                "Default:\n"
+                "no cache control, data likely to come out of cache most of the time\n");
+    parser.add_vector<int>("iter", "1000,40,100", ',', 1, 3). // prepare() reads all three elements unconditionally
+           set_caption("msgspersample[,overall_vol[,msgs_nonaggr]]").
+           set_description(
+                "The argument after -iter can contain from 1 to 3 comma separated values\n"
+                "3 integer numbers override the defaults\n"
+                "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of IMB_settings.h\n"
+                "Examples:\n"
+                "-iter 2000        (override MSGSPERSAMPLE by value 2000)\n"
+                "-iter 1000,100    (override OVERALL_VOL by 100)\n"
+                "-iter 1000,40,150 (override MSGS_NONAGGR by 150)\n"
+                "\n"   
+                "Default:\n"
+                "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h\n");
+    parser.add<string>("iter_policy", "dynamic").set_caption("iter_policy"). // prepare() recognises dynamic, off, multiple_np and auto
+           set_description(
+                "The argument after -iter_policy is a one from possible strings,\n"
+                "Specifying that policy will be used for auto iteration control:\n"
+                "dynamic,multiple_np,auto,off\n"
+                "\n"
+                "Example:\n"
+                "-iter_policy auto\n"
+                "\n"
+                "Default:\n"
+                "dynamic\n");
+    parser.add<float>("time", 10.0f).set_caption("max_runtime per sample"). // copied to ITERATIONS.secs in prepare()
+           set_description(
+                "The argument after -time is a float, specifying that\n"
+                "a benchmark will run at most that many seconds per message size\n"
+                "the combination with the -iter flag or its defaults is so that always\n"
+                "the maximum number of repetitions is chosen that fulfills all restrictions\n"
+                "\n"
+                "Example:\n"
+                "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, if\n"
+                "the default (or -iter selected) number of repetitions would take longer than that)\n"
+                "\n"
+                "remark: per sample, the rough number of repetitions to fulfill the -time request\n"
+                "is estimated in preparatory runs that use ~ 1 second overhead\n"
+                "\n"
+                "Default:\n"
+                "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10\n"
+                "(new default in IMB_3.2)\n");
+    parser.add<float>("mem", 1.0f). // copied to c_info.max_mem in prepare()
+           set_caption("max. per process memory for overall message buffers").
+           set_description(
+               "The argument after -mem is a float, specifying that\n"
+               "at most that many GBytes are allocated per process for the message buffers\n"
+               "if the size is exceeded, a warning will be output, stating how much memory\n"
+               "would have been necessary, but the overall run is not interrupted\n"
+               "\n"
+               "Example:\n"
+               "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process)\n"
+               "\n"
+               "Default:\n"
+               "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h\n");
+    parser.add<string>("msglen", "").set_caption("Lengths_file"). // empty string = no lengths file; prepare() only loads a non-empty name
+           set_description(
+               "The argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+               "message lengths, 1 per line\n"
+               "\n"
+               "Default:\n"
+               "no lengths_file, lengths defined by settings.h, settings_io.h\n");
+    parser.add_vector<int>("map", "0x0", 'x', 2, 2).set_caption("PxQ"). // 'x' is the element separator; presumably exactly two values are required (2, 2)
+           set_description(
+               "The argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+               "enter PxQ with the 2 numbers separated by letter \"x\" and no blancs\n"
+               "the basic communicator is set up as P by Q process grid\n"
+               "\n"
+               "If, e.g., one runs on N nodes of X processors each, and inserts\n"
+               "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+               "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+               "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+               "first priority)\n"
+               "\n"
+               "Default:\n"
+               "Q=1\n");
+    parser.add_vector<int>("msglog", "0:22", ':', 1, 2). // ':' separates min and max; a single value is treated as max by prepare()
+           set_caption("min_msglog:max_msglog").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING). // prepare() branches on 1 vs 2 supplied values
+           set_description(
+               "the argument after -msglog is min:max, where min and max are non-negative integer numbers,\n"
+               "min < max, min is such that the second smallest data transfer size is max(unit, 2^min)\n"
+               "(the smallest always being 0), where unit = sizeof(float) for reductions, and unit = 1,\n"
+               "otherwise. max is such that 2^max is largest messages size, and max must be less than 31\n");
+    parser.add<bool>("imb_barrier", false).set_caption("on or off"). // mapped to IMB_internal_barrier (1/0) in prepare()
+           set_description(
+               "Use internal MPI-independent barrier syncronization implementation,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");                   
+    parser.set_default_current_group(); // presumably switches the parser back to its default group - confirm
+    return true;
+}
+
+#define BASIC_INPUT_EXPERIMENT 1 // 1: prepare() configures the run from args_parser; 0: prepare() calls IMB_basic_input() instead
+
+template <typename T>
+void preprocess_list(T &list) { // replaces `list` by a container whose elements went through tolower()
+    T lowered;
+    std::transform(list.begin(), list.end(), std::insert_iterator<T>(lowered, lowered.end()), tolower);
+    list = lowered;
+}
+
+// Validates the parsed options for the BS_EXT suite and fills the NS_EXT state
+// (c_info, ITERATIONS, glob) from them. Returns true right away when none of
+// the requested names belongs to this suite, false when an option value is
+// rejected, and true after rank 0 has written the run header to `unit`.
+template <> bool BenchmarkSuite<BS_EXT>::prepare(const args_parser &parser, const vector<string> &benchs,
+                                                  const vector<string> &unknown_args, std::ostream &output) {
+    using namespace NS_EXT;
+    for (vector<string>::const_iterator it = unknown_args.begin(); it != unknown_args.end(); ++it) {
+        output << "Invalid benchmark name " << *it << endl;
+    }
+    vector<string> all_benchs, spare_benchs = benchs, intersection = benchs;
+    BenchmarkSuite<BS_EXT>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);   // presumably leaves the requested names this suite lacks
+    set_operations::exclude(intersection, spare_benchs); // presumably leaves the requested names this suite provides
+    if (intersection.size() == 0)
+        return true;
+
+    prepared = true;
+
+    IMB_set_default(&c_info);
+    IMB_init_pointers(&c_info);
+
+#if BASIC_INPUT_EXPERIMENT == 1
+    {
+        /* run time control as default */
+        ITERATIONS.n_sample=0;
+        ITERATIONS.off_cache=0;
+        ITERATIONS.cache_size=-1;
+        ITERATIONS.s_offs = ITERATIONS.r_offs = 0;
+        ITERATIONS.s_cache_iter = ITERATIONS.r_cache_iter = 1;
+        ITERATIONS.msgspersample=MSGSPERSAMPLE;
+        ITERATIONS.msgs_nonaggr=MSGS_NONAGGR;
+        ITERATIONS.overall_vol=OVERALL_VOL;
+        ITERATIONS.secs=SECS_PER_SAMPLE;
+        ITERATIONS.iter_policy=ITER_POLICY;
+        ITERATIONS.numiters=(int*)NULL;
+
+        MPI_Comm_rank(MPI_COMM_WORLD,&c_info.w_rank);
+        MPI_Comm_size(MPI_COMM_WORLD,&c_info.w_num_procs);
+
+        unit = stdout;
+
+        if( c_info.w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 )
+            unit = fopen(OUTPUT_FILENAME,"w");
+
+        c_info.group_mode = -1;
+        glob.NP_min=2;
+    }
+    bool cmd_line_error = false; // collected over all options; checked once after the last one
+
+    // npmin
+    glob.NP_min = parser.get<int>("npmin");
+    if (glob.NP_min <= 0)
+        cmd_line_error = true;
+
+    // multi
+    c_info.group_mode = parser.get<int>("multi");
+
+    // off_cache
+    vector<float> csize;
+    parser.get<float>("off_cache", csize);
+    if (csize.size() == 1) {
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = CACHE_LINE_SIZE;
+        if (ITERATIONS.cache_size < 0.0) // negative single value selects the built-in CACHE_SIZE
+            ITERATIONS.cache_size = CACHE_SIZE;
+    } else {
+        assert(csize.size() == 2);
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = (int)csize[1];
+        if (csize[1] != floor(csize[1])) // the line size has to be a whole number
+            cmd_line_error = true;
+    }
+    if (ITERATIONS.cache_size > 0.0)
+        ITERATIONS.off_cache = 1;
+
+    // iter
+    vector<int> given_iter;
+    parser.get<int>("iter", given_iter);
+    ITERATIONS.msgspersample = given_iter[0];
+    ITERATIONS.overall_vol = given_iter[1] * 1024 * 1024; // the option value is scaled by 1024*1024
+    ITERATIONS.msgs_nonaggr = given_iter[2];
+
+    // iter_policy
+    string given_iter_policy = parser.get<string>("iter_policy"); // NOTE(review): an unrecognised string is ignored and ITER_POLICY stays in effect
+    if (given_iter_policy == "dynamic") { ITERATIONS.iter_policy = imode_dynamic; }
+    if (given_iter_policy == "off") { ITERATIONS.iter_policy = imode_off; }
+    if (given_iter_policy == "multiple_np") { ITERATIONS.iter_policy = imode_multiple_np; }
+    if (given_iter_policy == "auto") { ITERATIONS.iter_policy = imode_auto; }
+
+    // time
+    ITERATIONS.secs = parser.get<float>("time");
+
+    // mem
+    c_info.max_mem = parser.get<float>("mem");
+
+    // map
+    vector<int> given_map;
+    parser.get<int>("map", given_map);
+    c_info.px = given_map[0];
+    c_info.py = given_map[1];
+    if (c_info.px * c_info.py > c_info.w_num_procs) // the process grid must fit into the world communicator
+        cmd_line_error = true;
+
+    // msglen
+    string given_msglen_filename = parser.get<string>("msglen");
+    if (given_msglen_filename != "") {
+        if (!load_msg_sizes(given_msglen_filename.c_str())) {
+            output << "Sizes File " << given_msglen_filename << " invalid or doesnt exist" << endl;
+            cmd_line_error = true;
+        }
+    }
+
+    // msglog
+    vector<int> given_msglog;
+    parser.get<int>("msglog", given_msglog);
+    if (given_msglog.size() == 1) {
+        c_info.min_msg_log = 0;
+        c_info.max_msg_log = given_msglog[0];
+    } else {
+        c_info.min_msg_log = given_msglog[0];
+        c_info.max_msg_log = given_msglog[1];
+    }
+    const int MAX_INT_LOG = 31; // 1 << max_msg_log is printed below as an int
+    if (c_info.min_msg_log < 0 || c_info.min_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < 0 || c_info.max_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < c_info.min_msg_log)
+        cmd_line_error = true;
+
+    // imb_barrier
+    IMB_internal_barrier = (parser.get<bool>("imb_barrier") ? 1 : 0);
+
+    if (cmd_line_error)
+        return false;
+
+    if (ITERATIONS.iter_policy != imode_off &&
+        ITERATIONS.iter_policy != imode_invalid &&
+        c_info.n_lens > 0) {
+        ITERATIONS.numiters = (int *)malloc(c_info.n_lens * sizeof(int));
+    }
+
+#endif
+
+#if BASIC_INPUT_EXPERIMENT == 0
+    struct Bench *BList;
+    char *argv[] = { "" };
+    int argc = 0;
+    IMB_basic_input(&c_info, &BList, &ITERATIONS, &argc, (char ***)argv, &glob.NP_min);
+#endif
+
+    if (c_info.w_rank == 0 ) {
+        IMB_general_info();
+        fprintf(unit,"\n\n# Calling sequence was: \n\n");
+        string cmd_line;
+        parser.get_command_line(cmd_line);
+        fprintf(unit, "# %s\n\n", cmd_line.c_str());
+        if (c_info.n_lens) {
+            fprintf(unit,"# Message lengths were user defined\n");
+        } else {
+            fprintf(unit,"# Minimum message length in bytes:   %d\n",0);
+            fprintf(unit,"# Maximum message length in bytes:   %d\n", 1<<c_info.max_msg_log);
+        }
+
+        fprintf(unit,"#\n");
+        fprintf(unit,"# MPI_Datatype                   :   MPI_BYTE \n");
+        fprintf(unit,"# MPI_Datatype for reductions    :   MPI_FLOAT\n");
+        fprintf(unit,"# MPI_Op                         :   MPI_SUM  \n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"\n");
+        fprintf(unit,"# List of Benchmarks to run:\n\n");
+        for (vector<string>::iterator it = intersection.begin(); it != intersection.end(); ++it) {
+            fprintf(unit, "# %s\n", it->c_str()); // written to `unit` like the rest of the header, not to stdout
+            std::vector<std::string> comments = create(it->c_str())->get_comments();
+            for (vector<string>::iterator it_com = comments.begin(); it_com != comments.end(); ++it_com) {
+                fprintf(unit, "#     %s\n", it_com->c_str());
+            }
+        }
+    }
+    return true;
+}
+
+template <> void BenchmarkSuite<BS_EXT>::finalize(const vector<string> &benchs,
+                                                   std::ostream &output) {
+    UNUSED(output);
+    using namespace NS_EXT;
+    if (prepared) {   // nothing to do unless prepare() accepted a benchmark of this suite
+        for (size_t idx = 0; idx < benchs.size(); ++idx) {
+            // Each requested benchmark is instantiated; nothing further is done with it.
+            smart_ptr<Benchmark> instance = get_instance().create(benchs[idx]);
+            if (instance.get() == NULL)
+                continue;
+        }
+        // Only rank 0 writes the closing banner.
+        if (c_info.w_rank == 0)
+            fprintf(unit,"\n\n# All processes entering MPI_Finalize\n\n");
+    }
+}
+
+// Fills `benchs` with the suite's benchmark names; with DEFAULT_BENCHMARKS, names whose benchmark is not is_default() are removed.
+template <> void BenchmarkSuite<BS_EXT>::get_bench_list(set<string> &benchs, 
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_EXT>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS) return; // any other filter keeps the full list
+    for (set<string>::iterator it = benchs.begin(); it != benchs.end(); ) { // advanced in the body: erase(it++) leaves the element before it is destroyed
+        smart_ptr<Benchmark> b = get_instance().create(*it);
+        if (b.get() != NULL && !b->is_default())
+            benchs.erase(it++);
+        else
+            ++it;
+    }
+}
+
+// Fills `benchs` with every benchmark name of the suite. With the
+// DEFAULT_BENCHMARKS filter, names whose benchmark is not is_default() are removed.
+template <> void BenchmarkSuite<BS_EXT>::get_bench_list(vector<string> &benchs, 
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_EXT>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS) return; // any other filter keeps the full list
+    // Walk from the back so an erase never shifts an index that is still to be visited.
+    // `i-- > 0` visits index 0 as well and copes with an empty list.
+    for (size_t i = benchs.size(); i-- > 0; ) {
+        smart_ptr<Benchmark> b = get_instance().create(benchs[i]);
+        if (b.get() == NULL)
+            continue;
+        if (!b->is_default())
+            benchs.erase(benchs.begin() + i);
+    }
+}
+
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+// HANDLE_PARAMETER: when `key` equals the variable's name, stores a smart_ptr<TYPE> to that variable in `result`
+// and calls detach_ptr() on it - presumably so the namespace-scope object is never deleted (confirm in smart_ptr/any).
+template<> any BenchmarkSuite<BS_EXT>::get_parameter(const std::string &key) { // answers "c_info", "ITERATIONS" and "glob"; any other key returns a default-constructed any
+    using namespace NS_EXT;
+    any result;
+    HANDLE_PARAMETER(comm_info, c_info);
+    HANDLE_PARAMETER(iter_schedule, ITERATIONS);
+    HANDLE_PARAMETER(GLOBALS, glob);
+    return result;
+}
+
+#ifdef WIN32
+template BenchmarkSuite<BS_EXT>; // NOTE(review): looks like an explicit instantiation written without a class-key (standard form: `template class ...`); Windows-only - confirm the compiler accepts it
+#endif
diff --git a/src_cpp/EXT/Makefile.EXT.mk b/src_cpp/EXT/Makefile.EXT.mk
new file mode 100644
index 00000000..1195aab6
--- /dev/null
+++ b/src_cpp/EXT/Makefile.EXT.mk
@@ -0,0 +1,78 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+include helpers/Makefile.*.mk
+# Every compile of this suite is tagged with -DEXT.
+override CPPFLAGS += -DEXT
+# C++ sources of the suite, then the C sources taken from $(C_SRC_DIR); C_OBJ maps each of them to EXT/<name>.o.
+BECHMARK_SUITE_SRC += EXT/EXT_suite.cpp EXT/EXT_benchmark.cpp
+C_SRC = $(C_SRC_DIR)/IMB_benchlist.c \
+$(C_SRC_DIR)/IMB_chk_diff.c \
+$(C_SRC_DIR)/IMB_declare.c \
+$(C_SRC_DIR)/IMB_err_handler.c \
+$(C_SRC_DIR)/IMB_g_info.c \
+$(C_SRC_DIR)/IMB_init.c \
+$(C_SRC_DIR)/IMB_init_transfer.c \
+$(C_SRC_DIR)/IMB_mem_manager.c \
+$(C_SRC_DIR)/IMB_ones_accu.c \
+$(C_SRC_DIR)/IMB_ones_bidir.c \
+$(C_SRC_DIR)/IMB_ones_unidir.c \
+$(C_SRC_DIR)/IMB_output.c \
+$(C_SRC_DIR)/IMB_parse_name_ext.c \
+$(C_SRC_DIR)/IMB_strgs.c \
+$(C_SRC_DIR)/IMB_utils.c \
+$(C_SRC_DIR)/IMB_user_set_info.c \
+$(C_SRC_DIR)/IMB_warm_up.c \
+$(C_SRC_DIR)/IMB_window.c
+C_OBJ=$(subst $(C_SRC_DIR),EXT,$(C_SRC:.c=.o))
+ADDITIONAL_OBJ += $(C_OBJ)
+# Pattern rule: build EXT/<name>.o from $(C_SRC_DIR)/<name>.c (-DEXT is repeated here although CPPFLAGS already carries it).
+EXT/%.o: $(C_SRC_DIR)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DEXT -c -o $@ $<
diff --git a/src_cpp/EXT/Makefile_win.EXT.mk b/src_cpp/EXT/Makefile_win.EXT.mk
new file mode 100644
index 00000000..ee013f26
--- /dev/null
+++ b/src_cpp/EXT/Makefile_win.EXT.mk
@@ -0,0 +1,89 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+!INCLUDE  helpers/Makefile_win.helpers.mk
+# Add -DEXT to both the C++ and the C compile flags; C sources are looked up one directory above.
+CPPFLAGS = $(CPPFLAGS) -DEXT
+CFLAGS = $(CFLAGS) -DEXT
+C_SRC_DIR = ../$(C_SRC_DIR)
+# Objects built from the C sources in $(C_SRC_DIR).
+C_OBJ = IMB_benchlist.obj \
+             IMB_chk_diff.obj \
+             IMB_declare.obj \
+             IMB_err_handler.obj \
+             IMB_g_info.obj \
+             IMB_init.obj \
+             IMB_init_transfer.obj \
+             IMB_mem_manager.obj \
+             IMB_ones_accu.obj \
+             IMB_ones_bidir.obj \
+             IMB_ones_unidir.obj \
+             IMB_output.obj \
+             IMB_parse_name_ext.obj \
+             IMB_strgs.obj \
+             IMB_utils.obj \
+             IMB_user_set_info.obj \
+             IMB_warm_up.obj \
+             IMB_window.obj
+# Objects built from C++ sources (see the two .cpp inference rules below).
+BECHMARK_SUITE_OBJ = EXT_suite.obj \
+                     EXT_benchmark.obj\
+                     imb.obj args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+# Inference rules: one per source directory ($(C_SRC_DIR), ../ and EXT/), each compiling with the MPI include path.
+{$(C_SRC_DIR)/}.c.obj:
+	$(CC) /I"$(MPI_INCLUDE)" $(CFLAGS) -c $(C_SRC_DIR)/$*.c
+
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+
+{EXT/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c EXT/$*.cpp
diff --git a/src_cpp/HALO/Makefile.HALO.mk b/src_cpp/HALO/Makefile.HALO.mk
new file mode 100644
index 00000000..0c3c1ffc
--- /dev/null
+++ b/src_cpp/HALO/Makefile.HALO.mk
@@ -0,0 +1,59 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# HALO suite build fragment: adds the HALO define and the HALO and MT include paths.
+override CPPFLAGS += -DHALO
+override CPPFLAGS += -IHALO -IMT
+# Sources of the HALO suite, appended to the list kept by the including Makefile.
+BECHMARK_SUITE_SRC += HALO/halo_suite.cpp HALO/halo_benchmark.cpp 
+# Headers of the HALO suite (presumably tracked as dependencies - confirm in the including Makefile).
+HEADERS += MT/MT_benchmark.h HALO/halo_benchmark.h
+# NOTE(review): presumably makes the including Makefile add the OpenMP flags - confirm.
+WITH_OPENMP = 1
+
diff --git a/src_cpp/HALO/halo_benchmark.cpp b/src_cpp/HALO/halo_benchmark.cpp
new file mode 100644
index 00000000..fc180208
--- /dev/null
+++ b/src_cpp/HALO/halo_benchmark.cpp
@@ -0,0 +1,75 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+
+#define WITH_VECTOR
+
+#define GLUE_TYPENAME2(A,B) A,B
+#define GLUE_TYPENAME3(A,B,C) A,B,C
+
+#include "halo_benchmark.h"
+
+using namespace std;
+
+namespace ndim_halo_benchmark {   // benchmark_suite.h is included inside this namespace, so its declarations land here
+#include "benchmark_suite.h"    
+// NOTE(review): presumably instantiates HaloBenchmark for the generic suite and registers it as "simple_halo" - confirm in DECLARE_INHERITED_TEMPLATE.
+DECLARE_INHERITED_TEMPLATE(HaloBenchmark<BenchmarkSuite<BS_GENERIC> >, simple_halo)
+    
+}
diff --git a/src_cpp/HALO/halo_benchmark.h b/src_cpp/HALO/halo_benchmark.h
new file mode 100644
index 00000000..93b7e738
--- /dev/null
+++ b/src_cpp/HALO/halo_benchmark.h
@@ -0,0 +1,320 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+#include <mpi.h>
+
+namespace ndim_halo_benchmark {
+
+#include "MT_benchmark.h"
+
+template <class bs>
+class HaloBenchmark : public Benchmark { 
+    public:    
+    MPI_Datatype datatype;
+    size_t datatype_size;
+    std::vector<thread_local_data_t> input;
+    std::vector<int> count;
+    int mode_multiple;
+    int num_threads;
+    int malloc_align;
+    double time_avg, time_min, time_max;
+
+    int rank, nranks;
+    int ndims, required_nranks;
+    std::vector<std::vector<std::vector<void *> > > buffs;
+    std::vector<std::vector<int> > partner;
+    std::vector<int> ranksperdim;
+    std::vector<int> mults;
+    enum { UP=0, DN }; static const int ndirs = 2;
+    enum { SEND=0, RECV }; static const int nsr = 2;
+
+    // Linearize: combine per-dimension subscripts into a single rank number.
+    int substorank(const std::vector<unsigned int> &subs)
+    {
+        // weighted sum of the subscripts, mults[d] being the weight of dimension d
+        int linear = 0;
+        for (int d = ndims - 1; d >= 0; --d)
+            linear += mults[d] * subs[d];
+        return linear;
+    }
+    // Delinearize: split a rank number into one subscript per dimension.
+    void ranktosubs(int rank, std::vector<unsigned int> &subs)
+    {
+        // peel off one dimension at a time, carrying the remainder forward
+        int left = rank;
+        for (int d = 0; d < ndims; ++d) {
+            subs[d] = left / mults[d];
+            left = left % mults[d];
+        }
+    }
+
+    // Fetch the suite-wide settings, allocate the exchange buffers and work out
+    // this rank's UP and DN partner along every dimension.
+    virtual void init() {
+        GET_GLOBAL(std::vector<thread_local_data_t>, input);
+        GET_GLOBAL(int, mode_multiple);
+        GET_GLOBAL(int, num_threads);
+        GET_GLOBAL(int, malloc_align);
+        GET_GLOBAL(MPI_Datatype, datatype);
+        GET_GLOBAL(std::vector<int>, count);
+
+        VarLenScope *sc = new VarLenScope(count);
+        scope = sc;
+
+        // every buffer holds scope->get_max_len() elements of the datatype
+        size_t maxlen = scope->get_max_len();
+        int idts;
+        MPI_Type_size(datatype, &idts);
+        datatype_size = idts;   // store in the member instead of a shadowing local
+        size_t buf_size = maxlen * datatype_size;
+
+        GET_GLOBAL(int, rank);
+        GET_GLOBAL(int, nranks);
+        GET_GLOBAL(int, ndims);
+        GET_GLOBAL(int, required_nranks);
+        GET_GLOBAL(std::vector<int>, ranksperdim);
+        GET_GLOBAL(std::vector<int>, mults);
+
+        // ranks outside the grid get neither buffers nor partners
+        if (rank >= required_nranks)
+            return;
+        std::vector<unsigned int> mysubs(ndims);
+        ranktosubs(rank, mysubs);
+
+        // buffs[dimension][UP|DN][SEND|RECV]
+        buffs.resize(ndims);
+        for (int i = 0; i < ndims; i++) {
+            buffs[i].resize(ndirs);
+            for (int j = 0; j < ndirs; j++) {
+                buffs[i][j].resize(nsr);
+            }
+        }
+
+        static AlignedAllocator<char> allocator(malloc_align);
+        for (int i = 0; i < ndims; ++i)
+            for (int j = UP; j <= DN; ++j) 
+                for (int k = SEND; k <= RECV; ++k) {
+                    buffs[i][j][k] = allocator.Alloc(buf_size);
+                }
+
+        partner.resize(ndims);
+        for (int i = 0; i < ndims; i++) {
+            partner[i].resize(ndirs);
+        }
+        // partner[dim][UP|DN]: one step up/down along dim, wrapping around
+        for (int dim = 0; dim < ndims; ++dim) {
+            std::vector<unsigned int> partnersubs(mysubs);
+            partnersubs[dim] = (mysubs[dim]+1)%ranksperdim[dim];
+            partner[dim][UP] = substorank(partnersubs);
+            partnersubs[dim] = (ranksperdim[dim]+mysubs[dim]-1)%ranksperdim[dim];
+            partner[dim][DN] = substorank(partnersubs);
+        }
+    }
+    struct chunk_t {   // one contiguous slice of a message
+        size_t offset; // index of the slice's first element
+        size_t count;  // number of elements in the slice
+    };
+    // Split count elements into min(count, num) contiguous chunks (at least one,
+    // so count == 0 cannot divide by zero); the first chunks absorb the remainder.
+    void split_into_chunks(int count, int num, std::vector<chunk_t> &chunks) {
+        int limit = (count > num) ? num : count;
+        size_t nparts = (limit < 1) ? 1 : limit;
+        size_t base = count / nparts, rest = count % nparts, base_off = 0;
+        chunks.resize(nparts);
+        for (size_t i = 0; i < nparts; i++) {
+            chunks[i].offset = base_off;
+            base_off += (chunks[i].count = base + (i < rest ? 1 : 0));
+        }
+    }
+    // Run one scope item: exchange the message (split across threads in multiple
+    // mode), reduce the timings over MPI_COMM_WORLD and print them on rank 0.
+    virtual void run(const scope_item &item) {
+        static int ninvocations = 0;
+        double t = 0, tavg = 0, tmin = 1e6, tmax = 0;
+        int nresults = 0;
+        double transferred_bytes = 0;
+        int idts;
+        MPI_Type_size(datatype, &idts);
+        size_t datatype_size = (size_t)idts;
+
+        int actual_nthreads = 1;
+        if (mode_multiple) {
+            std::vector<chunk_t> chunks;
+            split_into_chunks(item.len, num_threads, chunks);
+            // NOTE: actual_nthreads might appear smaller than num_threads for small message
+            // sizes!
+            actual_nthreads = chunks.size();
+
+        #pragma omp parallel default(shared) num_threads(actual_nthreads)
+            {
+                double t_mp;
+                int result;
+                // zero-initialized: run_instance() returns early on idle ranks
+                size_t total_count = 0;
+                run_instance(&input[omp_get_thread_num()], chunks[omp_get_thread_num()], t_mp, result, total_count);
+            #pragma omp critical
+                {
+                    tmax = max(tmax, t_mp);
+                    tmin = min(tmin, t_mp);
+                    tavg = tavg + t_mp;
+                    nresults += result;
+                    transferred_bytes += (double)(total_count * datatype_size);
+                }
+            }
+        } else {
+            chunk_t the_only_chunk = { 0, item.len };
+            size_t total_count = 0;
+            run_instance(&input[0], the_only_chunk, t, nresults, total_count);
+            tavg = tmax = tmin = t;
+            transferred_bytes = (double)(total_count * datatype_size);
+        }
+        MPI_Allreduce(&tavg, &time_avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+        MPI_Allreduce(&tmin, &time_min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+        MPI_Allreduce(&tmax, &time_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+        MPI_Allreduce(MPI_IN_PLACE, &nresults, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+        if (nresults) {
+            time_avg /= (double)nresults;
+        }
+        time_avg /= (double)input[0].repeat;
+        time_min /= (double)input[0].repeat;
+        time_max /= (double)input[0].repeat;
+        if (rank == 0) {
+            if (nresults) {
+                if (ninvocations++ == 0) {
+                    std::cout << std::endl;
+                    std::cout << "# Benchmarking " << get_name() << 
+                        " (processes: " << nresults / actual_nthreads << "; threads: " << actual_nthreads <<
+                        "; dimensions: " << ndims << ")" << std::endl;
+                }
+                int ntimes = transferred_bytes / (item.len * datatype_size);
+                std::cout << item.len * datatype_size << "(x" << ntimes << ")(bytes) " << time_avg / 1e-6 << "(usec) " << transferred_bytes / time_avg / 1e6 << "(Mb/s)" << std::endl;
+            }
+        }
+    }
+    // Time the halo exchange of one chunk on the calling thread.
+    //
+    //   input       - per-thread settings: communicator, warmup and repeat counts
+    //   chunk       - element offset and element count of the slice to exchange
+    //   t           - out: seconds spent in the timed (post-warmup) iterations
+    //   result      - out: 1 if this rank took part in the exchange, otherwise 0
+    //   total_count - out: requests of one iteration times chunk.count (0 if idle)
+    //
+    // Every iteration posts the receives for all dimensions, then the sends, and
+    // completes them with MPI_Waitall before the next iteration begins. Ranks
+    // with rank >= required_nranks return at once with t = 0 and result = 0.
+    virtual void run_instance(thread_local_data_t *input, chunk_t chunk, double &t, int &result, size_t &total_count) {
+        MPI_Comm comm = input->comm;
+        t = 0;
+        result = 0;
+
+        // Always defined: run() reads it even when this rank returns early.
+        total_count = 0;
+        if (rank >= required_nranks)
+            return;
+
+        int idts;
+        MPI_Type_size(datatype, &idts);
+        size_t datatype_size = (size_t)idts;
+
+        // two receives and two sends per dimension at most
+        const int maxreqs = 4 * ndims;
+        int nreqs = 0;
+        std::vector<MPI_Request> reqs(maxreqs);
+        for (int iter = 0; iter < input->warmup + input->repeat; ++iter) {
+            // the clock starts once the warmup iterations are done
+            if (iter == input->warmup) {
+                t = MPI_Wtime();
+            }
+            nreqs = 0;
+
+            // receives first: what the DN partner sends "up" lands in [UP][RECV],
+            // what the UP partner sends "down" lands in [DN][RECV]
+            for (int i = 0; i < ndims; ++i) {
+                // a dimension with a single rank has no other rank to exchange with
+                if (ranksperdim[i] == 1) 
+                    continue;
+                MPI_Irecv((char *)buffs[i][UP][RECV] + chunk.offset * datatype_size, chunk.count, datatype, 
+                          partner[i][DN], 1, comm, &reqs[nreqs++]);
+
+                MPI_Irecv((char *)buffs[i][DN][RECV] + chunk.offset * datatype_size, chunk.count, datatype, 
+                          partner[i][UP], 1, comm, &reqs[nreqs++]);
+            }
+
+            // sends, mirroring the receives above
+            for (int i = 0; i < ndims; ++i) {
+                if (ranksperdim[i] == 1)
+                    continue;
+                MPI_Isend((char *)buffs[i][UP][SEND] + chunk.offset * datatype_size, chunk.count, datatype, 
+                          partner[i][UP], 1, comm, &reqs[nreqs++]);
+
+                MPI_Isend((char *)buffs[i][DN][SEND] + chunk.offset * datatype_size, chunk.count, datatype, 
+                          partner[i][DN], 1, comm, &reqs[nreqs++]);
+            }
+
+            assert(nreqs <= maxreqs);
+            // complete every request posted in this iteration
+            MPI_Waitall(nreqs, &reqs[0], MPI_STATUSES_IGNORE);
+        }
+
+        result = 1;
+        // nreqs still holds the request count of the last iteration
+        total_count = (size_t)nreqs * (size_t)chunk.count;
+        t = MPI_Wtime() - t;
+    }
+    // No teardown is performed, neither on idle ranks nor on participating ones.
+    virtual void finalize() {
+        if (rank < required_nranks) { /* intentionally empty */ }
+    }
+    DEFINE_INHERITED(HaloBenchmark<bs>, bs);
+};
+
+}
+
diff --git a/src_cpp/HALO/halo_suite.cpp b/src_cpp/HALO/halo_suite.cpp
new file mode 100644
index 00000000..0a6d739f
--- /dev/null
+++ b/src_cpp/HALO/halo_suite.cpp
@@ -0,0 +1,233 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <omp.h>
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <math.h>
+#include <stdio.h>
+#include "utils.h"
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "args_parser.h"
+
+namespace ndim_halo_benchmark {
+
+#include "benchmark_suite.h"
+
+
+// Euclid's algorithm on |a| and |b|; gcd(0, b) is |b| and gcd(a, 0) is |a|.
+template <typename integer>
+integer gcd(integer a, integer b) {
+    integer x = (a < 0) ? -a : a;
+    integer y = (b < 0) ? -b : b;
+    while (y != 0) {
+        const integer next = x % y;
+        x = y;
+        y = next;
+    }
+    return x;
+}
+
+#include "MT_types.h"
+
+namespace NS_HALO {
+    // Suite-wide state; most of it is assigned by prepare() and fill_in().
+    std::vector<thread_local_data_t> input;
+    int mode_multiple, stride, num_threads, malloc_align;
+    int rank, nranks, required_nranks, ndims;
+    bool prepared = false, do_checks;
+    std::vector<int> count, ranksperdim, mults;
+    std::vector<unsigned int> mysubs;
+    MPI_Datatype datatype;
+    // Build the process grid: divide topo by the gcd of its entries, scale every
+    // dimension by the largest common factor (if at least 2) that still fits into
+    // nranks, then derive the subscript multipliers (last dimension: weight 1).
+    static void fill_in(std::vector<int> &topo)
+    {
+        ndims = topo.size();
+        ranksperdim = topo;
+        int n = 0;
+        for (int i = 0; i < ndims; ++i)
+            n = gcd(n, topo[i]);
+        assert(n > 0);
+        required_nranks = 1;
+        for (int i = 0; i < ndims; ++i) {
+            ranksperdim[i] = topo[i] / n;
+            required_nranks *= ranksperdim[i];
+        }
+        const int ratio = nranks / required_nranks;
+        if (ndims > 0 && ratio >= (1<<ndims)) {
+            // Largest mult with mult^ndims <= ratio, found in integers: (int)pow()
+            // truncates (pow(64, 1.0/3) -> 3) and rooting nranks could overshoot.
+            int mult = 2;
+            for (;; ++mult) {
+                long long p = 1;
+                for (int i = 0; i < ndims && p <= ratio; ++i) p *= mult + 1;
+                if (p > ratio) break;
+            }
+            for (int i = 0; i < ndims; ++i) {
+                ranksperdim[i] *= mult;
+                required_nranks *= mult;
+            }
+        }
+        mults.resize(ndims);
+        mults[ndims - 1] = 1;
+        for (int i = ndims - 2; i >= 0; --i)
+            mults[i] = mults[i + 1] * ranksperdim[i + 1];
+    }
+}
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, ndim_halo_benchmark)
+
+template <> bool BenchmarkSuite<BS_GENERIC>::declare_args(args_parser &parser,
+                                                          std::ostream &output) const {   // declares this suite's options; always returns true
+    UNUSED(output);
+    parser.set_current_group(get_name());   // the options below are filed under the suite's name
+    parser.add<int>("stride", 0);           // stored by prepare(); not handed out by get_parameter()
+    parser.add<int>("warmup",  100);        // iterations run before the clock starts
+    parser.add<int>("repeat", 1000);        // timed iterations
+    parser.add_vector<int>("count", "1,2,4,8").   // message lengths, counted in elements of the datatype
+        set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING);
+    parser.add<int>("malloc_align", 64);    // handed to the buffer allocator in the benchmark's init()
+    parser.add<std::string>("datatype", "int").   // prepare() accepts "int" and "char" only
+        set_caption("int|char");
+    parser.add_vector<int>("topo", "1", '.');   // ranks per dimension; presumably '.' is the list delimiter - confirm
+    parser.set_default_current_group();
+    return true;
+}
+
+// Read the parsed options into NS_HALO, set up one communicator per thread and build
+// the process grid; returns false on unknown arguments, a bad datatype or too few ranks.
+template <> bool BenchmarkSuite<BS_GENERIC>::prepare(const args_parser &parser, 
+                                                     const std::vector<std::string> &,
+                                                     const std::vector<std::string> &unknown_args,
+                                                     std::ostream &output) {
+    if (unknown_args.size() != 0) {
+        output << "Some unknown options or extra arguments." << std::endl;
+        return false;
+    }
+    using namespace NS_HALO;
+
+    parser.get<int>("count", count);
+    mode_multiple = (parser.get<std::string>("thread_level") == "multiple");
+    stride = parser.get<int>("stride");
+    malloc_align = parser.get<int>("malloc_align");
+
+    std::string dt = parser.get<std::string>("datatype");
+    if (dt == "int") datatype = MPI_INT;
+    else if (dt == "char") datatype = MPI_CHAR;
+    else {
+        output << get_name() << ": " << "Unknown data type in datatype option" << std::endl;
+        return false;
+    }
+
+    // in "multiple" mode use the thread count an OpenMP parallel region provides
+    num_threads = 1;
+    if (mode_multiple) {
+#pragma omp parallel default(shared)
+#pragma omp master
+        num_threads = omp_get_num_threads();
+    }
+    input.resize(num_threads);    
+    for (int thread_num = 0; thread_num < num_threads; thread_num++) {
+        input[thread_num].comm = duplicate_comm(mode_multiple, thread_num);
+        input[thread_num].warmup = parser.get<int>("warmup");
+        input[thread_num].repeat = parser.get<int>("repeat");
+    }
+
+    std::vector<int> topo;
+    parser.get<int>("topo", topo);
+    // -- HALO specific part
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+    fill_in(topo);
+
+    // Every rank has to give up here, not only rank 0: the remaining ranks would
+    // otherwise wait forever in the barrier below. Only rank 0 prints the reason.
+    if (required_nranks > nranks) {
+        if (rank == 0)
+            output << get_name() << ": " << "Not enough ranks, " << required_nranks << " min. required" << std::endl;
+        return false;
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    prepared = true;
+    return true;
+}
+
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+// HANDLE_PARAMETER(TYPE, NAME): when key equals the spelled-out NAME, wrap the address of NAME in smart_ptr<TYPE>,
+// store it in result and call detach_ptr() (presumably so the wrapper never deletes the global - confirm).
+template<> any BenchmarkSuite<BS_GENERIC>::get_parameter(const std::string &key) {   // looks up an NS_HALO variable by name
+    using namespace NS_HALO;
+    assert(prepared);   // prepared is set only at the end of a successful prepare()
+    any result;         // left as default-constructed when key matches none of the names below
+    HANDLE_PARAMETER(std::vector<thread_local_data_t>, input);
+    HANDLE_PARAMETER(int, num_threads);   
+    HANDLE_PARAMETER(int, mode_multiple);
+    HANDLE_PARAMETER(int, malloc_align);
+    HANDLE_PARAMETER(MPI_Datatype, datatype);
+    HANDLE_PARAMETER(int, ndims);
+    HANDLE_PARAMETER(int, required_nranks);
+    HANDLE_PARAMETER(std::vector<int>, ranksperdim);
+    HANDLE_PARAMETER(std::vector<int>, mults);
+    HANDLE_PARAMETER(int, rank);
+    HANDLE_PARAMETER(int, nranks);
+    HANDLE_PARAMETER(std::vector<int>, count);
+    return result;
+}
+
+}
+
diff --git a/src_cpp/IO/IO_benchmark.cpp b/src_cpp/IO/IO_benchmark.cpp
new file mode 100644
index 00000000..4acb11e3
--- /dev/null
+++ b/src_cpp/IO/IO_benchmark.cpp
@@ -0,0 +1,709 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include "smart_ptr.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "original_benchmark.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+#define BENCHMARK(BMRK_FN, BMRK_NAME) template class OriginalBenchmark<BenchmarkSuite<BS_IO>, BMRK_FN>; /* explicit instantiation keyed on the function */ \
+DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME(OriginalBenchmark<BenchmarkSuite<BS_IO>, BMRK_FN>), BMRK_NAME) /* presumably registers it under BMRK_NAME -- macro defined elsewhere */ \
+template<> smart_ptr<Bmark_descr> OriginalBenchmark<BenchmarkSuite<BS_IO>, BMRK_FN>::descr = NULL; /* per-benchmark descriptor pointer, initially null */ \
+template<> bool OriginalBenchmark<BenchmarkSuite<BS_IO>, BMRK_FN>::init_description() // left open on purpose: the { ... } after each BENCHMARK(...) is this function's body
+
+BENCHMARK(IMB_write_indv, S_Write_Indv) // bound directly to IMB_write_indv; the braces below are init_description()
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(PUT);
+    return true; // unconditional success
+}
+
+void IMB_s_iwrite_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as S_Write_Indv; arguments forwarded unchanged
+}
+// S_IWrite_Indv descriptor: the S_Write_Indv flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_s_iwrite_indv, S_IWrite_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+BENCHMARK(IMB_write_expl, S_Write_Expl) // bound directly to IMB_write_expl; the braces below are init_description()
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(PUT);
+    return true; // unconditional success
+}
+
+void IMB_s_iwrite_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as S_Write_Expl; arguments forwarded unchanged
+}
+// S_IWrite_Expl descriptor: the S_Write_Expl flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_s_iwrite_expl, S_IWrite_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_write_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Write_Indv descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_write_indv, P_Write_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_iwrite_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Write_Indv; arguments forwarded unchanged
+}
+// P_IWrite_Indv descriptor: the P_Write_Indv flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_p_iwrite_indv, P_IWrite_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_write_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_shared(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Write_Shared descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_write_shared, P_Write_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_iwrite_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_shared(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Write_Shared; arguments forwarded unchanged
+}
+// P_IWrite_Shared descriptor: the P_Write_Shared flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_p_iwrite_shared, P_IWrite_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_write_priv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // reuses IMB_write_indv, as P_Write_Indv does; arguments unchanged
+}
+// P_Write_Priv descriptor: differs from P_Write_Indv only in PRIVATE replacing INDV_BLOCK.
+BENCHMARK(IMB_p_write_priv, P_Write_Priv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_iwrite_priv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Write_Priv; arguments forwarded unchanged
+}
+// P_IWrite_Priv descriptor: the P_Write_Priv flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_p_iwrite_priv, P_IWrite_Priv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_write_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_expl(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Write_Expl descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_write_expl, P_Write_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_p_iwrite_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Write_Expl; arguments forwarded unchanged
+}
+// P_IWrite_Expl descriptor: the P_Write_Expl flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_p_iwrite_expl, P_IWrite_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_write_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Write_Indv descriptor: the P_Write_Indv flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_write_indv, C_Write_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_iwrite_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Write_Indv; arguments forwarded unchanged
+}
+// C_IWrite_Indv descriptor: the C_Write_Indv flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_c_iwrite_indv, C_IWrite_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_write_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_shared(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Write_Shared descriptor: the P_Write_Shared flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_write_shared, C_Write_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_iwrite_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_shared(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Write_Shared; arguments forwarded unchanged
+}
+// C_IWrite_Shared descriptor: the C_Write_Shared flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_c_iwrite_shared, C_IWrite_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_write_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_expl(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Write_Expl descriptor: the P_Write_Expl flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_write_expl, C_Write_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+void IMB_c_iwrite_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_write_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Write_Expl; arguments forwarded unchanged
+}
+// C_IWrite_Expl descriptor: the C_Write_Expl flags plus NONBLOCKING, NTIMES_2 and N_MODES_1.
+BENCHMARK(IMB_c_iwrite_expl, C_IWrite_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+BENCHMARK(IMB_read_indv, S_Read_Indv) // bound directly to IMB_read_indv; the braces below are init_description()
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1); // set on every read descriptor in this file; the blocking write descriptors omit it
+    descr->flags.insert(GET);
+    return true; // unconditional success
+}
+
+void IMB_s_iread_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as S_Read_Indv; arguments forwarded unchanged
+}
+// S_IRead_Indv descriptor: the S_Read_Indv flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_s_iread_indv, S_IRead_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+BENCHMARK(IMB_read_expl, S_Read_Expl) // bound directly to IMB_read_expl; the braces below are init_description()
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true; // unconditional success
+}
+
+void IMB_s_iread_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as S_Read_Expl; arguments forwarded unchanged
+}
+// S_IRead_Expl descriptor: the S_Read_Expl flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_s_iread_expl, S_IRead_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_read_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Read_Indv descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_read_indv, P_Read_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_iread_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Read_Indv; arguments forwarded unchanged
+}
+// P_IRead_Indv descriptor: the P_Read_Indv flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_p_iread_indv, P_IRead_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_read_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_shared(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Read_Shared descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_read_shared, P_Read_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_iread_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_shared(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Read_Shared; arguments forwarded unchanged
+}
+// P_IRead_Shared descriptor: the P_Read_Shared flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_p_iread_shared, P_IRead_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_read_priv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // reuses IMB_read_indv, as P_Read_Indv does; arguments unchanged
+}
+// P_Read_Priv descriptor: differs from P_Read_Indv only in PRIVATE replacing INDV_BLOCK.
+BENCHMARK(IMB_p_read_priv, P_Read_Priv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_iread_priv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Read_Priv; arguments forwarded unchanged
+}
+// P_IRead_Priv descriptor: the P_Read_Priv flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_p_iread_priv, P_IRead_Priv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(PRIVATE);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_read_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_expl(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// P_Read_Expl descriptor; the wrapper above gives BENCHMARK() a distinct function to specialize on.
+BENCHMARK(IMB_p_read_expl, P_Read_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_p_iread_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as P_Read_Expl; arguments forwarded unchanged
+}
+// P_IRead_Expl descriptor: the P_Read_Expl flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_p_iread_expl, P_IRead_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_c_read_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Read_Indv descriptor: the P_Read_Indv flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_read_indv, C_Read_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_c_iread_indv(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_indv(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Read_Indv; arguments forwarded unchanged
+}
+// C_IRead_Indv descriptor: the C_Read_Indv flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_c_iread_indv, C_IRead_Indv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(INDV_BLOCK);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_c_read_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_shared(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Read_Shared descriptor: the P_Read_Shared flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_read_shared, C_Read_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_c_iread_shared(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_shared(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Read_Shared; arguments forwarded unchanged
+}
+// C_IRead_Shared descriptor: the C_Read_Shared flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_c_iread_shared, C_IRead_Shared)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SHARED);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+
+void IMB_c_read_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_expl(c_info, size, ITERATIONS, RUN_MODE, time); // arguments forwarded unchanged
+}
+// C_Read_Expl descriptor: the P_Read_Expl flag list with COLLECTIVE in place of PARALLEL_TRANSFER.
+BENCHMARK(IMB_c_read_expl, C_Read_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+void IMB_c_iread_expl(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_read_expl(c_info, size, ITERATIONS, RUN_MODE, time); // same routine as C_Read_Expl; arguments forwarded unchanged
+}
+// C_IRead_Expl descriptor: the C_Read_Expl flags plus NONBLOCKING and NTIMES_2.
+BENCHMARK(IMB_c_iread_expl, C_IRead_Expl)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_2);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+BENCHMARK(IMB_open_close, Open_Close) // bound directly to IMB_open_close; the braces below are init_description()
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_0); // the only descriptor in this file using the *_SIZE_0 buffer flags
+    descr->flags.insert(RECVBUF_SIZE_0);
+    descr->flags.insert(SYNC);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(EXPLICIT);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NO); // where the read/write descriptors carry GET or PUT
+    return true; // unconditional success
+}
diff --git a/src_cpp/IO/IO_suite.cpp b/src_cpp/IO/IO_suite.cpp
new file mode 100644
index 00000000..aa426166
--- /dev/null
+++ b/src_cpp/IO/IO_suite.cpp
@@ -0,0 +1,566 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#if defined MPI1 || defined NBC || defined RMA || defined EXT
+#error Legacy benchmark components can't be linked together
+#endif
+
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <algorithm>
+#include <mpi.h>
+#include "args_parser.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "any.h"
+#include "benchmark_suite.h"
+
+extern "C" {
+#include "IMB_settings.h"
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+#include "IMB_declare.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_IO, IMB-IO)
+
+namespace NS_IO { // file-level state of the BS_IO suite
+    struct comm_info c_info; // load_msg_sizes() below fills its msglen / n_lens members
+    struct iter_schedule ITERATIONS;
+    struct GLOBALS glob;
+    bool prepared = false; // starts false; NOTE(review): presumably set once the suite is prepared -- setter not visible here
+}
+
+// Loads the message-length list in `filename` into NS_IO::c_info (msglen array and n_lens).
+// Lines starting with '#' or one character long are skipped; others are "<int>[suffix]", suffix k/K = x1024, m/M = x1024*1024.
+// Returns false if the file cannot be opened, yields no length, or holds a malformed line; the file is closed on every path.
+bool load_msg_sizes(const char *filename)
+{
+    using namespace NS_IO;
+    FILE*t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+
+    // First pass: count the candidate lines so the array can be sized.
+    int n_lens = 0;
+    char inp_line[72];
+    while(fgets(inp_line,72,t)) {
+        if( inp_line[0] != '#' && strlen(inp_line)>1 )
+            n_lens++;
+    }
+    fclose(t);
+
+    if (n_lens == 0)
+        return false;
+
+    t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+
+    c_info.n_lens = n_lens;
+    char S[32];
+    int sz, isz;
+    c_info.msglen = (int *)malloc(n_lens * sizeof(int));
+    if (c_info.msglen == NULL) { fclose(t); return false; }
+    isz=-1;
+    while(fgets(inp_line,72,t)) {
+        S[0]='\0';
+        if( inp_line[0] != '#' && strlen(inp_line)-1 ) {
+            int ierr;
+            sz=0;
+            // %31s: a line can hold up to 71 characters, so an unbounded %s could overrun S[32].
+            ierr=sscanf(&inp_line[0],"%d%31s",&sz,&S[0]);
+            if( ierr<=0 || ierr==EOF || sz<0 ) {
+                ierr=-1;
+            } else if(ierr==2) {
+                if      (S[0]=='k' ||  S[0]=='K') {sz=sz*1024;}
+                else if (S[0]=='m' ||  S[0]=='M') {sz=sz*1024*1024;}
+                else {
+                    ierr=-1;
+                }
+            } /*else if(ierr==2) */
+            // Fail on a bad line, and on more lines than the first pass counted (file changed meanwhile).
+            if( ierr>0 && isz+1<n_lens ) {
+                isz++;
+                c_info.msglen[isz]=sz;
+            } else {
+                fclose(t);
+                return false;
+            }
+        } /*if( inp_line[0] != '#' && strlen(inp_line)-1 )*/
+    } /*while(fgets(inp_line,72,t))*/
+    n_lens = c_info.n_lens = isz + 1;
+    fclose(t);
+
+    if (n_lens==0)
+        return false;
+    return true;
+}
+
+template <> bool BenchmarkSuite<BS_IO>::declare_args(args_parser &parser, std::ostream &output) const {
+    UNUSED(output);
+    parser.set_current_group(get_name());
+    parser.add<int>("npmin", 1).set_caption("NPmin").
+        set_description(
+            "The argument after npmin is NPmin,\n"
+            "the minimum number of processes to run on\n"
+            "(then if IMB is started on NP processes, the process numbers\n"
+            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+            "To run on just NP processes, run IMB on NP and select -npmin NP\n"
+            "\n"
+            "Default:\n"
+            "NPmin=2\n");
+    parser.add<int>("multi", -1).set_caption("MultiMode").
+        set_description(
+            "The argument after -multi is MultiMode (0 or 1)\n"
+            "\n"
+            "If -multi is selected, running the N process version of a benchmark\n"
+            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+            "\n"
+            "MultiMode only controls default (0) or extensive (1) output charts.\n"
+            "0: only lowest performance groups is output\n"
+            "1: all groups are output\n"
+            "\n"
+            "Default:\n"
+            "multi off\n");
+    parser.add_vector<float>("off_cache", "-1.0,0.0", ',', 1, 2).
+           set_caption("cache_size[,cache_line_size]").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+                "the argument after off_cache can be either 1 single number (cache_size),\n"
+                "or 2 comma separated numbers (cache_size,cache_line_size), or just -1\n"
+                "\n"
+                "By default, without this flag, the communications buffer is\n"
+                "the same within all repetitions of one message size sample;\n"
+                "most likely, cache reusage is yielded and thus throughput results\n"
+                "that might be non realistic.\n"
+                "\n"
+                "With -off_cache, it is attempted to avoid cache reusage.\n"
+                "cache_size is a float for an upper bound of the size of the last level cache in MBytes\n"
+                "cache_line_size is assumed to be the size (Bytes) of a last level cache line\n"
+                "(can be an upper estimate).\n"
+                "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );\n"
+                "when repetitively using messages of a particular size, their addresses are advanced within those\n"
+                "buffers so that a single message is at least 2 cache lines after the end of the previous message.\n"
+                "Only when those buffers have been marched through (eventually), they will re-used from the beginning.\n"
+                "\n"
+                "A cache_size and a cache_line_size are assumed as statically defined\n"
+                "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered\n"
+                "\n"
+                "remark: -off_cache is effective for IMB-RMA, IMB-EXT, but not IMB-IO\n"
+                "\n"
+                "Examples:\n"
+                "-off_cache -1 (use defaults of IMB_mem_info.h);\n"
+                "-off_cache 2.5 (2.5 MB last level cache, default line size);\n"
+                "-off_cache 16,128 (16 MB last level cache, line size 128);\n"
+                "\n"
+                "NOTE: the off_cache mode might also be influenced by eventual internal\n"
+                "caching with the MPI library. This could make the interpretation\n"
+                "intricate.\n"
+                "\n"
+                "Default:\n"
+                "no cache control, data likely to come out of cache most of the time\n");
+    parser.add_vector<int>("iter", "1000,40,100", ',', 1, 3).
+           set_caption("msgspersample[,overall_vol[,msgs_nonaggr]]").
+           set_description(
+                "The argument after -iter can contain from 1 to 3 comma separated values\n"
+                "3 integer numbers override the defaults\n"
+                "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of IMB_settings.h\n"
+                "Examples:\n"
+                "-iter 2000        (override MSGSPERSAMPLE by value 2000)\n"
+                "-iter 1000,100    (override OVERALL_VOL by 100)\n"
+                "-iter 1000,40,150 (override MSGS_NONAGGR by 150)\n"
+                "\n"
+                "Default:\n"
+                "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h\n");
+    parser.add<string>("iter_policy", "dynamic").set_caption("iter_policy").
+           set_description(
+                "The argument after -iter_policy is a one from possible strings,\n"
+                "Specifying that policy will be used for auto iteration control:\n"
+                "dynamic,multiple_np,auto,off\n"
+                "\n"
+                "Example:\n"
+                "-iter_policy auto\n"
+                "\n"
+                "Default:\n"
+                "dynamic\n");
+    parser.add<float>("time", 10.0f).set_caption("max_runtime per sample").
+           set_description(
+                "The argument after -time is a float, specifying that\n"
+                "a benchmark will run at most that many seconds per message size\n"
+                "the combination with the -iter flag or its defaults is so that always\n"
+                "the maximum number of repetitions is chosen that fulfills all restrictions\n"
+                "\n"
+                "Example:\n"
+                "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, if\n"
+                "the default (or -iter selected) number of repetitions would take longer than that)\n"
+                "\n"
+                "remark: per sample, the rough number of repetitions to fulfill the -time request\n"
+                "is estimated in preparatory runs that use ~ 1 second overhead\n"
+                "\n"
+                "Default:\n"
+                "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10\n"
+                "(new default in IMB_3.2)\n");
+    parser.add<float>("mem", 1.0f).
+           set_caption("max. per process memory for overall message buffers").
+           set_description(
+               "The argument after -mem is a float, specifying that\n"
+               "at most that many GBytes are allocated per process for the message buffers\n"
+               "if the size is exceeded, a warning will be output, stating how much memory\n"
+               "would have been necessary, but the overall run is not interrupted\n"
+               "\n"
+               "Example:\n"
+               "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process)\n"
+               "\n"
+               "Default:\n"
+               "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h\n");
+    parser.add<string>("msglen", "").set_caption("Lengths_file").
+           set_description(
+               "The argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+               "message lengths, 1 per line\n"
+               "\n"
+               "Default:\n"
+               "no lengths_file, lengths defined by settings.h, settings_io.h\n");
+    parser.add_vector<int>("map", "0x0", 'x', 2, 2).set_caption("PxQ").
+           set_description(
+               "The argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+               "enter PxQ with the 2 numbers separated by letter \"x\" and no blancs\n"
+               "the basic communicator is set up as P by Q process grid\n"
+               "\n"
+               "If, e.g., one runs on N nodes of X processors each, and inserts\n"
+               "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+               "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+               "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+               "first priority)\n"
+               "\n"
+               "Default:\n"
+               "Q=1\n");
+    parser.add_vector<int>("msglog", "0:22", ':', 1, 2).
+           set_caption("min_msglog:max_msglog").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+               "the argument after -msglog is min:max, where min and max are non-negative integer numbers,\n"
+               "min < max, min is such that the second smallest data transfer size is max(unit, 2^min)\n"
+               "(the smallest always being 0), where unit = sizeof(float) for reductions, and unit = 1,\n"
+               "otherwise. max is such that 2^max is largest messages size, and max must be less than 31\n");
+    parser.add<bool>("imb_barrier", false).set_caption("on or off").
+           set_description(
+               "Use internal MPI-independent barrier syncronization implementation,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.set_default_current_group();
+    return true;
+}
+
+// Selects how prepare() below sets up the run: with 1 the settings are taken
+// inline from the args_parser options; with 0 the IMB_basic_input() path is
+// compiled instead.
+#define BASIC_INPUT_EXPERIMENT 1
+
+// Lower-cases every element of `list` in place. The converted elements are
+// collected in a temporary container of the same type, which is then copied
+// back over `list`.
+template <typename T>
+void preprocess_list(T &list) {
+    T lowered;
+    for (typename T::iterator src = list.begin(); src != list.end(); ++src) {
+        // Same insertion an insert_iterator positioned at end() performs.
+        lowered.insert(lowered.end(), static_cast<typename T::value_type>(tolower(*src)));
+    }
+    list = lowered;
+}
+
+// Prepares the IO suite for a run: reports unknown arguments, initializes the
+// shared run state (c_info, ITERATIONS, glob, unit), copies the command line
+// options held by `parser` into it and lets rank 0 print the run header.
+//
+//   parser        parsed options (npmin, multi, off_cache, iter, iter_policy,
+//                 time, mem, map, msglen, msglog, imb_barrier)
+//   benchs        benchmark names requested for this run
+//   unknown_args  unrecognized arguments; each one is reported on `output`
+//   output        stream for diagnostics
+//
+// Returns true when no requested benchmark is left for this suite (nothing is
+// initialized then) or when preparation succeeded; returns false when an
+// option value is rejected.
+template <> bool BenchmarkSuite<BS_IO>::prepare(const args_parser &parser, const vector<string> &benchs,
+                                                  const vector<string> &unknown_args, std::ostream &output) {
+    using namespace NS_IO;
+    for (vector<string>::const_iterator it = unknown_args.begin(); it != unknown_args.end(); ++it) {
+        output << "Invalid benchmark name " << *it << endl;
+    }
+    // intersection ends up as the requested names that this suite provides
+    // (assuming exclude(a, b) removes from a every element found in b).
+    vector<string> all_benchs, spare_benchs = benchs, intersection = benchs;
+    BenchmarkSuite<BS_IO>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);
+    set_operations::exclude(intersection, spare_benchs);
+    if (intersection.size() == 0)
+        return true;
+
+    prepared = true;
+
+    IMB_set_default(&c_info);
+    IMB_init_pointers(&c_info);
+
+#if BASIC_INPUT_EXPERIMENT == 1
+    {
+        /* run time control as default */
+        ITERATIONS.n_sample=0;
+        ITERATIONS.off_cache=0;
+        ITERATIONS.cache_size=-1;
+        ITERATIONS.s_offs = ITERATIONS.r_offs = 0;
+        ITERATIONS.s_cache_iter = ITERATIONS.r_cache_iter = 1;
+        ITERATIONS.msgspersample=MSGSPERSAMPLE;
+        ITERATIONS.msgs_nonaggr=MSGS_NONAGGR;
+        ITERATIONS.overall_vol=OVERALL_VOL;
+        ITERATIONS.secs=SECS_PER_SAMPLE;
+        ITERATIONS.iter_policy=ITER_POLICY;
+        ITERATIONS.numiters=(int*)NULL;
+
+        MPI_Comm_rank(MPI_COMM_WORLD,&c_info.w_rank);
+        MPI_Comm_size(MPI_COMM_WORLD,&c_info.w_num_procs);
+
+        unit = stdout;
+
+        if( c_info.w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 ) {
+            unit = fopen(OUTPUT_FILENAME,"w");
+            if (unit == NULL) {
+                // The file could not be opened: keep reporting on stdout
+                // rather than handing a NULL stream to fprintf() later on.
+                output << "Cannot open output file " << OUTPUT_FILENAME << ", using stdout" << endl;
+                unit = stdout;
+            }
+        }
+
+        c_info.group_mode = -1;
+        glob.NP_min=2;
+    }
+    bool cmd_line_error = false;
+
+    // npmin: must be a positive process count
+    glob.NP_min = parser.get<int>("npmin");
+    if (glob.NP_min <= 0) {
+        cmd_line_error = true;
+    }
+
+    // multi
+    c_info.group_mode = parser.get<int>("multi");
+
+    // off_cache: either cache_size alone or cache_size,cache_line_size
+    vector<float> csize;
+    parser.get<float>("off_cache", csize);
+    if (csize.size() == 1) {
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = CACHE_LINE_SIZE;
+        // a negative single value selects the compiled-in CACHE_SIZE
+        if (ITERATIONS.cache_size < 0.0) {
+            ITERATIONS.cache_size = CACHE_SIZE;
+        }
+    } else {
+        assert(csize.size() == 2);
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = (int)csize[1];
+        // the line size is parsed as a float but has to be a whole number
+        if (csize[1] != floor(csize[1])) {
+            cmd_line_error = true;
+        }
+    }
+    if (ITERATIONS.cache_size > 0.0)
+        ITERATIONS.off_cache = 1;
+
+    // iter
+    // NOTE(review): elements 1 and 2 are read unconditionally although the
+    // option accepts 1 to 3 values; this assumes the parser fills a shorter
+    // list from the declared defaults -- confirm against args_parser.
+    vector<int> given_iter;
+    parser.get<int>("iter", given_iter);
+    ITERATIONS.msgspersample = given_iter[0];
+    ITERATIONS.overall_vol = given_iter[1] * 1024 * 1024;
+    ITERATIONS.msgs_nonaggr = given_iter[2];
+
+    // iter_policy: any other string leaves the ITER_POLICY value set above
+    string given_iter_policy = parser.get<string>("iter_policy");
+    if (given_iter_policy == "dynamic") { ITERATIONS.iter_policy = imode_dynamic; }
+    if (given_iter_policy == "off") { ITERATIONS.iter_policy = imode_off; }
+    if (given_iter_policy == "multiple_np") { ITERATIONS.iter_policy = imode_multiple_np; }
+    if (given_iter_policy == "auto") { ITERATIONS.iter_policy = imode_auto; }
+
+    // time
+    ITERATIONS.secs = parser.get<float>("time");
+
+    // mem
+    c_info.max_mem = parser.get<float>("mem");
+
+    // map: the P x Q grid must not need more processes than are running
+    vector<int> given_map;
+    parser.get<int>("map", given_map);
+    c_info.px = given_map[0];
+    c_info.py = given_map[1];
+    if (c_info.px * c_info.py > c_info.w_num_procs) {
+        cmd_line_error = true;
+    }
+
+    // msglen
+    string given_msglen_filename = parser.get<string>("msglen");
+    if (given_msglen_filename != "") {
+        if (!load_msg_sizes(given_msglen_filename.c_str())) {
+            output << "Sizes File " << given_msglen_filename << " invalid or doesnt exist" << endl;
+            cmd_line_error = true;
+        }
+    }
+
+    // msglog: a single value is taken as the maximum, with the minimum at 0
+    vector<int> given_msglog;
+    parser.get<int>("msglog", given_msglog);
+    if (given_msglog.size() == 1) {
+        c_info.min_msg_log = 0;
+        c_info.max_msg_log = given_msglog[0];
+    } else {
+        c_info.min_msg_log = given_msglog[0];
+        c_info.max_msg_log = given_msglog[1];
+    }
+    // both exponents are used as shift counts on an int (see 1<<max below)
+    const int MAX_INT_LOG = 31;
+    if (c_info.min_msg_log < 0 || c_info.min_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < 0 || c_info.max_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < c_info.min_msg_log)
+        cmd_line_error = true;
+
+    // imb_barrier
+    IMB_internal_barrier = (parser.get<bool>("imb_barrier") ? 1 : 0);
+
+    if (cmd_line_error)
+        return false;
+
+    // one iteration count per user-defined length, when a policy is active
+    if (ITERATIONS.iter_policy != imode_off &&
+        ITERATIONS.iter_policy != imode_invalid &&
+        c_info.n_lens > 0) {
+        ITERATIONS.numiters = (int *)malloc(c_info.n_lens * sizeof(int));
+    }
+//    IMB_cpu_exploit(TARGET_CPU_SECS, 1);
+
+#endif
+
+#if BASIC_INPUT_EXPERIMENT == 0
+    struct Bench *BList;
+    char *argv[] = { "" };
+    int argc = 0;
+    IMB_basic_input(&c_info, &BList, &ITERATIONS, &argc, (char ***)argv, &glob.NP_min);
+#endif
+
+    // Only rank 0 writes the run header.
+    if (c_info.w_rank == 0 ) {
+        IMB_general_info();
+        fprintf(unit,"\n\n# Calling sequence was: \n\n");
+        string cmd_line;
+        parser.get_command_line(cmd_line);
+        fprintf(unit, "# %s\n\n", cmd_line.c_str());
+        if (c_info.n_lens) {
+            fprintf(unit,"# IO lengths were user defined\n");
+        } else {
+            fprintf(unit,"# Minimum io portion in bytes:   %d\n",0);
+            fprintf(unit,"# Maximum io portion in bytes:   %d\n",1<<c_info.max_msg_log);
+        }
+
+        fprintf(unit,"#\n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"\n");
+        fprintf(unit,"# List of Benchmarks to run:\n\n");
+        // The list goes to `unit` like the rest of the header, so it is not
+        // split off to stdout when an output file is in use.
+        for (vector<string>::iterator it = intersection.begin(); it != intersection.end(); ++it) {
+            fprintf(unit, "# %s\n", it->c_str());
+            std::vector<std::string> comments = create(it->c_str())->get_comments();
+            for (vector<string>::iterator it_com = comments.begin(); it_com != comments.end(); ++it_com) {
+                fprintf(unit, "#     %s\n", it_com->c_str());
+            }
+        }
+    }
+    return true;
+}
+
+// Suite tear-down hook. Does nothing unless prepare() marked the suite as
+// prepared; otherwise it instantiates each benchmark named in `benchs` (no
+// per-benchmark work is done) and has rank 0 write the closing banner to `unit`.
+template <> void BenchmarkSuite<BS_IO>::finalize(const vector<string> &benchs,
+                                                   std::ostream &output) {
+    UNUSED(output);
+    using namespace NS_IO;
+    if (prepared) {
+        for (size_t idx = 0; idx < benchs.size(); ++idx) {
+            smart_ptr<Benchmark> instance = get_instance().create(benchs[idx]);
+            if (instance.get() != NULL) {
+                // nothing to finalize for an individual benchmark
+            }
+        }
+        if (c_info.w_rank == 0) {
+            fprintf(unit,"\n\n# All processes entering MPI_Finalize\n\n");
+        }
+    }
+}
+
+// Fills `benchs` with the benchmark names of this suite. With the
+// DEFAULT_BENCHMARKS filter, every name whose benchmark reports !is_default()
+// is removed again; names for which create() yields no object are kept.
+template <> void BenchmarkSuite<BS_IO>::get_bench_list(set<string> &benchs, 
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_IO>::get_full_list(benchs);
+    if (filter == BenchmarkSuiteBase::DEFAULT_BENCHMARKS) {
+        for (set<string>::iterator it = benchs.begin(); it != benchs.end(); ) {
+            // Step past the element before it may be erased: set::erase()
+            // invalidates the iterator to the removed element, so that
+            // iterator must not be incremented afterwards.
+            set<string>::iterator current = it++;
+            smart_ptr<Benchmark> b = get_instance().create(*current);
+            if (b.get() == NULL)
+                continue;
+            if (!b->is_default())
+                benchs.erase(current);
+        }
+    }
+}
+
+// Fills `benchs` with the benchmark names of this suite. With the
+// DEFAULT_BENCHMARKS filter, every name whose benchmark reports !is_default()
+// is removed again; names for which create() yields no object are kept.
+template <> void BenchmarkSuite<BS_IO>::get_bench_list(vector<string> &benchs, 
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_IO>::get_full_list(benchs);
+    if (benchs.size() == 0)
+        return;
+    if (filter == BenchmarkSuiteBase::DEFAULT_BENCHMARKS) {
+        // Walk from the back so erase() never shifts an element that is still
+        // to be visited. The post-decrement test also visits index 0, which a
+        // loop ending at `i != 0` would leave unfiltered.
+        for (size_t i = benchs.size(); i-- > 0; ) {
+            smart_ptr<Benchmark> b = get_instance().create(benchs[i]);
+            if (b.get() == NULL) {
+                continue;
+            }
+            if (!b->is_default())
+                benchs.erase(benchs.begin() + i);
+        }
+    }
+}
+
+// Helper for get_parameter(): when `key` equals the spelling of NAME, stores a
+// smart_ptr<TYPE> pointing at the object NAME in `result` and then calls
+// detach_ptr() on it. Expects `key` and `result` to be in scope where the
+// macro is expanded.
+// NOTE(review): detach_ptr() presumably keeps the smart pointer from freeing
+// the non-heap object NAME -- confirm against smart_ptr/any.
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+
+
+// Looks up one of the suite's shared state objects by name. The recognized
+// keys are "c_info", "ITERATIONS" and "glob"; for any other key the
+// default-constructed `any` is returned.
+template<> any BenchmarkSuite<BS_IO>::get_parameter(const std::string &key) {
+    using namespace NS_IO;
+    any result;
+    if (key == "c_info") {
+        result = smart_ptr< comm_info >(&c_info);
+        result.detach_ptr();
+    }
+    if (key == "ITERATIONS") {
+        result = smart_ptr< iter_schedule >(&ITERATIONS);
+        result.detach_ptr();
+    }
+    if (key == "glob") {
+        result = smart_ptr< GLOBALS >(&glob);
+        result.detach_ptr();
+    }
+    return result;
+}
+
+// Explicit instantiation of the IO suite, compiled only when WIN32 is defined.
+// NOTE(review): the standard spelling is `template class BenchmarkSuite<BS_IO>;`
+// -- the form without a class-key presumably relies on compiler leniency; confirm.
+#ifdef WIN32
+template BenchmarkSuite<BS_IO>;
+#endif
diff --git a/src_cpp/IO/Makefile.IO.mk b/src_cpp/IO/Makefile.IO.mk
new file mode 100644
index 00000000..b147d862
--- /dev/null
+++ b/src_cpp/IO/Makefile.IO.mk
@@ -0,0 +1,79 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+# Pull in every helper makefile fragment matching the pattern.
+include helpers/Makefile.*.mk
+
+# "override" lets the append take effect even when CPPFLAGS was set on the
+# make command line.
+override CPPFLAGS += -DMPIIO
+
+# C++ sources of the IO suite (the variable name is spelled BECHMARK as written).
+BECHMARK_SUITE_SRC += IO/IO_suite.cpp IO/IO_benchmark.cpp
+# C sources taken from $(C_SRC_DIR) for this suite.
+C_SRC = $(C_SRC_DIR)/IMB_declare.c \
+$(C_SRC_DIR)/IMB_init.c \
+$(C_SRC_DIR)/IMB_mem_manager.c \
+$(C_SRC_DIR)/IMB_benchlist.c \
+$(C_SRC_DIR)/IMB_strgs.c \
+$(C_SRC_DIR)/IMB_err_handler.c \
+$(C_SRC_DIR)/IMB_parse_name_io.c \
+$(C_SRC_DIR)/IMB_g_info.c \
+$(C_SRC_DIR)/IMB_warm_up.c \
+$(C_SRC_DIR)/IMB_open_close.c \
+$(C_SRC_DIR)/IMB_output.c \
+$(C_SRC_DIR)/IMB_utils.c \
+$(C_SRC_DIR)/IMB_init_transfer.c \
+$(C_SRC_DIR)/IMB_init_file.c \
+$(C_SRC_DIR)/IMB_user_set_info.c \
+$(C_SRC_DIR)/IMB_chk_diff.c \
+$(C_SRC_DIR)/IMB_cpu_exploit.c \
+$(C_SRC_DIR)/IMB_read.c \
+$(C_SRC_DIR)/IMB_write.c
+# Object names: the same list with .c turned into .o and the $(C_SRC_DIR)
+# text replaced by IO.
+C_OBJ=$(subst $(C_SRC_DIR),IO,$(C_SRC:.c=.o))
+ADDITIONAL_OBJ += $(C_OBJ)
+
+# Compile each C source from $(C_SRC_DIR) into IO/<name>.o with MPIIO defined.
+IO/%.o: $(C_SRC_DIR)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DMPIIO -c -o $@ $<
diff --git a/src_cpp/IO/Makefile_win.IO.mk b/src_cpp/IO/Makefile_win.IO.mk
new file mode 100644
index 00000000..da8c6e22
--- /dev/null
+++ b/src_cpp/IO/Makefile_win.IO.mk
@@ -0,0 +1,90 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# Include the helper makefile.
+!INCLUDE  helpers/Makefile_win.helpers.mk
+
+# Build both the C++ and the C sources with MPIIO defined.
+CPPFLAGS = $(CPPFLAGS) -DMPIIO
+CFLAGS = $(CFLAGS) -DMPIIO
+# Prefix the C source directory with one parent-directory step.
+C_SRC_DIR = ../$(C_SRC_DIR)
+
+# Objects built from the C sources.
+C_OBJ = IMB_declare.obj \
+             IMB_init.obj \
+             IMB_mem_manager.obj \
+             IMB_benchlist.obj \
+             IMB_strgs.obj \
+             IMB_err_handler.obj \
+             IMB_parse_name_io.obj \
+             IMB_g_info.obj \
+             IMB_warm_up.obj \
+             IMB_open_close.obj \
+             IMB_output.obj \
+             IMB_utils.obj \
+             IMB_init_transfer.obj \
+             IMB_init_file.obj \
+             IMB_user_set_info.obj \
+             IMB_chk_diff.obj \
+             IMB_cpu_exploit.obj \
+             IMB_read.obj \
+             IMB_write.obj
+
+# Objects built from the C++ sources (variable name spelled BECHMARK as written).
+BECHMARK_SUITE_OBJ = IO_suite.obj \
+                     IO_benchmark.obj\
+                     imb.obj args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+
+# Inference rule: .c files found in $(C_SRC_DIR).
+{$(C_SRC_DIR)/}.c.obj:
+	$(CC) /I"$(MPI_INCLUDE)" $(CFLAGS) -c $(C_SRC_DIR)/$*.c
+
+# Inference rule: .cpp files found in the parent directory.
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+
+# Inference rule: .cpp files found in IO/.
+{IO/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c IO/$*.cpp
diff --git a/src_cpp/MPI1/MPI1_benchmark.cpp b/src_cpp/MPI1/MPI1_benchmark.cpp
new file mode 100644
index 00000000..465c9dcd
--- /dev/null
+++ b/src_cpp/MPI1/MPI1_benchmark.cpp
@@ -0,0 +1,318 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include "smart_ptr.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "original_benchmark.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+// BENCHMARK(BMRK_FN, BMRK_NAME) explicitly instantiates
+// OriginalBenchmark<BenchmarkSuite<BS_MPI1>, BMRK_FN>, hands that type together
+// with BMRK_NAME to DECLARE_INHERITED_TEMPLATE (presumably the registration
+// under that name -- confirm), defines its static `descr` as NULL and opens the
+// definition of its init_description(). The braces following each use of the
+// macro are the body of that function.
+#define BENCHMARK(BMRK_FN, BMRK_NAME) template class OriginalBenchmark<BenchmarkSuite<BS_MPI1>, BMRK_FN>; \
+DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME(OriginalBenchmark<BenchmarkSuite<BS_MPI1>, BMRK_FN>), BMRK_NAME) \
+template<> smart_ptr<Bmark_descr> OriginalBenchmark<BenchmarkSuite<BS_MPI1>, BMRK_FN>::descr = NULL; \
+template<> bool OriginalBenchmark<BenchmarkSuite<BS_MPI1>, BMRK_FN>::init_description() 
+
+
+// PingPong: init_description() of the benchmark built on IMB_pingpong.
+// Inserts the description flags below, including DEFAULT, and reports success.
+BENCHMARK(IMB_pingpong, PingPong)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SELECT_SOURCE);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SCALE_TIME_HALF);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+//    descr->comments.push_back("This is a pingpong benchmark bla bla bla...");
+    return true;
+}
+
+// Forwards all arguments unchanged to IMB_pingpong. BENCHMARK instantiates
+// OriginalBenchmark on the function itself, so a separate function gives
+// PingPongSpecificSource its own benchmark type.
+void IMB_pingpong_specificsource(struct comm_info* c_info, int size,
+                                 struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_pingpong(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// PingPongSpecificSource: init_description() of the benchmark built on
+// IMB_pingpong_specificsource. Same flags as PingPong except that DEFAULT is
+// not set.
+BENCHMARK(IMB_pingpong_specificsource, PingPongSpecificSource)
+{
+    descr->flags.insert(SELECT_SOURCE);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SCALE_TIME_HALF);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Forwards all arguments unchanged to IMB_pingpong. BENCHMARK instantiates
+// OriginalBenchmark on the function itself, so a separate function gives
+// PingPongAnySource its own benchmark type.
+void IMB_pingpong_anysource(struct comm_info* c_info, int size,
+                            struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_pingpong(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// PingPongAnySource: init_description() of the benchmark built on
+// IMB_pingpong_anysource. Flags of PingPong without DEFAULT and SELECT_SOURCE.
+BENCHMARK(IMB_pingpong_anysource, PingPongAnySource)
+{
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SCALE_TIME_HALF);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// PingPing: init_description() of the benchmark built on IMB_pingping.
+// Inserts the description flags below, including DEFAULT, and reports success.
+BENCHMARK(IMB_pingping, PingPing)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SELECT_SOURCE);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Forwards all arguments unchanged to IMB_pingping. BENCHMARK instantiates
+// OriginalBenchmark on the function itself, so a separate function gives
+// PingPingSpecificSource its own benchmark type.
+void IMB_pingping_specificsource(struct comm_info* c_info, int size,
+                                 struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_pingping(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// PingPingSpecificSource: init_description() of the benchmark built on
+// IMB_pingping_specificsource. Same flags as PingPing except that DEFAULT is
+// not set.
+BENCHMARK(IMB_pingping_specificsource, PingPingSpecificSource)
+{
+    descr->flags.insert(SELECT_SOURCE);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Forwards all arguments unchanged to IMB_pingping. BENCHMARK instantiates
+// OriginalBenchmark on the function itself, so a separate function gives
+// PingPingAnySource its own benchmark type.
+void IMB_pingping_anysource(struct comm_info* c_info, int size,
+                            struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_pingping(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// PingPingAnySource: init_description() of the benchmark built on
+// IMB_pingping_anysource. Flags of PingPing without DEFAULT and SELECT_SOURCE.
+BENCHMARK(IMB_pingping_anysource, PingPingAnySource)
+{
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Sendrecv: init_description() of the benchmark built on IMB_sendrecv.
+// Flagged DEFAULT, PARALLEL_TRANSFER and SCALE_BW_DOUBLE.
+BENCHMARK(IMB_sendrecv, Sendrecv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SCALE_BW_DOUBLE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Exchange: init_description() of the benchmark built on IMB_exchange.
+// Differs from Sendrecv in using SCALE_BW_FOUR and SENDBUF_SIZE_2I.
+BENCHMARK(IMB_exchange, Exchange)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(PARALLEL_TRANSFER);
+    descr->flags.insert(SCALE_BW_FOUR);
+    descr->flags.insert(SENDBUF_SIZE_2I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Allreduce: init_description() of the benchmark built on IMB_allreduce.
+// Flagged DEFAULT, COLLECTIVE and REDUCTION.
+BENCHMARK(IMB_allreduce, Allreduce)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(REDUCTION);    
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Reduce: init_description() of the benchmark built on IMB_reduce.
+// Same flags as Allreduce plus HAS_ROOT.
+BENCHMARK(IMB_reduce, Reduce)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(HAS_ROOT);    
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Reduce_scatter: init_description() of the benchmark built on
+// IMB_reduce_scatter. Same flags as Allreduce.
+BENCHMARK(IMB_reduce_scatter, Reduce_scatter)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(REDUCTION);    
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Allgather: init_description() of the benchmark built on IMB_allgather.
+// Flagged DEFAULT and COLLECTIVE, with SENDBUF_SIZE_I and RECVBUF_SIZE_NP_I.
+BENCHMARK(IMB_allgather, Allgather)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    return true;
+}
+
+// Allgatherv: init_description() of the benchmark built on IMB_allgatherv.
+// Same flags as Allgather.
+BENCHMARK(IMB_allgatherv, Allgatherv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    return true;
+}
+
+// Gather: init_description() of the benchmark built on IMB_gather.
+// Same flags as Allgather plus HAS_ROOT.
+BENCHMARK(IMB_gather, Gather)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    return true;
+}
+
+// Gatherv: init_description() of the benchmark built on IMB_gatherv.
+// Same flags as Gather.
+BENCHMARK(IMB_gatherv, Gatherv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    return true;
+}
+
+// Scatter: init_description() of the benchmark built on IMB_scatter.
+// Flagged DEFAULT, COLLECTIVE and HAS_ROOT, with SENDBUF_SIZE_NP_I and
+// RECVBUF_SIZE_I.
+BENCHMARK(IMB_scatter, Scatter)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    return true;
+}
+
+// Scatterv: init_description() of the benchmark built on IMB_scatterv.
+// Same flags as Scatter.
+BENCHMARK(IMB_scatterv, Scatterv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    return true;
+}
+
+// Alltoall: init_description() of the benchmark built on IMB_alltoall.
+// Flagged DEFAULT and COLLECTIVE, with SENDBUF_SIZE_NP_I and RECVBUF_SIZE_NP_I.
+BENCHMARK(IMB_alltoall, Alltoall)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    return true;
+}
+
+// Alltoallv: init_description() of the benchmark built on IMB_alltoallv.
+// Same flags as Alltoall.
+BENCHMARK(IMB_alltoallv, Alltoallv)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    return true;
+}
+
+// Bcast: init_description() of the benchmark built on IMB_bcast.
+// Flagged DEFAULT, COLLECTIVE and HAS_ROOT, with SENDBUF_SIZE_I and
+// RECVBUF_SIZE_I.
+BENCHMARK(IMB_bcast, Bcast)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Barrier: init_description() of the benchmark built on IMB_barrier.
+// Flagged DEFAULT and SYNC, with SENDBUF_SIZE_0 and RECVBUF_SIZE_0.
+BENCHMARK(IMB_barrier, Barrier)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SYNC);
+    descr->flags.insert(SENDBUF_SIZE_0);
+    descr->flags.insert(RECVBUF_SIZE_0);
+    return true;
+}
+
+// Uniband: init_description() of the benchmark built on IMB_uni_bandwidth.
+// Flagged PARALLEL_TRANSFER_MSG_RATE; DEFAULT is not set.
+BENCHMARK(IMB_uni_bandwidth, Uniband)
+{
+    descr->flags.insert(PARALLEL_TRANSFER_MSG_RATE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
+
+// Biband: init_description() of the benchmark built on IMB_bi_bandwidth.
+// Same flags as Uniband plus SCALE_BW_DOUBLE; DEFAULT is not set.
+BENCHMARK(IMB_bi_bandwidth, Biband)
+{
+    descr->flags.insert(PARALLEL_TRANSFER_MSG_RATE);
+    descr->flags.insert(SCALE_BW_DOUBLE);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    return true;
+}
diff --git a/src_cpp/MPI1/MPI1_suite.cpp b/src_cpp/MPI1/MPI1_suite.cpp
new file mode 100644
index 00000000..e2bcf8c6
--- /dev/null
+++ b/src_cpp/MPI1/MPI1_suite.cpp
@@ -0,0 +1,760 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#if defined RMA || defined NBC || defined MPIIO || defined EXT
+#error Legacy benchmark components can't be linked together
+#endif
+
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <algorithm>
+#include <mpi.h>
+#include "args_parser.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "any.h"
+#include "benchmark_suite.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_MPI1, IMB-MPI1) // suite boilerplate macro defined outside this file; NOTE(review): presumably ties the tag BS_MPI1 to the suite name IMB-MPI1 -- confirm
+
+namespace NS_MPI1 { // suite-wide state shared by the functions of this file
+    struct comm_info c_info; // communication settings; written by prepare() and load_msg_sizes()
+    struct iter_schedule ITERATIONS; // iteration and cache-avoidance settings; written by prepare()
+    struct GLOBALS glob; // carries NP_min, which prepare() takes from the npmin option
+    bool prepared = false; // set by prepare() once a requested benchmark belongs to this suite; checked by finalize()
+}
+
+// Reads user-defined message lengths from the text file `filename` into
+// NS_MPI1::c_info.msglen / c_info.n_lens (used by prepare() for the msglen option).
+// Lines starting with '#' and lines of at most one character are skipped; every
+// other line must hold a non-negative integer, optionally followed by a suffix
+// starting with k/K (x1024) or m/M (x1024*1024).
+// Returns true when at least one length was stored. Returns false when the file
+// cannot be opened, holds no length, a line is malformed or too large for an int,
+// or memory is exhausted; in that case the file is closed and c_info is untouched.
+bool load_msg_sizes(const char *filename)
+{
+    using namespace NS_MPI1;
+    const int LINE_LEN = 72;         /* line buffer size, as used for both passes */
+    const int MAX_INT = 2147483647;  /* NOTE(review): assumes a 32-bit int */
+    char inp_line[LINE_LEN];
+    char S[32];
+
+    FILE *t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+
+    /* Pass 1: count candidate lines so that the array can be sized. */
+    int n_lens = 0;
+    while (fgets(inp_line, LINE_LEN, t)) {
+        if (inp_line[0] != '#' && strlen(inp_line) > 1)
+            n_lens++;
+    }
+    int *lens = (n_lens > 0) ? (int *)malloc(n_lens * sizeof(int)) : NULL;
+    if (lens == NULL) {
+        fclose(t);
+        return false;
+    }
+
+    /* Pass 2: parse the same lines again; any invalid line rejects the file. */
+    rewind(t);
+    int isz = 0;
+    bool ok = true;
+    while (ok && fgets(inp_line, LINE_LEN, t)) {
+        if (inp_line[0] == '#' || strlen(inp_line) <= 1)
+            continue;
+        int sz = 0;
+        int mult = 1;
+        S[0] = '\0';
+        /* %31s keeps the suffix inside S[32]; an unbounded %s could overflow it. */
+        int ierr = sscanf(inp_line, "%d%31s", &sz, S);
+        if (ierr == 2) {
+            if (S[0] == 'k' || S[0] == 'K')      mult = 1024;
+            else if (S[0] == 'm' || S[0] == 'M') mult = 1024 * 1024;
+            else                                 ierr = -1;
+        }
+        /* isz < n_lens guards the array should the file change between the passes. */
+        if (ierr <= 0 || sz < 0 || sz > MAX_INT / mult || isz >= n_lens)
+            ok = false;
+        else
+            lens[isz++] = sz * mult;
+    }
+    fclose(t);
+
+    if (!ok || isz == 0) {
+        free(lens);                  /* do not leak the partial list on failure */
+        return false;
+    }
+    c_info.msglen = lens;
+    c_info.n_lens = isz;
+    return true;
+}
+
+// Registers this suite's command-line options (defaults, captions and help
+// texts) with `parser`, inside the option group named get_name(), then switches
+// the parser back to its default group. `output` is unused. Always returns true.
+// The values are read back in prepare().
+template <> bool BenchmarkSuite<BS_MPI1>::declare_args(args_parser &parser, std::ostream &output) const {
+    UNUSED(output);
+    parser.set_current_group(get_name());
+    parser.add<int>("npmin", 2).set_caption("NPmin").
+        set_description(
+            "The argument after npmin is NPmin,\n"
+            "the minimum number of processes to run on\n"
+            "(then if IMB is started on NP processes, the process numbers\n"
+            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+            "To run on just NP processes, run IMB on NP and select -npmin NP\n"
+            "\n"
+            "Default:\n"
+            "NPmin=2\n");
+    parser.add<int>("multi", -1).set_caption("MultiMode").
+        set_description(
+            "The argument after -multi is MultiMode (0 or 1)\n"
+            "\n"
+            "If -multi is selected, running the N process version of a benchmark\n"
+            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+            "\n"
+            "MultiMode only controls default (0) or extensive (1) output charts.\n"
+            "0: only lowest performance groups is output\n"
+            "1: all groups are output\n"
+            "\n"
+            "Default:\n"
+            "multi off\n");
+    parser.add_vector<float>("off_cache", "-1.0,0.0", ',', 1, 2).set_caption("cache_size[,cache_line_size]").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+                "the argument after off_cache can be either 1 single number (cache_size),\n"
+                "or 2 comma separated numbers (cache_size,cache_line_size), or just -1\n"
+                "\n"
+                "By default, without this flag, the communications buffer is\n"
+                "the same within all repetitions of one message size sample;\n"
+                "most likely, cache reusage is yielded and thus throughput results\n"
+                "that might be non realistic.\n"
+                "\n"
+                "With -off_cache, it is attempted to avoid cache reusage.\n"
+                "cache_size is a float for an upper bound of the size of the last level cache in MBytes\n"
+                "cache_line_size is assumed to be the size (Bytes) of a last level cache line\n"
+                "(can be an upper estimate).\n"
+                "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );\n"
+                "when repetitively using messages of a particular size, their addresses are advanced within those\n"
+                "buffers so that a single message is at least 2 cache lines after the end of the previous message.\n"
+                "Only when those buffers have been marched through (eventually), they will be re-used from the beginning.\n"
+                "\n"
+                "A cache_size and a cache_line_size are assumed as statically defined\n"
+                "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered\n"
+                "\n"
+                "remark: -off_cache is effective for IMB-MPI1, IMB-EXT, but not IMB-IO\n"
+                "\n"
+                "Examples:\n"
+                "-off_cache -1 (use defaults of IMB_mem_info.h);\n"
+                "-off_cache 2.5 (2.5 MB last level cache, default line size);\n"
+                "-off_cache 16,128 (16 MB last level cache, line size 128);\n"
+                "\n"
+                "NOTE: the off_cache mode might also be influenced by eventual internal\n"
+                "caching with the MPI library. This could make the interpretation\n"
+                "intricate.\n"
+                "\n"
+                "Default:\n"
+                "no cache control, data likely to come out of cache most of the time\n");
+    parser.add_vector<int>("iter", "1000,40,100", ',', 1, 3).set_caption("msgspersample[,overall_vol[,msgs_nonaggr]]").
+           set_description(
+                "The argument after -iter can contain from 1 to 3 comma separated values\n"
+                "3 integer numbers override the defaults\n"
+                "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of IMB_settings.h\n"
+                "Examples:\n"
+                "-iter 2000        (override MSGSPERSAMPLE by value 2000)\n"
+                "-iter 1000,100    (override OVERALL_VOL by 100)\n"
+                "-iter 1000,40,150 (override MSGS_NONAGGR by 150)\n"
+                "\n"
+                "Default:\n"
+                "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h\n");
+    parser.add<string>("iter_policy", "dynamic").set_caption("iter_policy").
+           set_description(
+                "The argument after -iter_policy is a one from possible strings,\n"
+                "Specifying that policy will be used for auto iteration control:\n"
+                "dynamic,multiple_np,auto,off\n"
+                "\n"
+                "Example:\n"
+                "-iter_policy auto\n"
+                "\n"
+                "Default:\n"
+                "dynamic\n");
+    parser.add<float>("time", 10.0f).set_caption("max_runtime per sample").
+           set_description(
+                "The argument after -time is a float, specifying that\n"
+                "a benchmark will run at most that many seconds per message size\n"
+                "the combination with the -iter flag or its defaults is so that always\n"
+                "the maximum number of repetitions is chosen that fulfills all restrictions\n"
+                "\n"
+                "Example:\n"
+                "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, if\n"
+                "the default (or -iter selected) number of repetitions would take longer than that)\n"
+                "\n"
+                "remark: per sample, the rough number of repetitions to fulfill the -time request\n"
+                "is estimated in preparatory runs that use ~ 1 second overhead\n"
+                "\n"
+                "Default:\n"
+                "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10\n"
+                "(new default in IMB_3.2)\n");
+    parser.add<float>("mem", 1.0f).set_caption("max. per process memory for overall message buffers").
+           set_description(
+               "The argument after -mem is a float, specifying that\n"
+               "at most that many GBytes are allocated per process for the message buffers\n"
+               "if the size is exceeded, a warning will be output, stating how much memory\n"
+               "would have been necessary, but the overall run is not interrupted\n"
+               "\n"
+               "Example:\n"
+               "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process)\n"
+               "\n"
+               "Default:\n"
+               "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h\n");
+    parser.add<string>("msglen", "").set_caption("Lengths_file").
+           set_description(
+               "The argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+               "message lengths, 1 per line\n"
+               "\n"
+               "Default:\n"
+               "no lengths_file, lengths defined by settings.h, settings_io.h\n");
+    parser.add_vector<int>("map", "1x1", 'x', 2, 2).set_caption("PxQ").
+           set_description(
+               "The argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+               "enter PxQ with the 2 numbers separated by letter \"x\" and no blanks\n"
+               "the basic communicator is set up as P by Q process grid\n"
+               "\n"
+               "If, e.g., one runs on N nodes of X processors each, and inserts\n"
+               "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+               "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+               "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+               "first priority)\n"
+               "\n"
+               "Default:\n"
+               "Q=1\n");
+    parser.add_vector<int>("msglog", "0:22", ':', 1, 2).set_caption("min_msglog:max_msglog").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+               "the argument after -msglog is min:max, where min and max are non-negative integer numbers,\n"
+               "min <= max, min is such that the second smallest data transfer size is max(unit, 2^min)\n"
+               "(the smallest always being 0), where unit = sizeof(float) for reductions, and unit = 1,\n"
+               "otherwise. max is such that 2^max is largest messages size, and max must be less than 31\n");
+    parser.add<bool>("root_shift", false).set_caption("on or off").
+           set_description(
+               "Controls root change at each iteration step for certain collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.add<bool>("sync", true).set_caption("on or off").
+           set_description(
+               "Controls whether all processes are synchronized at each iteration step in collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "on\n");
+    parser.add<bool>("imb_barrier", false).set_caption("on or off").
+           set_description(
+               "Use internal MPI-independent barrier synchronization implementation,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.add<string>("data_type", "byte").set_caption("data type").
+           set_description(
+                "The argument after -data_type is a one from possible strings,\n"
+                "Specifying that type will be used:\n"
+                "byte,char,int,float\n"
+                "\n"
+                "Example:\n"
+                "-data_type char\n"
+                "\n"
+                "Default:\n"
+                "byte\n");
+    parser.add<string>("red_data_type", "float").set_caption("data type for reductions").
+           set_description(
+                "The argument after -red_data_type is a one from possible strings,\n"
+                "Specifying that type will be used:\n"
+                "char,int,float\n"
+                "\n"
+                "Example:\n"
+                "-red_data_type int\n"
+                "\n"
+                "Default:\n"
+                "float\n");
+    parser.add<string>("contig_type", "base").set_caption("contig type").
+           set_description(
+                "The argument after -contig_type is a one from possible strings,\n"
+                "Specifying that type will be used:\n"
+                "base, base_vec, resize, resize_vec\n"
+                "\n"
+                "base - simple MPI type, like MPI_INT, MPI_CHAR, etc.\n"
+                "base_vec - vector of 'simple MPI type'\n"
+                "resize - simple MPI type with extent(type) = 2 * size(type)\n"
+                "resize_vec - vector of 'resize type'\n"
+                "\n"
+                "Example:\n"
+                "-contig_type resize\n"
+                "\n"
+                "Default:\n"
+                "base\n");
+    parser.add<bool>("zero_size", true).set_caption("on or off").
+           set_description(
+               "Controls whether benchmarks are also run with message size 0,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "on\n");
+    parser.set_default_current_group();
+    return true;
+}
+
+#define BASIC_INPUT_EXPERIMENT 1 // 1: prepare() configures the run from args_parser options; 0: prepare() calls the legacy IMB_basic_input() instead
+
+// Replaces `list` with a container of the same type whose elements are the results of tolower() on the original elements.
+template <typename T> void preprocess_list(T &list) {
+    T lowered;
+    transform(list.begin(), list.end(), inserter(lowered, lowered.end()), tolower);
+    list = lowered;
+}
+
+// User-defined reduction callback (the signature MPI_Op_create expects): adds the
+// T values of in_buf into in_out_buf, visiting one T per datatype extent.
+template <typename T>
+void contig_sum(void* in_buf, void* in_out_buf, int* len, MPI_Datatype* datatype) {
+    MPI_Aint lb, extent;
+    int size;
+    // MPI_Type_extent was removed in MPI-3.0; MPI_Type_get_extent reports the same extent.
+    MPI_Type_get_extent(*datatype, &lb, &extent);
+    MPI_Type_size(*datatype, &size);
+    const int step = extent / size;                 // extent in units of the type size (2 for the resized types built in prepare())
+    const int count = extent / sizeof(T) * (*len);  // number of T slots spanned by *len items
+    for (int i = 0; i < count; i += step)
+        ((T*)in_out_buf)[i] += ((T*)in_buf)[i];
+}
+
+// Creates the user-defined sum operation matching `base_type` (MPI_CHAR, MPI_INT
+// or MPI_FLOAT) via MPI_Op_create with commute = 1; any other base type yields
+// MPI_SUM. `type` is unused. The created op is not freed in this file.
+// MPI_Datatype is an opaque handle (not an integer in every MPI library), so the
+// handles are compared with == instead of being used as switch/case labels.
+MPI_Op get_op(MPI_Datatype type, MPI_Datatype base_type) {
+    MPI_Op op = MPI_SUM;
+    if (base_type == MPI_CHAR)
+        MPI_Op_create(&(contig_sum<char>), 1, &op);
+    else if (base_type == MPI_INT)
+        MPI_Op_create(&(contig_sum<int>), 1, &op);
+    else if (base_type == MPI_FLOAT)
+        MPI_Op_create(&(contig_sum<float>), 1, &op);
+    return op;
+}
+
+// Returns the MPI constant name for the four datatypes selectable through the
+// data_type option (MPI_BYTE, MPI_CHAR, MPI_INT, MPI_FLOAT), "null" otherwise.
+// Handles are compared with == because MPI_Datatype is an opaque handle type that
+// is not an integer in every MPI library, so switch/case is not portable.
+string type_to_name(MPI_Datatype type) {
+    string name = "null";
+
+    if (type == MPI_BYTE)
+        name = "MPI_BYTE";
+    else if (type == MPI_CHAR)
+        name = "MPI_CHAR";
+    else if (type == MPI_INT)
+        name = "MPI_INT";
+    else if (type == MPI_FLOAT)
+        name = "MPI_FLOAT";
+
+    return name;
+}
+
+// Configures the suite-wide state in NS_MPI1 (c_info, ITERATIONS, glob) from the
+// parsed command line and, on world rank 0, prints the run header to `unit`.
+// Entries of `unknown_args` are reported on `output` as invalid benchmark names.
+// Nothing is set up (and true is returned) when, after filtering `benchs` against
+// this suite's full list, no name remains. Returns false when an option value is
+// rejected (npmin <= 0, non-integral cache line size, P*Q > number of processes,
+// unusable msglen file, msglog bounds outside [0,30] or max < min); true otherwise.
+// Unknown data_type/red_data_type/contig_type values only produce a message.
+template <> bool BenchmarkSuite<BS_MPI1>::prepare(const args_parser &parser, const vector<string> &benchs,
+                                                  const vector<string> &unknown_args, std::ostream &output) {
+    using namespace NS_MPI1;
+    for (vector<string>::const_iterator it = unknown_args.begin(); it != unknown_args.end(); ++it) {
+        output << "Invalid benchmark name " << *it << endl;
+    }
+    vector<string> all_benchs, spare_benchs = benchs, intersection = benchs;
+    BenchmarkSuite<BS_MPI1>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);
+    set_operations::exclude(intersection, spare_benchs);
+    if (intersection.size() == 0)
+        return true;
+
+    prepared = true;
+
+    IMB_set_default(&c_info);
+    IMB_init_pointers(&c_info);
+
+#if BASIC_INPUT_EXPERIMENT == 1
+    {
+        /* run time control as default */
+        ITERATIONS.n_sample=0;
+        ITERATIONS.off_cache=0;
+        ITERATIONS.cache_size=-1;
+        ITERATIONS.s_offs = ITERATIONS.r_offs = 0;
+        ITERATIONS.s_cache_iter = ITERATIONS.r_cache_iter = 1;
+        ITERATIONS.msgspersample=MSGSPERSAMPLE;
+        ITERATIONS.msgs_nonaggr=MSGS_NONAGGR;
+        ITERATIONS.overall_vol=OVERALL_VOL;
+        ITERATIONS.secs=SECS_PER_SAMPLE;
+        ITERATIONS.iter_policy=ITER_POLICY;
+        ITERATIONS.numiters=(int*)NULL;
+
+        MPI_Comm_rank(MPI_COMM_WORLD,&c_info.w_rank);
+        MPI_Comm_size(MPI_COMM_WORLD,&c_info.w_num_procs);
+
+        unit = stdout;
+
+        if( c_info.w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 ) {
+            unit = fopen(OUTPUT_FILENAME,"w");
+            if (unit == NULL)
+                unit = stdout; /* fopen() failed: keep reporting on stdout so fprintf(unit, ...) stays valid */
+        }
+
+        c_info.group_mode = -1;
+        glob.NP_min=2;
+    }
+    bool cmd_line_error = false;
+
+    // npmin
+    glob.NP_min = parser.get<int>("npmin");
+    if (glob.NP_min <= 0) {
+        cmd_line_error = true;
+    }
+
+    // multi
+    c_info.group_mode = parser.get<int>("multi");
+
+    // off_cache
+    vector<float> csize;
+    parser.get<float>("off_cache", csize);
+    if (csize.size() == 1) {
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = CACHE_LINE_SIZE;
+        if (ITERATIONS.cache_size < 0.0) {
+            ITERATIONS.cache_size = CACHE_SIZE;
+        }
+    } else {
+        assert(csize.size() == 2);
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = (int)csize[1];
+        if (csize[1] != floor(csize[1])) {
+            cmd_line_error = true;
+        }
+    }
+    if (ITERATIONS.cache_size > 0.0)
+        ITERATIONS.off_cache = 1;
+
+    // iter
+    vector<int> given_iter;
+    parser.get<int>("iter", given_iter);
+    ITERATIONS.msgspersample = given_iter[0];
+    ITERATIONS.overall_vol = given_iter[1] * 1024 * 1024;
+    ITERATIONS.msgs_nonaggr = given_iter[2];
+
+    // iter_policy
+    string given_iter_policy = parser.get<string>("iter_policy");
+    if (given_iter_policy == "dynamic") { ITERATIONS.iter_policy = imode_dynamic; }
+    if (given_iter_policy == "off") { ITERATIONS.iter_policy = imode_off; }
+    if (given_iter_policy == "multiple_np") { ITERATIONS.iter_policy = imode_multiple_np; }
+    if (given_iter_policy == "auto") { ITERATIONS.iter_policy = imode_auto; }
+
+    // time
+    ITERATIONS.secs = parser.get<float>("time");
+
+    // mem
+    c_info.max_mem = parser.get<float>("mem");
+
+    // map
+    vector<int> given_map;
+    parser.get<int>("map", given_map);
+    c_info.px = given_map[0];
+    c_info.py = given_map[1];
+    if (c_info.px * c_info.py > c_info.w_num_procs) {
+        cmd_line_error = true;
+    }
+
+    // msglen
+    string given_msglen_filename = parser.get<string>("msglen");
+    if (given_msglen_filename != "") {
+        if (!load_msg_sizes(given_msglen_filename.c_str())) {
+            output << "Sizes File " << given_msglen_filename << " invalid or doesnt exist" << endl;
+            cmd_line_error = true;
+        }
+    }
+
+    // msglog
+    vector<int> given_msglog;
+    parser.get<int>("msglog", given_msglog);
+    if (given_msglog.size() == 1) {
+        c_info.min_msg_log = 0;
+        c_info.max_msg_log = given_msglog[0];
+    } else {
+        c_info.min_msg_log = given_msglog[0];
+        c_info.max_msg_log = given_msglog[1];
+    }
+    const int MAX_INT_LOG = 31;
+    if (c_info.min_msg_log < 0 || c_info.min_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < 0 || c_info.max_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < c_info.min_msg_log)
+        cmd_line_error = true;
+
+    // root_shift
+    c_info.root_shift = (parser.get<bool>("root_shift") ? 1 : 0);
+
+    // sync
+    c_info.sync = (parser.get<bool>("sync") ? 1 : 0);
+
+    // imb_barrier
+    IMB_internal_barrier = (parser.get<bool>("imb_barrier") ? 1 : 0);
+
+    // data_type
+    string given_data_type = parser.get<string>("data_type");
+    if (given_data_type == "byte") {
+        c_info.s_data_type = MPI_BYTE;
+        c_info.r_data_type = MPI_BYTE;
+    } else if (given_data_type == "char") {
+        c_info.s_data_type = MPI_CHAR;
+        c_info.r_data_type = MPI_CHAR;
+    } else if (given_data_type == "int") {
+        c_info.s_data_type = MPI_INT;
+        c_info.r_data_type = MPI_INT;
+    } else if (given_data_type == "float") {
+        c_info.s_data_type = MPI_FLOAT;
+        c_info.r_data_type = MPI_FLOAT;
+    } else {
+        output << "Invalid data_type " << given_data_type << endl;
+        output << "    Set data_type byte" << endl;
+    }
+
+    MPI_Datatype base_dt = c_info.s_data_type;
+
+    // red_data_type
+    string given_red_data_type = parser.get<string>("red_data_type");
+    if (given_red_data_type == "char") {
+        c_info.red_data_type = MPI_CHAR;
+    } else if (given_red_data_type == "int") {
+        c_info.red_data_type = MPI_INT;
+    } else if (given_red_data_type == "float") {
+        c_info.red_data_type = MPI_FLOAT;
+    } else {
+        output << "Invalid red_data_type " << given_red_data_type << endl;
+        output << "    Set red_data_type float" << endl;
+    }
+
+    MPI_Datatype base_red_dt = c_info.red_data_type;
+
+    // contig_type
+    string given_contig_type = parser.get<string>("contig_type");
+    if (given_contig_type == "base") {
+        c_info.size_scale = 1;
+        c_info.contig_type = CT_BASE;
+    } else if (given_contig_type == "base_vec") {
+        c_info.size_scale = 1;
+        c_info.contig_type = CT_BASE_VEC;
+    } else if (given_contig_type == "resize" || given_contig_type == "resize_vec") {
+        // Both resized variants build the same resized datatypes (extent = 2 * size);
+        // only the contig_type tag differs.
+        int base_dt_size,
+            base_red_dt_size;
+        c_info.contig_type = (given_contig_type == "resize") ? CT_RESIZE : CT_RESIZE_VEC;
+        c_info.size_scale = 2;
+        MPI_Type_size(base_dt, &base_dt_size);
+        MPI_Type_size(base_red_dt, &base_red_dt_size);
+        MPI_Type_create_resized(base_dt, base_dt_size, 2 * base_dt_size, &(c_info.s_data_type));
+        MPI_Type_commit(&(c_info.s_data_type));
+        MPI_Type_create_resized(base_dt, base_dt_size, 2 * base_dt_size, &(c_info.r_data_type));
+        MPI_Type_commit(&(c_info.r_data_type));
+        MPI_Type_create_resized(base_red_dt, base_red_dt_size, 2 * base_red_dt_size, &(c_info.red_data_type));
+        MPI_Type_commit(&(c_info.red_data_type));
+    } else {
+        output << "Invalid contig_type " << given_contig_type << endl;
+        output << "    Set contig_type base" << endl;
+    }
+
+    // zero_size
+    if (parser.get<bool>("zero_size") == false) {
+        c_info.zero_size = 0;
+    }
+
+    if (c_info.contig_type > 0)
+        c_info.op_type = get_op(c_info.red_data_type, base_red_dt);
+
+    if (cmd_line_error)
+        return false;
+
+    if (ITERATIONS.iter_policy != imode_off &&
+        ITERATIONS.iter_policy != imode_invalid &&
+        c_info.n_lens > 0) {
+        ITERATIONS.numiters = (int *)malloc(c_info.n_lens * sizeof(int));
+    }
+
+#endif
+
+#if BASIC_INPUT_EXPERIMENT == 0
+    struct Bench *BList;
+    char *argv[] = { "" };
+    int argc = 0;
+    IMB_basic_input(&c_info, &BList, &ITERATIONS, &argc, (char ***)argv, &glob.NP_min);
+#endif
+
+    if (c_info.w_rank == 0 ) {
+        IMB_general_info();
+        fprintf(unit,"\n\n# Calling sequence was: \n\n");
+        string cmd_line;
+        parser.get_command_line(cmd_line);
+        fprintf(unit, "# %s \n\n", cmd_line.c_str());
+        if (c_info.n_lens) {
+            fprintf(unit,"# Message lengths were user defined\n");
+        } else {
+            fprintf(unit,"# Minimum message length in bytes:   %d\n", c_info.zero_size ? 0: 1<<c_info.min_msg_log);
+            fprintf(unit,"# Maximum message length in bytes:   %d\n", 1<<c_info.max_msg_log);
+        }
+
+        fprintf(unit,"#\n");
+        fprintf(unit,"# MPI_Datatype                   :   %s \n", type_to_name(base_dt).c_str());
+        fprintf(unit,"# MPI_Datatype for reductions    :   %s \n", type_to_name(base_red_dt).c_str());
+        fprintf(unit,"# MPI_Op                         :   MPI_SUM  \n");
+        fprintf(unit,"# \n");
+        fprintf(unit,"# \n");
+        fprintf(unit,"\n");
+        fprintf(unit,"# List of Benchmarks to run:\n\n");
+        for (vector<string>::iterator it = intersection.begin(); it != intersection.end(); ++it) {
+            fprintf(unit, "# %s\n", it->c_str()); // same stream as the rest of the header
+        }
+    }
+    return true;
+}
+
+// Suite tear-down hook: does nothing unless prepare() marked the suite as prepared;
+// otherwise instantiates each benchmark named in `benchs` once (no further action is
+// taken on the instances) and lets world rank 0 print the closing banner to `unit`.
+template <> void BenchmarkSuite<BS_MPI1>::finalize(const vector<string> &benchs,
+                                                   std::ostream &output) {
+    using namespace NS_MPI1;
+    UNUSED(output);
+    if (prepared) {
+        for (size_t idx = 0; idx < benchs.size(); ++idx) {
+            // The instance is only created; nothing else is done with it.
+            smart_ptr<Benchmark> instance = get_instance().create(benchs[idx]);
+        }
+        if (c_info.w_rank == 0)
+            fprintf(unit,"\n\n# All processes entering MPI_Finalize\n\n");
+    }
+}
+
+// Lists this suite's benchmarks in `benchs`; with DEFAULT_BENCHMARKS, names whose benchmark is not is_default() are removed.
+template <> void BenchmarkSuite<BS_MPI1>::get_bench_list(set<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_MPI1>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS)
+        return;
+    for (set<string>::iterator it = benchs.begin(); it != benchs.end(); ) {
+        set<string>::iterator cur = it++; // advance first: erasing `cur` then leaves the loop iterator valid
+        smart_ptr<Benchmark> b = get_instance().create(*cur);
+        if (b.get() != NULL && !b->is_default())
+            benchs.erase(cur);
+    }
+}
+
+// Vector flavour of get_bench_list(): fills `benchs` with this suite's benchmark
+// names and, with DEFAULT_BENCHMARKS, drops the names that are not is_default().
+template <> void BenchmarkSuite<BS_MPI1>::get_bench_list(vector<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_MPI1>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS)
+        return;
+    // Walk backwards so that erasing does not shift the entries still to be visited.
+    // "i-- > 0" also visits index 0, which an "i != 0" loop condition would skip, and
+    // it is safe for an empty vector.
+    for (size_t i = benchs.size(); i-- > 0; ) {
+        smart_ptr<Benchmark> b = get_instance().create(benchs[i]);
+        if (b.get() != NULL && !b->is_default())
+            benchs.erase(benchs.begin() + i);
+    }
+}
+
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+// HANDLE_PARAMETER(TYPE, NAME): when `key` equals the spelling of NAME, assigns a smart_ptr<TYPE> wrapping &NAME to `result`, then calls result.detach_ptr().
+// NOTE(review): detach_ptr() presumably keeps `result` from deleting the namespace-scope object -- confirm in any.h.
+template<> any BenchmarkSuite<BS_MPI1>::get_parameter(const std::string &key) {
+    using namespace NS_MPI1;
+    any result;
+    HANDLE_PARAMETER(comm_info, c_info); // key "c_info"
+    HANDLE_PARAMETER(iter_schedule, ITERATIONS); // key "ITERATIONS"
+    HANDLE_PARAMETER(GLOBALS, glob); // key "glob"
+    return result; // left default-constructed when no key matched
+}
+
+#ifdef WIN32
+template BenchmarkSuite<BS_MPI1>; // explicit instantiation for Windows builds; NOTE(review): the standard spelling is "template class BenchmarkSuite<BS_MPI1>;" -- confirm the targeted compiler accepts this form
+#endif
diff --git a/src_cpp/MPI1/Makefile.MPI1.mk b/src_cpp/MPI1/Makefile.MPI1.mk
new file mode 100644
index 00000000..9525e100
--- /dev/null
+++ b/src_cpp/MPI1/Makefile.MPI1.mk
@@ -0,0 +1,92 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+# Pull in every makefile fragment that matches helpers/Makefile.*.mk.
+include helpers/Makefile.*.mk
+
+# Define MPI1 for the preprocessor. `override` makes the addition stick even
+# when CPPFLAGS is supplied on the make command line.
+override CPPFLAGS += -DMPI1
+
+# C++ sources of the MPI1 suite. The variable name is spelled BECHMARK here;
+# presumably the including makefile reads it under that exact name, so do not
+# "correct" it in this file alone.
+BECHMARK_SUITE_SRC += MPI1/MPI1_suite.cpp MPI1/MPI1_benchmark.cpp
+# C sources taken from $(C_SRC_DIR) and compiled for the MPI1 build
+# (listed alphabetically).
+C_SRC = $(C_SRC_DIR)/IMB_allgather.c \
+$(C_SRC_DIR)/IMB_allgatherv.c \
+$(C_SRC_DIR)/IMB_allreduce.c \
+$(C_SRC_DIR)/IMB_alltoall.c \
+$(C_SRC_DIR)/IMB_alltoallv.c \
+$(C_SRC_DIR)/IMB_bandwidth.c \
+$(C_SRC_DIR)/IMB_barrier.c \
+$(C_SRC_DIR)/IMB_bcast.c \
+$(C_SRC_DIR)/IMB_benchlist.c \
+$(C_SRC_DIR)/IMB_chk_diff.c \
+$(C_SRC_DIR)/IMB_cpu_exploit.c \
+$(C_SRC_DIR)/IMB_declare.c \
+$(C_SRC_DIR)/IMB_err_handler.c \
+$(C_SRC_DIR)/IMB_exchange.c \
+$(C_SRC_DIR)/IMB_gather.c \
+$(C_SRC_DIR)/IMB_gatherv.c \
+$(C_SRC_DIR)/IMB_g_info.c \
+$(C_SRC_DIR)/IMB_init.c \
+$(C_SRC_DIR)/IMB_init_transfer.c \
+$(C_SRC_DIR)/IMB_mem_manager.c \
+$(C_SRC_DIR)/IMB_output.c \
+$(C_SRC_DIR)/IMB_parse_name_mpi1.c \
+$(C_SRC_DIR)/IMB_pingping.c \
+$(C_SRC_DIR)/IMB_pingpong.c \
+$(C_SRC_DIR)/IMB_reduce.c \
+$(C_SRC_DIR)/IMB_reduce_scatter.c \
+$(C_SRC_DIR)/IMB_scatter.c \
+$(C_SRC_DIR)/IMB_scatterv.c \
+$(C_SRC_DIR)/IMB_sendrecv.c \
+$(C_SRC_DIR)/IMB_strgs.c \
+$(C_SRC_DIR)/IMB_utils.c \
+$(C_SRC_DIR)/IMB_warm_up.c
+# Object names: same list with .c -> .o and the $(C_SRC_DIR) prefix replaced
+# by MPI1, so the objects are placed under MPI1/.
+C_OBJ=$(subst $(C_SRC_DIR),MPI1,$(C_SRC:.c=.o))
+ADDITIONAL_OBJ += $(C_OBJ)
+
+# Pattern rule: build MPI1/<name>.o from $(C_SRC_DIR)/<name>.c.
+# -DMPI1 is passed explicitly here in addition to the copy already appended
+# to CPPFLAGS above.
+MPI1/%.o: $(C_SRC_DIR)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DMPI1 -c -o $@ $<
diff --git a/src_cpp/MPI1/Makefile_win.MPI1.mk b/src_cpp/MPI1/Makefile_win.MPI1.mk
new file mode 100644
index 00000000..aa9a8a99
--- /dev/null
+++ b/src_cpp/MPI1/Makefile_win.MPI1.mk
@@ -0,0 +1,105 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# NMAKE fragment for the MPI1 suite: pull in the helpers fragment first.
+!INCLUDE helpers/Makefile_win.helpers.mk
+
+# Add -DMPI1 to both the C and the C++ flag sets, and prefix the C source
+# directory with ../ (each macro is redefined in terms of its previous value).
+CFLAGS = $(CFLAGS) -DMPI1
+CPPFLAGS = $(CPPFLAGS) -DMPI1
+C_SRC_DIR = ../$(C_SRC_DIR)
+
+# Objects built from the C sources in $(C_SRC_DIR).
+C_OBJ = IMB_declare.obj \
+             IMB_init.obj \
+             IMB_mem_manager.obj \
+             IMB_parse_name_mpi1.obj \
+             IMB_benchlist.obj \
+             IMB_err_handler.obj \
+             IMB_g_info.obj \
+             IMB_warm_up.obj \
+             IMB_output.obj \
+             IMB_pingpong.obj \
+             IMB_pingping.obj \
+             IMB_allreduce.obj \
+             IMB_reduce_scatter.obj \
+             IMB_reduce.obj \
+             IMB_exchange.obj \
+             IMB_bcast.obj \
+             IMB_barrier.obj \
+             IMB_allgather.obj \
+             IMB_allgatherv.obj \
+             IMB_alltoall.obj \
+             IMB_alltoallv.obj \
+             IMB_sendrecv.obj \
+             IMB_init_transfer.obj \
+             IMB_chk_diff.obj \
+             IMB_utils.obj \
+             IMB_strgs.obj \
+             IMB_cpu_exploit.obj \
+             IMB_gather.obj \
+             IMB_gatherv.obj \
+             IMB_scatter.obj \
+             IMB_scatterv.obj \
+             IMB_bandwidth.obj
+
+# Objects built from the C++ sources: the two MPI1 suite files plus the
+# common driver files. The BECHMARK spelling is the macro's actual name;
+# presumably it is read under that name elsewhere, so keep it.
+BECHMARK_SUITE_OBJ = MPI1_suite.obj \
+                     MPI1_benchmark.obj\
+                     imb.obj \
+                     args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+
+# Inference rule: compile C sources found in $(C_SRC_DIR)/ into .obj files.
+{$(C_SRC_DIR)/}.c.obj:
+	$(CC) /I"$(MPI_INCLUDE)" $(CFLAGS) -c $(C_SRC_DIR)/$*.c
+
+# Inference rule: compile C++ sources found in ../ into .obj files.
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+
+# Inference rule: compile C++ sources found in MPI1/ into .obj files.
+{MPI1/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c MPI1/$*.cpp
+
diff --git a/src_cpp/MT/MT_benchmark.cpp b/src_cpp/MT/MT_benchmark.cpp
new file mode 100644
index 00000000..5e3792cd
--- /dev/null
+++ b/src_cpp/MT/MT_benchmark.cpp
@@ -0,0 +1,927 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <cstddef>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "MT_types.h"
+#include "MT_benchmark.h" 
+
+// Enables the `#ifdef WITH_VECTOR` sections further down in this file.
+#define WITH_VECTOR
+
+// Re-join macro arguments that the preprocessor split at top-level commas
+// (e.g. the comma inside `mt_pt2pt<true, 0>`), so a template-id can be passed
+// through another macro as a single logical argument.
+#define GLUE_TYPENAME2(A,B) A,B
+#define GLUE_TYPENAME3(A,B,C) A,B,C
+
+// WRAP(NEWNAME, OLDNAME): defines a function NEWNAME with the benchmark-kernel
+// signature used throughout this file that forwards all of its arguments to
+// OLDNAME and returns its result. In the visible part of the file it is only
+// used inside a disabled `#if 0` section.
+#define WRAP(NEWNAME, OLDNAME) int NEWNAME(int repeat, int skip, void *in, void *out, int count, MPI_Datatype type, \
+                                       MPI_Comm comm, int rank, int size, input_benchmark_data *idata, \
+                                        output_benchmark_data *odata) { \
+                                    return OLDNAME(repeat, skip, in, out, count, type, comm, rank, size, idata, odata); \
+}
+
+// DECLARE_INHERITED_BENCHMARKMT2(BS, FUNC, NAME): explicitly instantiates
+// BenchmarkMT<BS, FUNC>, hands the type and NAME to DECLARE_INHERITED_TEMPLATE
+// (defined outside this file section) and then opens the specialization of
+// init_flags(); the braces that follow the macro invocation are that
+// function's body. This variant is for a FUNC that itself contains one comma
+// (a two-argument template-id), which is why the type is glued from three
+// pieces.
+#define DECLARE_INHERITED_BENCHMARKMT2(BS, FUNC, NAME) template class BenchmarkMT<BS, FUNC>; \
+    DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME3(BenchmarkMT<BS, FUNC>), NAME) \
+    template <> void BenchmarkMT<BS, FUNC >::init_flags() 
+
+// Same as above for a FUNC without commas (a plain function name): the type
+// splits into two pieces only.
+#define DECLARE_INHERITED_BENCHMARKMT(BS, FUNC, NAME) template class BenchmarkMT<BS, FUNC>; \
+    DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME2(BenchmarkMT<BS, FUNC>), NAME) \
+    template <> void BenchmarkMT<BS, FUNC >::init_flags() 
+
+
+/* testing convenience macros */
+// Both macros expect `idata`, `odata`, `type`, `count` and (for the disabled
+// diagnostic) `rank` to be visible where they are expanded, and both are
+// no-ops unless idata->checks.check is set and `cond` holds.
+// They treat `arr` as an array of count * sizeof(type) / sizeof(int) ints.
+// `val` is evaluated once per element and may refer to the element index `i`
+// declared by the macro's own loop.
+// INIT_ARRAY stores (int)(val) into every element.
+#define INIT_ARRAY(cond,arr,val) if (idata->checks.check && (cond)) { int type_size; MPI_Type_size(type, &type_size); for (size_t i = 0; i < count * type_size / sizeof(int); i++) ((int *)(arr))[i] = (int)(val); }
+// CHECK_ARRAY compares every element with (int)(val) and increments
+// odata->checks.failures for each mismatch. The fprintf is disabled by
+// `if (0)`.
+// NOTE(review): that fprintf formats the size_t index with %ld; adjust the
+// format if the diagnostic is ever enabled.
+#define CHECK_ARRAY(cond,arr,val) \
+        if (idata->checks.check && (cond)) { int type_size; MPI_Type_size(type, &type_size); for (size_t i = 0; i < count * type_size / sizeof(int); i++) if( ((int *)(arr))[i] != (int)(val) ) { \
+                    if (0) \
+                        fprintf(stderr,"Rank %d tid (%d---?) FAILED at index %ld: got %d, expected %d\n", \
+                                                    rank, 0, i, ((int *)(arr))[i], (int)(val)); \
+                    odata->checks.failures++; \
+                } }
+
+using namespace std;
+
+/*
+ * Validates the pairing stride for a point-to-point kernel and derives the
+ * caller's group index.
+ *
+ *   rank, size  position in, and size of, the communicator
+ *   stride      in/out: 0 is replaced by size/2; otherwise left untouched
+ *   group       out: rank / stride, written only once the stride is accepted
+ *
+ * Returns false when the stride is not in [1, size/2], or when the rank falls
+ * into the trailing, incomplete block of 2*stride ranks; true otherwise.
+ */
+inline bool set_stride(int rank, int size, int &stride, int &group)
+{
+    if (stride == 0) {
+        stride = size / 2;
+    }
+    const bool stride_usable = (stride > 0) && (stride <= size / 2);
+    if (!stride_usable) {
+        return false;
+    }
+    group = rank / stride;
+    const int block = 2 * stride;
+    // Ranks past the last complete block of 2*stride have nobody to pair with.
+    const bool in_partial_block = (group / 2 == size / block) && (size % block != 0);
+    return !in_partial_block;
+}
+
+/*
+ * Blocking ping-pong kernel.
+ *
+ * Ranks in an even group send to rank+stride and then receive; ranks in an odd
+ * group receive and then send to rank-stride. This is repeated `repeat` times
+ * (the unnamed second parameter is not used).
+ *
+ *   set_src  true: receive from the partner rank; false: MPI_ANY_SOURCE
+ *   tag      tag used for receives; sends use 0 when tag is MPI_ANY_TAG
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise. With idata->checks.check set, the buffers are pre-filled and
+ * verified and mismatches are counted in odata->checks.failures.
+ */
+template <bool set_src, int tag>
+int mt_pt2pt(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+               MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+               output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    INIT_ARRAY(true, in, (rank+1)*i);
+    INIT_ARRAY(true, out, -1);
+    // A wildcard tag is only meaningful on the receiving side.
+    const int send_tag = (tag == MPI_ANY_TAG) ? 0 : tag;
+    const bool sends_first = (group % 2 == 0);
+    int pair = -1;
+    for (int iter = 0; iter < repeat; ++iter) {
+        if (sends_first) {
+            pair = rank + stride;
+            MPI_Send(in, count, type, pair, send_tag, comm);
+            MPI_Recv(out, count, type, set_src ? pair : MPI_ANY_SOURCE, tag, comm, MPI_STATUS_IGNORE);
+        } else {
+            pair = rank - stride;
+            MPI_Recv(out, count, type, set_src ? pair : MPI_ANY_SOURCE, tag, comm, MPI_STATUS_IGNORE);
+            MPI_Send(in, count, type, pair, send_tag, comm);
+        }
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(true, out, (pair+1)*i);
+    return 1;
+}
+
+// PingPongMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_pt2pt<true, 0> > plus the body of its
+// init_flags() specialization (both come from the macro expansion).
+// How each flag is interpreted is decided outside this file section;
+// TIME_DIVIDE_BY_2 presumably reports half the round-trip time -- TODO confirm.
+DECLARE_INHERITED_BENCHMARKMT2(BenchmarkSuite<BS_MT>, GLUE_TYPENAME2(mt_pt2pt<true, 0>), PingPongMT) 
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+// Disabled code: ping-pong variants for the other source/tag wildcard
+// combinations, kept for reference only.
+// NOTE(review): this section does not match the live code -- it names the
+// suite `MTBenchmarkSuite` (live code uses BenchmarkSuite<BS_MT>) and the
+// hand-expanded definition below misspells the class as `BenhmarkMT`. Both
+// need fixing before the section can be re-enabled.
+#if 0
+
+WRAP(mt_pt2pt_SSST, GLUE_TYPENAME2(mt_pt2pt<true, 0>))
+
+template class BenchmarkMT<MTBenchmarkSuite, mt_pt2pt<true, 0> >;
+namespace { BenchmarkMT<MTBenchmarkSuite, mt_pt2pt<true, 0> > elem_aaa; } template<> const char *BenhmarkMT<MTBenchmarkSuite, mt_pt2pt<true, 0> >::name = "PingPongMT";
+template <> void BenchmarkMT<MTBenchmarkSuite, mt_pt2pt<true, 0> >::init_flags()
+
+//DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_pt2pt_SSST, PingPongMTSpecificSourceSpecificTag) {
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}    
+
+DECLARE_INHERITED_BENCHMARKMT2(MTBenchmarkSuite, GLUE_TYPENAME2(mt_pt2pt<true, MPI_ANY_TAG>), PingPongMTSpecificSourceAnyTag)
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+
+DECLARE_INHERITED_BENCHMARKMT2(MTBenchmarkSuite, GLUE_TYPENAME2(mt_pt2pt<false, 0>), PingPongMTAnySourceSpecificTag)
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+
+DECLARE_INHERITED_BENCHMARKMT2(MTBenchmarkSuite, GLUE_TYPENAME2(mt_pt2pt<false, MPI_ANY_TAG>), PingPongMTAnySourceAnyTag)
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+#endif
+
+/*
+ * Ping-ping kernel: both partners send at the same time.
+ *
+ * Every iteration posts a non-blocking send to the partner, performs a
+ * blocking receive and then waits for the send to complete. The partner is
+ * rank+stride for even groups and rank-stride for odd groups.
+ *
+ *   set_src  true: receive from the partner rank; false: MPI_ANY_SOURCE
+ *   tag      tag used for receives; sends use 0 when tag is MPI_ANY_TAG
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise.
+ */
+template <bool set_src, int tag>
+int mt_ipt2pt(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+                MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+                output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    INIT_ARRAY(true, in, (rank+1)*i);
+    INIT_ARRAY(true, out, -1);
+    const bool even_group = (group % 2 == 0);
+    const int dest = even_group ? rank + stride : rank - stride;
+    const int send_tag = (tag == MPI_ANY_TAG) ? 0 : tag;
+    const int recv_src = set_src ? dest : MPI_ANY_SOURCE;
+    MPI_Request pending;
+    for (int iter = 0; iter < repeat; ++iter) {
+        MPI_Isend(in, count, type, dest, send_tag, comm, &pending);
+        MPI_Recv(out, count, type, recv_src, tag, comm, MPI_STATUS_IGNORE);
+        MPI_Wait(&pending, MPI_STATUS_IGNORE);
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(true, out, (dest+1)*i);
+    return 1;
+}
+
+// PingPingMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_ipt2pt<true, 0> > plus the body of its
+// init_flags() specialization. Same flag set as PingPongMT except that
+// TIME_DIVIDE_BY_2 is not inserted.
+DECLARE_INHERITED_BENCHMARKMT2(BenchmarkSuite<BS_MT>, GLUE_TYPENAME2(mt_ipt2pt<true, 0>), PingPingMT)
+{
+    flags.insert(PT2PT);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+
+/*
+ * MPI_Sendrecv kernel on a ring with step `stride`: each iteration sends `in`
+ * to (rank + stride) % size and receives `out` from
+ * (rank + size - stride) % size.
+ *
+ *   set_src  true: receive from that specific rank; false: MPI_ANY_SOURCE
+ *   tag      tag used for the receive part; the send part uses 0 when tag is
+ *            MPI_ANY_TAG
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise.
+ */
+template <bool set_src, int tag>
+int mt_sendrecv(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+                  MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+                  output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    INIT_ARRAY(true, in, (rank+1)*i);
+    INIT_ARRAY(true, out, -1);
+    const int dest = (rank + stride) % size;
+    const int src = (rank + size - stride) % size;
+    const int send_tag = (tag == MPI_ANY_TAG) ? 0 : tag;
+    const int recv_src = set_src ? src : MPI_ANY_SOURCE;
+    for (int iter = 0; iter < repeat; ++iter) {
+        MPI_Sendrecv(in, count, type, dest, send_tag,
+                     out, count, type, recv_src, tag,
+                     comm, MPI_STATUS_IGNORE);
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(true, out, (src+1)*i);
+    return 1;
+}
+// SendRecvMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_sendrecv<true, 0> > plus the body of
+// its init_flags() specialization.
+// SCALE_BW_TWICE presumably accounts for one message sent and one received per
+// iteration -- TODO confirm where the flag is consumed.
+DECLARE_INHERITED_BENCHMARKMT2(BenchmarkSuite<BS_MT>, GLUE_TYPENAME2(mt_sendrecv<true, 0>), SendRecvMT)
+{
+    flags.insert(PT2PT);
+    flags.insert(SCALE_BW_TWICE);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+
+/*
+ * Returns `p` advanced by `count` elements of MPI datatype `type`, where the
+ * element size in bytes is the value reported by MPI_Type_size().
+ */
+void *increment_ptr(void *p, int count, MPI_Datatype type)
+{
+    int type_size = 0;
+    MPI_Type_size(type, &type_size);
+    // Widen before multiplying: as a plain int product, count * type_size
+    // overflows as soon as the byte offset reaches 2 GiB.
+    const ptrdiff_t offset = static_cast<ptrdiff_t>(count) * type_size;
+    return static_cast<char *>(p) + offset;
+}
+
+/*
+ * Exchange kernel: every iteration posts non-blocking sends of `in` to both
+ * ring neighbours (left = rank - stride, right = rank + stride, modulo size),
+ * then receives from the left neighbour into `out` and from the right
+ * neighbour into the second half of the output buffer, and finally waits for
+ * both sends. `out` therefore has to provide room for 2 * count elements.
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise.
+ */
+int mt_exchange(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+                  MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+                  output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    // Second receive slot starts right behind the first `count` elements.
+    void *out_right = increment_ptr(out, count, type);
+    INIT_ARRAY(true, in, (rank+1)*i);
+    INIT_ARRAY(true, out, -1);
+    const int msg_tag = 0;
+    const int right = (rank + stride) % size;
+    const int left = (rank + size - stride) % size;
+    MPI_Request pending[2];
+    for (int iter = 0; iter < repeat; ++iter) {
+        MPI_Isend(in, count, type, left, msg_tag, comm, pending);
+        MPI_Isend(in, count, type, right, msg_tag, comm, pending + 1);
+        MPI_Recv(out, count, type, left, msg_tag, comm, MPI_STATUS_IGNORE);
+        MPI_Recv(out_right, count, type, right, msg_tag, comm, MPI_STATUS_IGNORE);
+        MPI_Waitall(2, pending, MPI_STATUSES_IGNORE);
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(true, out, (left+1)*i);
+    CHECK_ARRAY(true, out_right, (right+1)*i);
+    return 1;
+}
+
+// ExchangeMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_exchange> plus the body of its
+// init_flags() specialization.
+// RECV_FROM_2 presumably makes the framework size the receive buffer for two
+// messages (mt_exchange writes 2 * count elements), and SCALE_BW_FOUR
+// presumably accounts for two sends plus two receives per iteration -- TODO
+// confirm both where the flags are consumed.
+DECLARE_INHERITED_BENCHMARKMT(BenchmarkSuite<BS_MT>, mt_exchange, ExchangeMT)
+{
+    flags.insert(PT2PT);
+    flags.insert(RECV_FROM_2);
+    flags.insert(SCALE_BW_FOUR);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW);
+}
+
+// Number of non-blocking operations mt_uniband / mt_biband post per direction
+// before waiting; also sizes their request arrays.
+// NOTE(review): those benchmarks insert TIME_DIVIDE_BY_100, which presumably
+// assumes this value stays 100 -- confirm before changing it.
+static const int MAX_WIN_SIZE = 100;
+
+/*
+ * Unidirectional bandwidth kernel.
+ *
+ * In every iteration a rank of an even group posts MAX_WIN_SIZE non-blocking
+ * sends of `in` to rank+stride, waits for them and then receives a one-byte
+ * acknowledgement. A rank of an odd group posts MAX_WIN_SIZE non-blocking
+ * receives (all into `out`), waits for them and sends the acknowledgement to
+ * rank-stride.
+ *
+ *   set_src  true: data receives name the partner rank; false: MPI_ANY_SOURCE
+ *   tag      tag used for receives; sends use 0 when tag is MPI_ANY_TAG
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise.
+ */
+template <bool set_src, int tag>
+int mt_uniband(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+                 MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+                 output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    INIT_ARRAY(true, in, (rank+1)*i);
+    INIT_ARRAY(true, out, -1);
+    const int send_tag = (tag == MPI_ANY_TAG) ? 0 : tag;
+    const int right = rank + stride;
+    const int left = rank - stride;
+    const bool is_sender = (group % 2 == 0);
+    MPI_Request window[MAX_WIN_SIZE];
+    char ack = 0;
+    for (int iter = 0; iter < repeat; ++iter) {
+        if (is_sender) {
+            for (int slot = 0; slot < MAX_WIN_SIZE; ++slot) {
+                MPI_Isend(in, count, type, right, send_tag, comm, &window[slot]);
+            }
+            MPI_Waitall(MAX_WIN_SIZE, window, MPI_STATUSES_IGNORE);
+            MPI_Recv(&ack, 1, MPI_CHAR, right, tag, comm, MPI_STATUS_IGNORE);
+        } else {
+            const int recv_src = set_src ? left : MPI_ANY_SOURCE;
+            for (int slot = 0; slot < MAX_WIN_SIZE; ++slot) {
+                MPI_Irecv(out, count, type, recv_src, tag, comm, &window[slot]);
+            }
+            MPI_Waitall(MAX_WIN_SIZE, window, MPI_STATUSES_IGNORE);
+            MPI_Send(&ack, 1, MPI_CHAR, left, send_tag, comm);
+        }
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(group % 2 == 1, out, (left+1)*i);
+    return 1;
+}
+
+// UniBandMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_uniband<true, 0> > plus the body of
+// its init_flags() specialization.
+// TIME_DIVIDE_BY_100 presumably normalises the time to one message of the
+// MAX_WIN_SIZE (= 100) posted per iteration -- TODO confirm.
+DECLARE_INHERITED_BENCHMARKMT2(BenchmarkSuite<BS_MT>, GLUE_TYPENAME2(mt_uniband<true, 0>), UniBandMT)
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_100);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW_CUMULATIVE);
+    flags.insert(OUT_MSGRATE_CUMMULATIVE);
+}
+
+/*
+ * Bidirectional bandwidth kernel.
+ *
+ * In every iteration each rank posts MAX_WIN_SIZE non-blocking receives (all
+ * into `out`) followed by MAX_WIN_SIZE non-blocking sends of `in`, both
+ * against its partner, and waits for all of them. The partner is rank+stride
+ * for even groups and rank-stride for odd groups. Afterwards the even-group
+ * rank receives a one-byte acknowledgement which the odd-group rank sends.
+ *
+ *   set_src  true: data receives name the partner rank; false: MPI_ANY_SOURCE
+ *   tag      tag used for receives; sends use 0 when tag is MPI_ANY_TAG
+ *
+ * Returns 0 without communicating when set_stride() rejects this rank,
+ * 1 otherwise.
+ */
+template <bool set_src, int tag>
+int mt_biband(int repeat, int, void *in, void *out, int count, MPI_Datatype type,
+                 MPI_Comm comm, int rank, int size, input_benchmark_data *idata,
+                 output_benchmark_data *odata) {
+    int group = 0;
+    int stride = idata->pt2pt.stride;
+    if (!set_stride(rank, size, stride, group))
+        return 0;
+    INIT_ARRAY(1, in, (rank+1)*i);
+    INIT_ARRAY(1, out, -1);
+    const int right = rank + stride;
+    const int left = rank - stride;
+    const bool even_group = (group % 2 == 0);
+    const int peer = even_group ? right : left;
+    const int send_tag = (tag == MPI_ANY_TAG) ? 0 : tag;
+    const int recv_src = set_src ? peer : MPI_ANY_SOURCE;
+    // First half of the window tracks receives, second half tracks sends.
+    MPI_Request window[2 * MAX_WIN_SIZE];
+    MPI_Request *const recv_reqs = window;
+    MPI_Request *const send_reqs = window + MAX_WIN_SIZE;
+    char ack = 0;
+    for (int iter = 0; iter < repeat; ++iter) {
+        for (int slot = 0; slot < MAX_WIN_SIZE; ++slot) {
+            MPI_Irecv(out, count, type, recv_src, tag, comm, &recv_reqs[slot]);
+        }
+        for (int slot = 0; slot < MAX_WIN_SIZE; ++slot) {
+            MPI_Isend(in, count, type, peer, send_tag, comm, &send_reqs[slot]);
+        }
+        MPI_Waitall(2 * MAX_WIN_SIZE, window, MPI_STATUSES_IGNORE);
+        if (even_group) {
+            MPI_Recv(&ack, 1, MPI_CHAR, right, tag, comm, MPI_STATUS_IGNORE);
+        } else {
+            MPI_Send(&ack, 1, MPI_CHAR, left, send_tag, comm);
+        }
+    }
+    CHECK_ARRAY(true, in, (rank+1)*i);
+    CHECK_ARRAY(group % 2 == 0, out, (right+1)*i);
+    CHECK_ARRAY(group % 2 == 1, out, (left+1)*i);
+    return 1;
+}
+
+// BiBandMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_biband<true, 0> > plus the body of its
+// init_flags() specialization. Compared with UniBandMT it additionally
+// inserts TIME_DIVIDE_BY_2.
+DECLARE_INHERITED_BENCHMARKMT2(BenchmarkSuite<BS_MT>, GLUE_TYPENAME2(mt_biband<true, 0>), BiBandMT)
+{
+    flags.insert(PT2PT);
+    flags.insert(TIME_DIVIDE_BY_2);
+    flags.insert(TIME_DIVIDE_BY_100);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_AVG);
+    flags.insert(OUT_BW_CUMULATIVE);
+    flags.insert(OUT_MSGRATE_CUMMULATIVE);
+}
+// MT_COLLECTIVE_BEGIN(NAME): emits the signature of a kernel named mt_<NAME>
+// with the common benchmark-kernel parameter list; the function body follows
+// the macro invocation.
+#define MT_COLLECTIVE_BEGIN(NAME) int mt_##NAME(int repeat, int skip, void *in, void *out, int count, MPI_Datatype type, \
+               MPI_Comm comm, int rank, int size, input_benchmark_data *idata, output_benchmark_data *odata) 
+// MT_CYCLE_BEGIN: declares `t` and `sum`, opens a loop of repeat+skip
+// iterations (index `i`) and takes the start timestamp. The loop stays open
+// until one of the MT_CYCLE_END* macros closes it.
+#define MT_CYCLE_BEGIN \
+    double t, sum = 0.0; \
+    for (int i = 0; i < repeat+skip; i++) { \
+        t = MPI_Wtime(); 
+
+// MT_CYCLE_END: adds the elapsed time of the iteration to `sum` unless it is
+// one of the first `skip` iterations, calls idata->barrier.fn_ptr() five
+// times, closes the loop and stores `sum` through odata->timing.time_ptr when
+// that pointer is non-null.
+// NOTE(review): the reason for five consecutive barrier calls is not visible
+// here -- confirm before changing the count.
+#define MT_CYCLE_END        t = MPI_Wtime() - t; \
+        if (i >= skip) sum += t; \
+        idata->barrier.fn_ptr(); \
+        idata->barrier.fn_ptr(); \
+        idata->barrier.fn_ptr(); \
+        idata->barrier.fn_ptr(); \
+        idata->barrier.fn_ptr(); \
+    } \
+    if (odata->timing.time_ptr != NULL)  { \
+        *(odata->timing.time_ptr) = sum; \
+    } \
+
+// MT_CYCLE_END_NOBARRIER: same as MT_CYCLE_END but without the
+// idata->barrier.fn_ptr() calls.
+#define MT_CYCLE_END_NOBARRIER   t = MPI_Wtime() - t; \
+        if (i >= skip) sum += t; \
+    } \
+    if (odata->timing.time_ptr != NULL)  { \
+        *(odata->timing.time_ptr) = sum; \
+    } \
+
+
+// Barrier kernel: times repeat+skip calls of MPI_Barrier(comm); the first
+// `skip` iterations are excluded from the accumulated time. When
+// idata->threading.mode_multiple is set, an OpenMP barrier is executed before
+// each MPI_Barrier. The loop is closed with MT_CYCLE_END_NOBARRIER, so no
+// idata->barrier.fn_ptr() calls are made. Always returns 1.
+MT_COLLECTIVE_BEGIN(barrier) {
+    UNUSED(size);
+    MT_CYCLE_BEGIN
+        if (idata->threading.mode_multiple) {
+#pragma omp barrier            
+        }
+        MPI_Barrier(comm);
+    MT_CYCLE_END_NOBARRIER
+    return 1;
+}
+
+// BarrierMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_barrier> plus the body of its
+// init_flags() specialization.
+// SEPARATE_MEASURING presumably tells the framework that the kernel reports
+// its own time through odata->timing.time_ptr, and SEND_0 that no payload is
+// transferred -- TODO confirm both where the flags are consumed.
+DECLARE_INHERITED_BENCHMARKMT(BenchmarkSuite<BS_MT>, mt_barrier, BarrierMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(SEND_0);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+
+}
+
+// Broadcast kernel: times repeat+skip calls of MPI_Bcast rooted at
+// idata->collective.root. The root broadcasts from `in`; every other rank
+// receives into `out`. With checking enabled the root's buffer is filled with
+// the element index and non-root ranks verify that `out` holds the same
+// values afterwards. Always returns 1.
+MT_COLLECTIVE_BEGIN(bcast) {
+    UNUSED(size);
+    INIT_ARRAY((idata->collective.root == rank), in, i);
+    INIT_ARRAY((idata->collective.root != rank), out, -1);
+    MT_CYCLE_BEGIN
+        MPI_Bcast((idata->collective.root == rank) ? in : out, count, type, idata->collective.root, comm);
+    MT_CYCLE_END
+    CHECK_ARRAY((idata->collective.root != rank), out, i);
+    return 1;
+}
+
+// BcastMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_bcast> plus the body of its
+// init_flags() specialization. How each flag is interpreted is decided
+// outside this file section.
+DECLARE_INHERITED_BENCHMARKMT(BenchmarkSuite<BS_MT>, mt_bcast, BcastMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+
+}
+
+
+// Reduce kernel: times repeat+skip calls of MPI_Reduce with MPI_SUM towards
+// idata->collective.root. With checking enabled every rank contributes
+// (rank+1)*i at element index i, so the root expects the sum over all ranks,
+// size*(size+1)/2 * i, in `out`. Always returns 1.
+MT_COLLECTIVE_BEGIN(reduce) {
+    INIT_ARRAY(1, in, (rank+1)*i);
+    INIT_ARRAY(1, out, -1);
+    MT_CYCLE_BEGIN
+        MPI_Reduce(in, out, count, type, MPI_SUM, idata->collective.root, comm);
+    MT_CYCLE_END
+    CHECK_ARRAY((idata->collective.root == rank), out, size*(size+1)*i/2);
+    return 1;
+}
+
+// ReduceMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_reduce> plus the body of its
+// init_flags() specialization. How each flag is interpreted is decided
+// outside this file section.
+DECLARE_INHERITED_BENCHMARKMT(BenchmarkSuite<BS_MT>, mt_reduce, ReduceMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);    
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+
+
+// Allreduce kernel: times repeat+skip calls of MPI_Allreduce with MPI_SUM.
+// With checking enabled every rank contributes (rank+1)*i at element index i
+// and every rank expects size*(size+1)/2 * i in `out`. Always returns 1.
+MT_COLLECTIVE_BEGIN(allreduce) {
+    INIT_ARRAY(1, in, (rank+1)*i);
+    INIT_ARRAY(1, out, -1);
+    MT_CYCLE_BEGIN
+        MPI_Allreduce(in, out, count, type, MPI_SUM, comm);
+    MT_CYCLE_END
+    CHECK_ARRAY(true, out, size*(size+1)*i/2);
+    return 1;
+}
+
+// AllReduceMT: explicit instantiation of
+// BenchmarkMT<BenchmarkSuite<BS_MT>, mt_allreduce> plus the body of its
+// init_flags() specialization. How each flag is interpreted is decided
+// outside this file section.
+DECLARE_INHERITED_BENCHMARKMT(BenchmarkSuite<BS_MT>, mt_allreduce, AllReduceMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+
+
+#if 0
+MT_COLLECTIVE_BEGIN(scatter)
+    MPI_Scatter(in, count, type, out, count, type, idata->collective.root, comm);
+MT_COLLECTIVE_END(scatter)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_scatter, ScatterMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(SEND_TO_ALL);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+
+
+#ifdef WITH_VECTOR
+MT_COLLECTIVE_BEGIN(scatterv)
+    MPI_Scatterv(in, idata->collective_vector.cnt, idata->collective_vector.displs, type, out, count, type, idata->collective.root, comm);
+MT_COLLECTIVE_END(scatterv)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_scatterv, ScattervMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(COLLECTIVE_VECTOR);
+    flags.insert(SEND_TO_ALL);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+
+
+MT_COLLECTIVE_BEGIN(reduce_scatter)
+    MPI_Reduce_scatter(in, out, idata->collective_vector.cnt, type, MPI_SUM, comm);
+MT_COLLECTIVE_END(reduce_scatter)
+   
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_reduce_scatter, ReduceScatterMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(COLLECTIVE_VECTOR);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+#endif
+
+MT_COLLECTIVE_BEGIN(gather)
+    MPI_Gather(in, count, type, out, count, type, idata->collective.root, comm);
+MT_COLLECTIVE_END(gather)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_gather, GatherMT)
+{
+    flags.insert(COLLECTIVE);
+    flags.insert(SEPARATE_MEASURING);
+    flags.insert(RECV_FROM_ALL);
+    flags.insert(OUT_BYTES);
+    flags.insert(OUT_REPEAT);
+    flags.insert(OUT_TIME_MIN);
+    flags.insert(OUT_TIME_MAX);
+    flags.insert(OUT_TIME_AVG);
+}
+
+
+#ifdef WITH_VECTOR
+MT_COLLECTIVE_BEGIN(gatherv)
+    MPI_Gatherv(in, count, type, out, idata->collective_vector.cnt, idata->collective_vector.displs, type, idata->collective.root, comm);
+MT_COLLECTIVE_END(gatherv)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_gatherv, GathervMT)
+{
+    // Gatherv: self-timed vector collective; RECV_FROM_ALL makes init() size
+    // the receive buffer for one chunk per rank and COLLECTIVE_VECTOR
+    // requests the cnt/displs arrays. `flags` is a std::set, so one range
+    // insert equals the individual inserts.
+    const Flags enabled[] = {
+        COLLECTIVE, SEPARATE_MEASURING, COLLECTIVE_VECTOR, RECV_FROM_ALL,
+        OUT_BYTES, OUT_REPEAT, OUT_TIME_MIN, OUT_TIME_MAX, OUT_TIME_AVG
+    };
+    flags.insert(enabled, enabled + sizeof(enabled) / sizeof(enabled[0]));
+}
+#endif
+ 
+MT_COLLECTIVE_BEGIN(allgather)  // every rank contributes `count` elements and every rank receives all contributions in `out` (wrapper code presumably supplied by the BEGIN/END macros, not visible here)
+    MPI_Allgather(in, count, type, out, count, type, comm);
+MT_COLLECTIVE_END(allgather)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_allgather, AllgatherMT)
+{
+    // Allgather: self-timed collective; RECV_FROM_ALL makes init() size the
+    // receive buffer for one chunk per rank. `flags` is a std::set, so a
+    // single range insert equals the individual inserts.
+    const Flags enabled[] = {
+        COLLECTIVE, SEPARATE_MEASURING, RECV_FROM_ALL,
+        OUT_BYTES, OUT_REPEAT, OUT_TIME_MIN, OUT_TIME_MAX, OUT_TIME_AVG
+    };
+    flags.insert(enabled, enabled + sizeof(enabled) / sizeof(enabled[0]));
+}
+
+
+#ifdef WITH_VECTOR
+MT_COLLECTIVE_BEGIN(allgatherv)  // allgather with per-rank receive counts cnt[i] and offsets displs[i] into `out` (wrapper code presumably supplied by the BEGIN/END macros, not visible here)
+    MPI_Allgatherv(in, count, type, out, idata->collective_vector.cnt, idata->collective_vector.displs, type, comm);
+MT_COLLECTIVE_END(allgatherv)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_allgatherv, AllgathervMT)
+{
+    // Allgatherv: self-timed vector collective; RECV_FROM_ALL makes init()
+    // size the receive buffer for one chunk per rank and COLLECTIVE_VECTOR
+    // requests the cnt/displs arrays. `flags` is a std::set, so one range
+    // insert equals the individual inserts.
+    const Flags enabled[] = {
+        COLLECTIVE, SEPARATE_MEASURING, COLLECTIVE_VECTOR, RECV_FROM_ALL,
+        OUT_BYTES, OUT_REPEAT, OUT_TIME_MIN, OUT_TIME_MAX, OUT_TIME_AVG
+    };
+    flags.insert(enabled, enabled + sizeof(enabled) / sizeof(enabled[0]));
+}
+#endif
+
+MT_COLLECTIVE_BEGIN(alltoall)  // every rank sends `count` elements to, and receives `count` elements from, every rank (wrapper code presumably supplied by the BEGIN/END macros, not visible here)
+    MPI_Alltoall(in, count, type, out, count, type, comm);
+MT_COLLECTIVE_END(alltoall)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_alltoall, AlltoallMT)
+{
+    // Alltoall: self-timed collective; SEND_TO_ALL and RECV_FROM_ALL make
+    // init() size both buffers for one chunk per rank. `flags` is a
+    // std::set, so one range insert equals the individual inserts.
+    const Flags enabled[] = {
+        COLLECTIVE, SEPARATE_MEASURING,
+        SEND_TO_ALL, RECV_FROM_ALL,
+        OUT_BYTES, OUT_REPEAT, OUT_TIME_MIN, OUT_TIME_MAX, OUT_TIME_AVG
+    };
+    flags.insert(enabled, enabled + sizeof(enabled) / sizeof(enabled[0]));
+}
+
+
+#ifdef WITH_VECTOR
+MT_COLLECTIVE_BEGIN(alltoallv)  // all-to-all exchange; the same cnt/displs arrays describe both the send and the receive layout (wrapper code presumably supplied by the BEGIN/END macros, not visible here)
+    MPI_Alltoallv(in,  idata->collective_vector.cnt,  idata->collective_vector.displs, type, out, idata->collective_vector.cnt, idata->collective_vector.displs, type, comm);
+MT_COLLECTIVE_END(alltoallv)
+
+DECLARE_INHERITED_BENCHMARKMT(MTBenchmarkSuite, mt_alltoallv, AlltoallvMT)
+{
+    // Alltoallv: self-timed vector collective; SEND_TO_ALL and RECV_FROM_ALL
+    // make init() size both buffers for one chunk per rank, and
+    // COLLECTIVE_VECTOR requests the cnt/displs arrays. `flags` is a
+    // std::set, so one range insert equals the individual inserts.
+    const Flags enabled[] = {
+        COLLECTIVE, SEPARATE_MEASURING, COLLECTIVE_VECTOR,
+        SEND_TO_ALL, RECV_FROM_ALL,
+        OUT_BYTES, OUT_REPEAT, OUT_TIME_MIN, OUT_TIME_MAX, OUT_TIME_AVG
+    };
+    flags.insert(enabled, enabled + sizeof(enabled) / sizeof(enabled[0]));
+}
+#endif
+
+#endif
+
+#if 0
+
+#include "ransac.cpp"
+#include <math.h>
+
+// Disabled prototype (inside #if 0): times `repeat` broadcasts from rank 0 and
+// stores the mean duration, in MPI_Wtime units, in *t_avg. Returns 1.
+// NOTE(review): the `root` argument is ignored; the broadcast root is always 0.
+int mt_bcast(int repeat, void *in, void *out, int count, MPI_Datatype type,
+               MPI_Comm comm, int rank, int size, int root, int stride, double *t_avg) {
+    double sum = 0.0;
+    for (int i = 0; i < repeat; i++) {
+        const double start = MPI_Wtime();
+        MPI_Bcast(in, count, type, 0, comm);
+        sum += MPI_Wtime() - start;
+        // Five back-to-back barriers separate consecutive timed broadcasts.
+        for (int b = 0; b < 5; b++)
+            MPI_Barrier(comm);
+    }
+    *t_avg = (repeat > 0) ? sum / repeat : 0.0;   // avoid 0/0 when nothing ran
+    return 1;   // was missing: flowing off the end of a non-void function is undefined
+}
+
+#if 0    
+// Bucket key for a timing: floor(1000 * frexp mantissa) * 100 + |frexp exponent|.
+int get_token(double t)
+{
+  int exponent = 0;
+  const double mantissa = frexp(t, &exponent);
+  return abs(exponent) + 100 * (int)floor(mantissa * 1000.0);
+}
+#endif
+
+int mt_bcast(int repeat, void *in, void *out, int count, MPI_Datatype type,  // disabled experiment: timed broadcasts plus optional (nested #if 0) timing-stability analyses
+               MPI_Comm comm, int rank, int size, int root, int stride) {
+    static int prev_count = 0;   // count seen on the previous call; the analysis block below runs only when it repeats
+    double t, sum = 0.0;
+#if 0    
+    vector<double> times;
+    typedef pair<double, int> value;
+    times.resize(repeat);
+    map<int, value> times_map;
+#endif    
+    for (int i = 0; i < repeat; i++) {
+        t = MPI_Wtime();
+        MPI_Bcast(in, count, type, 0, comm);
+        t = MPI_Wtime() - t;
+        sum += t;
+#if 0        
+        times[i] = t*1000000.0
+        int token = get_token(t*1000000.0);
+        value old_val = times_map[token];
+        times_map[token] = value(t*1000000.0, old_val.second + 1); 
+#endif        
+        MPI_Barrier(comm);
+        MPI_Barrier(comm);
+        MPI_Barrier(comm);
+        MPI_Barrier(comm);
+        MPI_Barrier(comm);
+    }
+    // FIXME!!! use this value
+    sum /= repeat;
+
+    if (prev_count == count /*&& repeat > 40*/) {
+    
+//        int instability_mark = 0;
+#if 0
+        // ransac
+        {
+            double avg = 0;
+            for (int i = 0; i < times.size(); i++) {
+                cout << "<< " << times[i] << endl;
+            }        
+            int niter = 0;
+            double threshold = (times[0]+times[2]+times[3]+times[4])/4.0/5.0;
+            Model<double> M = ransac<double, Model<double> >(times, max(threshold, 0.1), 0.99, niter);
+            if (niter > 50)
+                instability_mark++;
+            if (niter > 200) 
+                instability_mark++;
+            MPI_Reduce(&M.N, &avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+            if (rank == 0) {
+                cout << ">> ransac: " << avg / size << endl;
+            }
+        }
+#endif        
+#if 0        
+        // simple average
+        {
+            double avg = 0;
+            Model<double> S(0); S.init(times);
+            MPI_Reduce(&S.N, &avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+            if (rank == 0) {
+                cout << ">> avg: " << avg / size << endl;
+            }
+        }
+#endif        
+#if 0
+        // frequency-based
+        {
+            map<int, double> sorted;
+            for (map<int, value>::iterator it = times_map.begin(); it != times_map.end(); ++it) {
+                sorted[it->second.second] = it->second.first;
+            }
+            int prev = 0;
+            vector<double> top_vals;
+            vector<int> top_tokens;
+            vector<int> top_counts;
+            for (map<int, double>::reverse_iterator it = sorted.rbegin(); it != sorted.rend(); ++it) {
+                if (it->first < prev*2/3) {
+                    break;
+                }
+                top_vals.push_back(it->second);
+                cout << ">>>> " << it->second << " " << it->first << endl;
+                top_tokens.push_back(get_token(it->second));
+                top_counts.push_back(it->first);
+                prev = it->first;
+                if (top_vals.size() > 5) 
+                    break;
+            }
+            if (top_vals.size() > 3) {
+                instability_mark++;
+            }
+            if (top_vals.size() > 5) {
+                instability_mark++;
+            }
+            if (top_vals.size() > 3) {
+                top_vals.resize(0);
+#if 0                
+                Model<double> S(0); S.init(times);
+                top_vals.resize(1);
+                top_vals[0] = S.N;
+                top_counts[0] = times.size()/2;
+                top_tokens[0] = get_token(S.N);
+#endif                
+            } else {
+                double min_val = 1e6;
+                int min_idx = 0;
+                for (int i = 0; i < top_vals.size(); i++) {
+                    if (top_vals[i] < min_val) {
+                        min_val = top_vals[i];
+                        min_idx = i;
+                    }
+                }
+                top_vals[0] = top_vals[min_idx];
+                top_counts[0] = top_counts[min_idx];
+                top_tokens[0] = top_tokens[min_idx];
+                top_vals.resize(1);
+            }
+            int tsize = top_vals.size();
+            top_tokens.resize(tsize);
+            top_counts.resize(tsize);
+
+            int all_instability_mark = 0;
+            MPI_Reduce(&instability_mark, &all_instability_mark, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+            if (rank == 0) {
+                cout << ">> instability: " << all_instability_mark << endl;
+            }
+
+            int all_tsize = 0;
+            vector<int> displs(size + 1, 0);
+            MPI_Scan(&tsize, &all_tsize, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Gather(&all_tsize, 1, MPI_INT, &displs[1], 1, MPI_INT, 0, MPI_COMM_WORLD);
+            if (rank == 0)
+                all_tsize = displs[size];
+            else 
+                all_tsize = 0;
+            vector<double> all_top_vals(all_tsize);
+            vector<int> all_top_tokens(all_tsize);
+            vector<int> all_top_counts(all_tsize);
+            vector<int> counts(size);
+            for (int i = 0; i < size; i++) {
+                counts[i] = displs[i+1] - displs[i];
+            }
+            MPI_Gatherv(&top_vals[0], tsize, MPI_DOUBLE, &all_top_vals[0], 
+                        &counts[0], &displs[0], MPI_DOUBLE, 0, MPI_COMM_WORLD);
+            MPI_Gatherv(&top_tokens[0], tsize, MPI_INT, &all_top_tokens[0], 
+                        &counts[0], &displs[0], MPI_INT, 0, MPI_COMM_WORLD);
+            MPI_Gatherv(&top_counts[0], tsize, MPI_INT, &all_top_counts[0], 
+                        &counts[0], &displs[0], MPI_INT, 0, MPI_COMM_WORLD);
+            if (rank == 0) {
+                double sum = 0, min_val = 1e6, max_val = -1e6;
+                int n = 0;
+                map<int, value> all_times_map;
+                for (int i = 0; i < all_tsize; i++) {
+                    cout << "@@ >> " << all_top_tokens[i] << " " << all_top_vals[i] << " " << all_top_counts[i] << endl;
+                    int token = all_top_tokens[i];                  
+                    value old_val = all_times_map[token];
+                    if (old_val.first != 0 && old_val.first != all_top_vals[0])
+                        cout << ">> OOOPS" << endl;
+                    old_val.first = all_top_vals[i];
+                    old_val.second += all_top_counts[i];
+                    all_times_map[token] = old_val;
+
+                    min_val = min(old_val.first, min_val);
+                    max_val = max(old_val.first, max_val);
+                    sum += old_val.first;
+                    n++;
+                }
+/*                
+                map<int, double> all_sorted;
+                for (map<int, value>::iterator it = all_times_map.begin(); it != all_times_map.end(); ++it) {
+                    all_sorted[it->second.second] = it->second.first;
+                }
+                //double sum = 0;
+                //int n = 0, i = 0;
+                for (map<int, double>::reverse_iterator it = all_sorted.rbegin(); it != all_sorted.rend(); ++it) {
+                    cout << "@@ " << it->second << " " << it->first << endl;
+                    sum += it->second;
+                    n++;
+                    //sum += it->second * it->first;
+                    //n += it->first;
+//                    if (++i > 8)
+//                        break;
+                }
+*/              
+//                if (n > 1) { sum -= max_val; n--; }
+                cout << ">> freq: " << sum / n << endl;
+                cout << ">> minmax: " << min_val << " " << max_val << endl;
+            }
+        }
+#endif        
+    }
+    prev_count = count;
+    return 1;
+}
+
+//template class PingPongMT<MTBenchmarkSuite, mt_bcast>;
+//DECLARE_INHERITED(GLUE_TYPENAME2(PingPongMT<MTBenchmarkSuite, mt_bcast>), BcastMT)
+
+#endif
diff --git a/src_cpp/MT/MT_benchmark.h b/src_cpp/MT/MT_benchmark.h
new file mode 100644
index 00000000..0847333d
--- /dev/null
+++ b/src_cpp/MT/MT_benchmark.h
@@ -0,0 +1,491 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+#include <mpi.h>
+#include <omp.h>
+
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+
+#include "benchmark.h"
+
+using namespace std;
+
+#define GLUE_TYPENAME(A,B) A,B   // re-joins a type that contains a comma, e.g. BenchmarkMT<bs, fn_ptr>, so it can be passed as one macro argument
+#define GET_GLOBAL(TYPE, NAME) { TYPE *p = suite->get_parameter(#NAME).as<TYPE>(); \
+                                 assert(p != NULL); \
+                                 NAME = *p; }   // copies the suite parameter called #NAME into the variable of the same name; asserts that it exists
+
+
+#include "MT_types.h"
+
+// Base for allocators: remembers every raw block handed out and free()s them all on destruction.
+template <typename T>
+class Allocator {
+    protected:
+    std::vector<T *> original_ptrs;   // pointers exactly as returned by malloc
+    public:
+    virtual T *Alloc(size_t size) = 0;
+    Allocator() {}
+    virtual ~Allocator() {
+        for (size_t i = 0; i < original_ptrs.size(); i++)
+            free(original_ptrs[i]);
+    }
+    private:   // non-copyable: a copy would free the tracked blocks a second time
+    Allocator &operator=(const Allocator &) { return *this; }
+    Allocator(const Allocator &) {}
+};
+
+// Hands out zero-filled blocks whose address is a multiple of `align`; Alloc() returns NULL if malloc fails.
+template <typename T>
+class AlignedAllocator : public Allocator<T> {
+    public:
+    size_t align;
+    AlignedAllocator(size_t _align = 1) : align(_align) { }
+    virtual T *Alloc(size_t size) {
+        const size_t boundary = align ? align : 1;   // align == 0 would divide by zero below
+        char *ptr = (char *)malloc(size + boundary);
+        if (ptr == NULL) return NULL;   // previously memset() was called on the unchecked pointer
+        memset(ptr, 0, size + boundary);   // explicit fill kept on purpose: it also touches the pages
+        Allocator<T>::original_ptrs.push_back((T *)ptr);   // keep the unshifted pointer for free(); cast lets T != char compile
+        const size_t diff = boundary - ((size_t)(ptr) % boundary);   // 1..boundary, always inside the spare bytes
+        return (T *)(ptr + diff);
+    }
+    virtual ~AlignedAllocator() {}
+};
+
+inline void normal_barrier()   // plain MPI_Barrier over MPI_COMM_WORLD; `inline` because the definition lives in a header
+{
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+// Hand-rolled barrier over MPI_COMM_WORLD built from zero-byte MPI_Sendrecv
+// exchanges: in each round every rank sends to rank+mask and receives from
+// rank-mask (mod size), with mask doubling until it reaches the world size.
+// `inline` because this header defines it; otherwise every translation unit
+// that includes the header would emit its own external definition.
+inline void special_barrier()
+{
+    int size = 0;
+    int rank = 0;
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    char send_token = 0, recv_token = 0;   // distinct buffers: MPI_Sendrecv expects send and receive buffers not to overlap
+    for (int mask = 0x1; mask < size; mask <<= 1) {
+        const int dst = (rank + mask) % size;
+        const int src = (rank - mask + size) % size;
+        MPI_Sendrecv(&send_token, 0, MPI_BYTE, dst, 1010,
+                     &recv_token, 0, MPI_BYTE, src, 1010,
+                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+}
+
+template <void (bfn)()>   // bfn: the barrier routine that thread 0 runs on behalf of the whole thread team
+void omp_aware_barrier()
+{
+#pragma omp barrier 
+    if (omp_get_thread_num() == 0)   // only thread 0 calls bfn(); the other threads wait at the second OpenMP barrier
+        bfn();
+#pragma omp barrier
+}
+
+inline void no_barrier()   // selected for BARROPT_NOBARRIER; `inline` because the definition lives in a header
+{
+}
+
+typedef void (*barrier_func_t)();   // signature shared by no_barrier, normal_barrier, special_barrier and omp_aware_barrier<>
+
+struct input_benchmark_data {   // per-thread inputs handed to a benchmark kernel (filled in by BenchmarkMTBase::run_instance)
+    struct {
+        int root;               // root rank of rooted collectives; run_instance always sets 0
+    } collective;
+    struct {
+        int stride;             // copied from the suite parameter `stride`
+        bool anysource;         // NOTE(review): not assigned anywhere in this header — confirm where it is set
+        bool anytag;            // NOTE(review): not assigned anywhere in this header — confirm where it is set
+    } pt2pt;
+    struct {
+        int *cnt;               // world_size ints, allocated only with COLLECTIVE_VECTOR; run_instance sets cnt[i] = count
+        int *displs;            // world_size ints, allocated only with COLLECTIVE_VECTOR; run_instance sets displs[i] = count * i
+    } collective_vector;
+    struct {
+        barrier_func_t fn_ptr;  // barrier chosen from barrier_option; assigned only with SEPARATE_MEASURING
+    } barrier;
+    struct {
+        bool check;             // copied from the suite parameter `do_checks`
+    } checks;
+    struct {
+        bool mode_multiple;     // copied from the suite parameter `mode_multiple`
+    } threading;
+};
+
+struct output_benchmark_data {   // per-thread results reported back by a benchmark kernel
+    struct {
+        double *time_ptr;   // points at the caller's time variable with SEPARATE_MEASURING, NULL otherwise
+    } timing;
+    struct {
+        int failures;       // reset to 0 before each run; a non-zero value is printed as "CHECK FAILURES"
+    } checks;
+};
+
+typedef int (*mt_benchmark_func_t)(int repeat, int skip, void *in, void *out, int count,   // kernel entry point; run_instance passes the warm-up count as `skip`
+                                   MPI_Datatype type, MPI_Comm comm, int ranks, int size,   // `ranks` receives the caller's rank in comm, `size` the communicator size
+                                   input_benchmark_data *data, output_benchmark_data *odata);   // a zero return makes the caller discard the measured time
+
+
+// Formats one 13-character report cell; specialised below for every value type the report prints.
+template <typename T> string out_field(T val);
+// printf-style formatting into a fixed buffer of field_len characters plus terminator; longer output is cut off.
+template <int field_len, typename T>
+string do_format(const char *fmt, T val) {
+    char s[field_len+1];
+#ifdef WIN_IMB
+    _snprintf(s, field_len, fmt, val);
+#else
+    snprintf(s, field_len, fmt, val);
+#endif
+    s[field_len] = 0;   // guarantees termination even when the formatter filled the whole buffer
+    return string(s);
+}
+
+template <> inline string out_field<double>(double val) { return do_format<14>("% 13.2f", val); }
+template <> inline string out_field<int>(int val) { return do_format<14>("% 13d", val); }
+template <> inline string out_field<unsigned int>(unsigned int val) { return do_format<14>("% 13u", val); }
+template <> inline string out_field<const char *>(const char *val) { return do_format<14>("% 13s", val); }
+template <> inline string out_field<unsigned long>(unsigned long val) { return do_format<14>("% 13lu", val); }   // was "%ul": the value was read as unsigned int
+template <> inline string out_field<unsigned long long>(unsigned long long val) { return do_format<14>("% 13llu", val); }
+
+// Common driver for the multi-threaded benchmarks: init() reads the suite parameters and allocates buffers, run() executes and reports one message length, finalize() releases the per-thread data.
+template <class bs, mt_benchmark_func_t fn_ptr>
+class BenchmarkMTBase : public Benchmark {
+    public:    
+    enum Flags {
+        COLLECTIVE,
+        PT2PT,
+        SEPARATE_MEASURING,   // kernel is handed &t through odata->timing.time_ptr instead of being timed from outside
+        COLLECTIVE_VECTOR,    // allocate and fill collective_vector.cnt / .displs
+        SEND_TO_ALL,          // send buffer holds world_size chunks
+        RECV_FROM_ALL,        // receive buffer holds world_size chunks
+        SEND_TO_2,            // send buffer holds 2 chunks
+        SEND_0,               // benchmark only the zero-length message
+        RECV_FROM_2,          // receive buffer holds 2 chunks
+        TIME_DIVIDE_BY_2,     // the TIME_DIVIDE_* flags scale the reported times
+        TIME_DIVIDE_BY_4,
+        TIME_DIVIDE_BY_100,
+        SCALE_BW_TWICE,       // the SCALE_BW_* flags scale the reported bandwidth
+        SCALE_BW_FOUR,
+        OUT_BYTES,            // the OUT_* flags select the report columns
+        OUT_REPEAT,
+        OUT_TIME_MIN,
+        OUT_TIME_MAX,
+        OUT_TIME_AVG,
+        OUT_BW,
+        OUT_BW_CUMULATIVE,
+        OUT_MSGRATE,
+        OUT_MSGRATE_CUMMULATIVE
+    };
+    std::set<Flags> flags; 
+    MPI_Datatype datatype;
+    size_t datatype_size;
+    std::vector<void *> a;                        // per-thread buffers passed to the kernel as `in`
+    std::vector<void *> b;                        // per-thread buffers passed to the kernel as `out`
+    std::vector<input_benchmark_data *> idata;    // one malloc'ed struct per thread, released in finalize()
+    std::vector<output_benchmark_data *> odata;   // one malloc'ed struct per thread, released in finalize()
+    std::vector<thread_local_data_t> input;
+    std::vector<int> count;
+    int mode_multiple;
+    int stride;
+    int num_threads;
+    barropt_t barrier_option;
+    malopt_t malloc_option;
+    int malloc_align;
+    bool do_checks;
+    double time_avg, time_min, time_max;
+    int world_rank, world_size;
+    virtual void init_flags() {}
+    virtual void run_instance(thread_local_data_t *input, int count, double &t, int &result) {
+        MPI_Comm comm = input->comm;
+        int warmup = input->warmup, repeat = input->repeat;
+        if (repeat <= 0) { t = 0; result = 0; return; }   // run() reads both outputs unconditionally, so define them
+        int rank, size;
+        MPI_Comm_rank(comm, &rank);
+        MPI_Comm_size(comm, &size);
+        void *in = a[omp_get_thread_num()];
+        void *out = b[omp_get_thread_num()];
+        input_benchmark_data &idata_local = *idata[omp_get_thread_num()];
+        output_benchmark_data &odata_local = *odata[omp_get_thread_num()];
+        idata_local.collective.root = 0;
+        idata_local.pt2pt.stride = stride;
+        idata_local.checks.check = do_checks;
+        idata_local.threading.mode_multiple = mode_multiple;
+
+        barrier_func_t bfn = no_barrier;   // defined fallback in case assert(0) below is compiled out
+        switch (barrier_option) {
+            case BARROPT_NOBARRIER: bfn = no_barrier; break;
+            case BARROPT_NORMAL: 
+                if (mode_multiple) {
+                    bfn = omp_aware_barrier<normal_barrier>;
+                } else {
+                    bfn = normal_barrier;
+                }
+                break;
+            case BARROPT_SPECIAL:
+                if (mode_multiple) {
+                    bfn = omp_aware_barrier<special_barrier>;
+                } else {
+                    bfn = special_barrier;
+                }
+                break;
+            default: assert(0);
+        }
+        odata_local.checks.failures = 0;
+        if (flags.count(SEPARATE_MEASURING)) {
+            idata_local.barrier.fn_ptr = bfn;
+            if (flags.count(COLLECTIVE_VECTOR)) {
+                for (int i = 0; i < size; i++) {
+                    idata_local.collective_vector.cnt[i] = count;
+                    idata_local.collective_vector.displs[i] = count * i;
+                }
+            }
+            odata_local.timing.time_ptr = &t;
+            result = fn_ptr(repeat, warmup, in, out, count, datatype, comm, rank, size, &idata_local, &odata_local);
+        } else {
+            odata_local.timing.time_ptr = NULL;
+            fn_ptr(warmup, 0, in, out, count, datatype, comm, rank, size, &idata_local, &odata_local);
+            bfn();
+            t = MPI_Wtime();
+            result = fn_ptr(repeat, 0, in, out, count, datatype, comm, rank, size, &idata_local, &odata_local);
+            t = MPI_Wtime()-t;
+        }
+        if (!result)
+            t = 0;
+        if (odata_local.checks.failures) {   // reported whether or not the run counted as successful
+            cout << "CHECK FAILURES: rank " << rank << ": " << odata_local.checks.failures << endl;
+        }
+    }
+    virtual void init() {
+        init_flags();
+        GET_GLOBAL(vector<thread_local_data_t>, input);
+        GET_GLOBAL(int, mode_multiple);
+        GET_GLOBAL(int, stride);
+        GET_GLOBAL(int, num_threads);
+        GET_GLOBAL(int, malloc_align);
+        GET_GLOBAL(malopt_t, malloc_option);
+        GET_GLOBAL(barropt_t, barrier_option);
+        GET_GLOBAL(bool, do_checks);
+        GET_GLOBAL(MPI_Datatype, datatype);
+        if (flags.count(SEND_0))
+            count.push_back(0);
+        else
+            GET_GLOBAL(vector<int>, count);
+        int idts;
+        MPI_Type_size(datatype, &idts);
+        datatype_size = idts;
+        VarLenScope *sc = new VarLenScope(count);
+        scope = sc;
+
+        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+        // get longest element from sequence
+        size_t maxlen = sc->get_max_len();
+        size_t size_a = datatype_size * maxlen;
+        size_t size_b = datatype_size * maxlen;
+        if (flags.count(SEND_TO_ALL))
+            size_a *= world_size;
+        else if (flags.count(SEND_TO_2))
+            size_a *= 2;
+        if (flags.count(RECV_FROM_ALL))
+            size_b *= world_size;
+        else if (flags.count(RECV_FROM_2))
+            size_b *= 2;
+
+        static AlignedAllocator<char> allocator(malloc_align);
+        if (malloc_option == MALOPT_SERIAL) {
+            for (int thread_num = 0; thread_num < num_threads; thread_num++) {
+                a.push_back(allocator.Alloc(size_a));
+                b.push_back(allocator.Alloc(size_b));
+            }
+        } else if (malloc_option == MALOPT_PARALLEL) {
+            a.resize(num_threads);
+            b.resize(num_threads);
+#pragma omp parallel
+            {
+#pragma omp critical
+                {
+                    a[omp_get_thread_num()] = allocator.Alloc(size_a);
+                    b[omp_get_thread_num()] = allocator.Alloc(size_b);
+                }
+            }
+        } else if (malloc_option == MALOPT_CONTINUOUS) {
+            char *a_base = (char *)allocator.Alloc(size_a * num_threads);
+            char *b_base = (char *)allocator.Alloc(size_b * num_threads);
+            for (int thread_num = 0; thread_num < num_threads; thread_num++) {
+                a.push_back(a_base + (size_t)thread_num * size_a);
+                b.push_back(b_base + (size_t)thread_num * size_b);
+            }
+        }
+        for (int thread_num = 0; thread_num < num_threads; thread_num++) {
+            idata.push_back((input_benchmark_data *)calloc(1, sizeof(input_benchmark_data)));   // zero-filled: fields this class never assigns (pt2pt.anysource/anytag) start as false
+            odata.push_back((output_benchmark_data *)calloc(1, sizeof(output_benchmark_data)));
+            if (flags.count(COLLECTIVE_VECTOR)) {
+                idata[idata.size()-1]->collective_vector.cnt = (int *)malloc(world_size * sizeof(int));
+                idata[idata.size()-1]->collective_vector.displs = (int *)malloc(world_size * sizeof(int));
+            }        
+        }
+  }
+    virtual void run(const scope_item &item) { 
+        static int ninvocations = 0;
+        double t, tavg = 0, tmin = 1e6, tmax = 0; 
+        int nresults = 0;
+        if (mode_multiple) {
+        #pragma omp parallel default(shared)
+            {
+                double t_mp;
+                int result;
+                run_instance(&input[omp_get_thread_num()], item.len, t_mp, result);
+            #pragma omp critical
+                {
+                    tmax = max(tmax, t_mp);
+                    tmin = min(tmin, t_mp);
+                    tavg = tavg + t_mp;
+                    nresults += result;
+                }
+            }
+        } else {
+            run_instance(&input[0], item.len, t, nresults);
+            tavg = tmax = tmin = t;
+        }
+        MPI_Allreduce(&tavg, &time_avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+        MPI_Allreduce(&tmin, &time_min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+        MPI_Allreduce(&tmax, &time_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+        MPI_Allreduce(MPI_IN_PLACE, &nresults, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+        if (nresults) {
+            time_avg /= (double)nresults;   // mean over every successful thread of every rank
+        }
+        time_avg /= (double)input[0].repeat;
+        time_min /= (double)input[0].repeat;
+        time_max /= (double)input[0].repeat;
+        if (world_rank == 0) {
+            double divider = 1.0, bw_multiplier = 1.0;
+            if (flags.count(TIME_DIVIDE_BY_2)) divider *= 2.0;
+            if (flags.count(TIME_DIVIDE_BY_4)) divider *= 4.0;
+            if (flags.count(TIME_DIVIDE_BY_100)) divider *= 100.0;
+            if (flags.count(SCALE_BW_TWICE)) bw_multiplier *= 2.0;
+            if (flags.count(SCALE_BW_FOUR)) bw_multiplier *= 4.0;
+
+            time_avg /= divider;
+            time_min /= divider;
+            time_max /= divider;
+            if (nresults) {
+                if (ninvocations++ == 0) {   // first call only: print the table header
+                    cout << endl;
+                    cout << "#-----------------------------------------------------------------------------" << endl;
+                    cout << "# Benchmarking " << get_name() << endl;
+                    cout << "# #processes = " << nresults / num_threads << " (threads: " << num_threads << ")" << endl;
+                    cout << "#-----------------------------------------------------------------------------" << endl;
+                    if (flags.count(OUT_BYTES)) cout << out_field("#bytes"); //"#bytes";
+                    if (flags.count(OUT_REPEAT)) cout << out_field("#repetitions");
+                    if (flags.count(OUT_TIME_MIN)) cout << out_field("t_min[usec]");
+                    if (flags.count(OUT_TIME_MAX)) cout << out_field("t_max[usec]");
+                    if (flags.count(OUT_TIME_AVG)) cout << out_field("t_avg[usec]");
+                    if (flags.count(OUT_BW)) cout << out_field("Mbytes/sec");
+                    if (flags.count(OUT_BW_CUMULATIVE)) cout << out_field("Mbytes/sec");
+                    if (flags.count(OUT_MSGRATE)) cout << out_field("Msg/sec");
+                    if (flags.count(OUT_MSGRATE_CUMMULATIVE)) cout << out_field("Msg/sec");
+                    cout << endl;
+                }
+                // NOTE: since we do weak scalability measurements, multiply the amount of data
+                size_t real_size = item.len * datatype_size * num_threads;
+                if (flags.count(OUT_BYTES)) cout << out_field(real_size);
+                if (flags.count(OUT_REPEAT)) cout << out_field(input[0].repeat);
+                if (flags.count(OUT_TIME_MIN)) cout << out_field(1e6 * time_min);
+                if (flags.count(OUT_TIME_MAX)) cout << out_field(1e6 * time_max);
+                if (flags.count(OUT_TIME_AVG)) cout << out_field(1e6 * time_avg);
+                if (flags.count(OUT_BW)) cout << out_field((double)real_size * bw_multiplier / time_max / 1e6);
+                if (flags.count(OUT_BW_CUMULATIVE)) cout << out_field((double)real_size / (double)num_threads * bw_multiplier * (double)(nresults / 2) / time_max / 1e6);
+                if (flags.count(OUT_MSGRATE)) cout << out_field((int)(1.0 / time_avg));
+                if (flags.count(OUT_MSGRATE_CUMMULATIVE)) cout << out_field((int)((double)(nresults / 2) / time_avg));
+                cout << endl;
+            }
+            else {
+                if (ninvocations++ == 0) {
+                    cout << endl;
+                    cout << "#-----------------------------------------------------------------------------" << endl;
+                    cout << "# Benchmarking " << get_name() << endl;
+                    cout << "# NO SUCCESSFUL EXECUTIONS" << endl;
+                    cout << "#-----------------------------------------------------------------------------" << endl;
+                    cout << endl;
+                }
+            }
+        }
+    }
+    virtual void finalize() {
+        for (size_t i = 0; i < idata.size(); i++) {
+            if (flags.count(COLLECTIVE_VECTOR)) {
+                free(idata[i]->collective_vector.cnt);
+                free(idata[i]->collective_vector.displs);
+            }
+            free(idata[i]);   // the per-thread structs allocated in init() were never released before
+            free(odata[i]);
+        }
+        idata.clear();   // a second finalize() call then has nothing left to free
+        odata.clear();
+    }
+};
+template <class bs, mt_benchmark_func_t fn_ptr>   // bs: owning suite type; fn_ptr: the benchmark kernel to run
+class BenchmarkMT : public BenchmarkMTBase<bs, fn_ptr> {
+    public:
+    virtual void init_flags();   // only declared here; presumably defined per benchmark by the DECLARE_INHERITED_BENCHMARKMT blocks — confirm against that macro
+    DEFINE_INHERITED(GLUE_TYPENAME(BenchmarkMT<bs, fn_ptr>), bs);
+};
diff --git a/src_cpp/MT/MT_suite.cpp b/src_cpp/MT/MT_suite.cpp
new file mode 100644
index 00000000..6c3bd384
--- /dev/null
+++ b/src_cpp/MT/MT_suite.cpp
@@ -0,0 +1,233 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <omp.h>
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <stdio.h>
+#include "benchmark.h"
+#include "args_parser.h"
+#include "utils.h"
+#include "benchmark_suites_collection.h"
+#include "benchmark_suite.h"
+#include "utils.h"
+
+#include "MT_types.h"
+
+// Suite-wide settings of the IMB-MT suite. They are filled in by
+// BenchmarkSuite<BS_MT>::prepare() and most of them are handed out by address
+// through BenchmarkSuite<BS_MT>::get_parameter().
+namespace NS_MT {
+    // One entry per thread (resized to num_threads): communicator plus the
+    // "warmup" and "repeat" option values.
+    std::vector<thread_local_data_t> input;
+    // Non-zero when the "thread_level" option equals "multiple".
+    int mode_multiple;
+    // Value of the "stride" option.
+    int stride;
+    // 1, or omp_get_num_threads() of a parallel region when mode_multiple is set.
+    int num_threads;
+    // Rank of this process in MPI_COMM_WORLD.
+    int rank;
+    // Set to true once prepare() has finished setting the suite up.
+    bool prepared = false;
+    // Values of the "count" option.
+    std::vector<int> count;
+    // Value of the "malloc_align" option.
+    int malloc_align;
+    // Parsed "malloc_algo" option; forced to MALOPT_SERIAL unless mode_multiple is set.
+    malopt_t malloc_option;
+    // Parsed "barrier" option.
+    barropt_t barrier_option;
+    // Value of the "check" option.
+    bool do_checks;
+    // MPI_INT or MPI_CHAR, selected by the "datatype" option.
+    MPI_Datatype datatype;
+    // Value of the "noheader" flag; when set, rank 0 prints no banner in prepare().
+    bool noheader;
+}
+
+
+// NOTE(review): presumably emits the registration boilerplate that makes the
+// BS_MT suite known under the name IMB-MT -- confirm against the macro's
+// definition, which is not part of this file.
+DECLARE_BENCHMARK_SUITE_STUFF(BS_MT, IMB-MT)
+
+// Registers the command-line options of the IMB-MT suite with `parser`.
+// The options are added between set_current_group(get_name()) and
+// set_default_current_group(). `output` is not used. Always returns true.
+// The values are read back in prepare().
+template <> bool BenchmarkSuite<BS_MT>::declare_args(args_parser &parser,
+                                                     std::ostream &output) const {
+    UNUSED(output);
+    parser.set_current_group(get_name());
+    parser.add<int>("stride", 0);
+    parser.add<int>("warmup",  100);
+    parser.add<int>("repeat", 1000);
+    // The caption lists the values prepare() accepts for this option.
+    parser.add<std::string>("barrier", "on").set_caption("on|off|special");
+    // NOTE(review): the mode name suggests the "1,2,4,8" default is applied
+    // only when the option is absent altogether -- confirm in args_parser.
+    parser.add_vector<int>("count", "1,2,4,8").
+        set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING);
+    parser.add<int>("malloc_align", 64);
+    parser.add<std::string>("malloc_algo", "serial").set_caption("serial|continuous|parallel");
+    parser.add<bool>("check", false);
+    parser.add_flag("noheader");
+    parser.add<std::string>("datatype", "int").set_caption("int|char");
+    parser.set_default_current_group();
+    return true;
+}
+
+// Validates the command line for the IMB-MT suite and stores the parsed
+// settings in the NS_MT namespace-scope variables.
+//
+// Returns false, after writing a diagnostic to `output`, when unknown
+// arguments are present or an option carries an unsupported value. Returns
+// true without reading any option when the filtered benchmark list comes out
+// empty (presumably: none of the requested names belongs to this suite).
+// Otherwise fills the settings, creates the per-thread input entries, marks
+// the suite as prepared and lets rank 0 print the banner unless "noheader"
+// was given.
+template <> bool BenchmarkSuite<BS_MT>::prepare(const args_parser &parser,
+                                                const std::vector<std::string> &benchs,
+                                                const std::vector<std::string> &unknown_args,
+                                                std::ostream &output) {
+    using namespace NS_MT;
+    typedef std::vector<std::string> name_list;
+
+    // Leftover arguments are fatal; options are reported before benchmark names.
+    if (!unknown_args.empty()) {
+        name_list bad_options, bad_benchmarks;
+        for (name_list::size_type i = 0; i < unknown_args.size(); i++) {
+            const std::string &arg = unknown_args[i];
+            if (parser.is_option(arg))
+                bad_options.push_back(arg);
+            else
+                bad_benchmarks.push_back(arg);
+        }
+        for (name_list::size_type i = 0; i < bad_options.size(); i++)
+            output << "Invalid option " << bad_options[i] << std::endl;
+        for (name_list::size_type i = 0; i < bad_benchmarks.size(); i++)
+            output << "Invalid benchmark name " << bad_benchmarks[i] << std::endl;
+        return false;
+    }
+
+    // Filter the requested names against this suite's full list; stop early
+    // when nothing is left.
+    name_list all_benchs, spare_benchs(benchs), intersection(benchs);
+    BenchmarkSuite<BS_MT>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);
+    set_operations::exclude(intersection, spare_benchs);
+    if (intersection.empty())
+        return true;
+
+    parser.get<int>("count", count);
+    mode_multiple = (parser.get<std::string>("thread_level") == "multiple");
+    stride = parser.get<int>("stride");
+
+    const std::string barrier_type = parser.get<std::string>("barrier");
+    if (barrier_type == "on") {
+        barrier_option = BARROPT_NORMAL;
+    } else if (barrier_type == "off") {
+        barrier_option = BARROPT_NOBARRIER;
+    } else if (barrier_type == "special") {
+        barrier_option = BARROPT_SPECIAL;
+    } else {
+        output << get_name() << ": " << "Wrong barrier option value" << std::endl;
+        return false;
+    }
+
+    malloc_align = parser.get<int>("malloc_align");
+
+    const std::string malloc_algo = parser.get<std::string>("malloc_algo");
+    if (malloc_algo == "serial") {
+        malloc_option = MALOPT_SERIAL;
+    } else if (malloc_algo == "continuous") {
+        malloc_option = MALOPT_CONTINUOUS;
+    } else if (malloc_algo == "parallel") {
+        malloc_option = MALOPT_PARALLEL;
+    } else {
+        output << get_name() << ": " << "Wrong malloc_algo option value" << std::endl;
+        return false;
+    }
+    // Outside the "multiple" thread level only serial allocation is used.
+    if (!mode_multiple)
+        malloc_option = MALOPT_SERIAL;
+
+    do_checks = parser.get<bool>("check");
+
+    noheader = parser.get<bool>("noheader");
+
+    const std::string dt = parser.get<std::string>("datatype");
+    if (dt == "int") {
+        datatype = MPI_INT;
+    } else if (dt == "char") {
+        datatype = MPI_CHAR;
+    } else {
+        output << get_name() << ": " << "Unknown data type in datatype option" << std::endl;
+        return false;
+    }
+
+    if (do_checks && datatype != MPI_INT) {
+        output << get_name() << ": " << "Only int data type is supported with check option" << std::endl;
+        return false;
+    }
+
+    // One thread by default; in "multiple" mode take the team size that an
+    // OpenMP parallel region reports.
+    num_threads = 1;
+    if (mode_multiple) {
+#pragma omp parallel default(shared)
+#pragma omp master
+        num_threads = omp_get_num_threads();
+    }
+
+    // Every thread gets its own input entry with the same warmup/repeat values.
+    input.resize(num_threads);
+    const int warmup_value = parser.get<int>("warmup");
+    const int repeat_value = parser.get<int>("repeat");
+    for (int tid = 0; tid < num_threads; tid++) {
+        thread_local_data_t &entry = input[tid];
+        entry.comm = duplicate_comm(mode_multiple, tid);
+        entry.warmup = warmup_value;
+        entry.repeat = repeat_value;
+    }
+    prepared = true;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    if (noheader || rank != 0)
+        return true;
+
+    output << "#---------------------------------------------------------" << std::endl;
+    output << "#    Intel(R) MPI Benchmarks " << "2019" << ", MT part    " << std::endl;
+    output << "#---------------------------------------------------------" << std::endl;
+    output << "#" << std::endl;
+    return true;
+}
+
+// Writes a closing blank line to `output` on rank 0, but only when prepare()
+// actually set the suite up. The benchmark name list is ignored.
+template <> void BenchmarkSuite<BS_MT>::finalize(const std::vector<std::string> &,
+                                                 std::ostream &output) {
+    using namespace NS_MT;
+    if (!prepared || rank != 0)
+        return;
+    output << std::endl;
+}
+
+// Helper for get_parameter(): when `key` equals the spelled-out NAME, stores a
+// smart_ptr<TYPE> to the variable NAME in `result`. Expects `key` and `result`
+// to be in scope at the point of use.
+// NOTE(review): detach_ptr() presumably drops ownership so the wrapper never
+// deletes the namespace-scope object -- confirm in smart_ptr.h.
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+
+// Looks up a suite setting by name and returns it wrapped in `any`.
+// Each HANDLE_PARAMETER line compares `key` with the variable name and, on a
+// match, stores a pointer to the matching NS_MT variable in `result`.
+// A key that matches none of the names yields the default-constructed `any`.
+template <> any BenchmarkSuite<BS_MT>::get_parameter(const std::string &key)
+{
+    using namespace NS_MT;
+    any result;
+    HANDLE_PARAMETER(std::vector<thread_local_data_t>, input);    
+    HANDLE_PARAMETER(int, num_threads);
+    HANDLE_PARAMETER(int, mode_multiple);
+    HANDLE_PARAMETER(int, stride);
+    HANDLE_PARAMETER(int, malloc_align);
+    HANDLE_PARAMETER(malopt_t, malloc_option);
+    HANDLE_PARAMETER(barropt_t, barrier_option);
+    HANDLE_PARAMETER(bool, do_checks);
+    HANDLE_PARAMETER(MPI_Datatype, datatype);
+    HANDLE_PARAMETER(std::vector<int>, count);
+    return result;
+}
+
+// Explicit instantiation of the suite template for Windows builds.
+// The class-key is required by the standard syntax for an explicit
+// instantiation definition (`template class X<T>;`); the previous form
+// without it relied on a compiler extension.
+#ifdef WIN32
+template class BenchmarkSuite<BS_MT>;
+#endif
diff --git a/src_cpp/MT/MT_types.h b/src_cpp/MT/MT_types.h
new file mode 100644
index 00000000..81c518ef
--- /dev/null
+++ b/src_cpp/MT/MT_types.h
@@ -0,0 +1,81 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+// Per-thread benchmark input; the MT suite keeps one instance per thread.
+struct thread_local_data_t {
+    // Value of the "warmup" command-line option.
+    // NOTE(review): presumably the number of untimed iterations -- confirm in the benchmark loop.
+    int warmup;
+    // Value of the "repeat" command-line option.
+    // NOTE(review): presumably the number of timed iterations -- confirm in the benchmark loop.
+    int repeat;
+    // Communicator assigned to the thread (see duplicate_comm()).
+    MPI_Comm comm;
+};
+
+// Buffer allocation strategy, selected by the "malloc_algo" option.
+enum malopt_t {
+    MALOPT_SERIAL,      // "serial"
+    MALOPT_CONTINUOUS,  // "continuous"
+    MALOPT_PARALLEL     // "parallel"
+};
+
+// Barrier behaviour, selected by the "barrier" option.
+enum barropt_t {
+    BARROPT_NOBARRIER,  // "off"
+    BARROPT_NORMAL,     // "on"
+    BARROPT_SPECIAL     // "special"
+};
+
+// Returns the communicator a thread should use.
+//   mode_multiple != 0: a fresh duplicate of MPI_COMM_WORLD (MPI_Comm_dup);
+//   mode_multiple == 0: MPI_COMM_WORLD itself.
+// thread_num is accepted but not used.
+static inline MPI_Comm duplicate_comm(int mode_multiple, int thread_num)
+{
+    UNUSED(thread_num);
+    if (!mode_multiple)
+        return MPI_COMM_WORLD;
+
+    MPI_Comm duplicate;
+    MPI_Comm_dup(MPI_COMM_WORLD, &duplicate);
+    return duplicate;
+}
+
diff --git a/src_cpp/MT/Makefile.MT.mk b/src_cpp/MT/Makefile.MT.mk
new file mode 100644
index 00000000..f4b84807
--- /dev/null
+++ b/src_cpp/MT/Makefile.MT.mk
@@ -0,0 +1,58 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+# Build settings of the IMB-MT suite; pulled in by src_cpp/Makefile through
+# `include $(TARGET)/Makefile.*.mk` when TARGET=MT.
+override CPPFLAGS += -DMT
+override CPPFLAGS += -IMT
+
+BECHMARK_SUITE_SRC += MT/MT_suite.cpp MT/MT_benchmark.cpp
+
+# Objects are rebuilt when any listed header changes. MT_types.h is included
+# by MT_suite.cpp, so it has to be listed as well.
+HEADERS += MT/MT_benchmark.h MT/MT_types.h
+
+# Makes src_cpp/Makefile add -fopenmp to the compiler flags.
+WITH_OPENMP = 1
diff --git a/src_cpp/MT/Makefile_win.MT.mk b/src_cpp/MT/Makefile_win.MT.mk
new file mode 100644
index 00000000..4e07c6ec
--- /dev/null
+++ b/src_cpp/MT/Makefile_win.MT.mk
@@ -0,0 +1,64 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# NMAKE settings of the IMB-MT suite; included by Makefile_win when TARGET=MT.
+# Adds the MT define and the /openmp compiler switch.
+CPPFLAGS = $(CPPFLAGS) -DMT /openmp
+
+# Objects linked into IMB-MT: the two suite sources plus the common sources.
+BECHMARK_SUITE_OBJ = MT_suite.obj \
+                     MT_benchmark.obj\
+                     imb.obj \
+                     args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+
+# Inference rule for sources located in the parent directory.
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+
+# Inference rule for sources located in the MT/ directory.
+{MT/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c MT/$*.cpp
diff --git a/src_cpp/Makefile b/src_cpp/Makefile
new file mode 100644
index 00000000..8ec54ded
--- /dev/null
+++ b/src_cpp/Makefile
@@ -0,0 +1,198 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# Default goal. `announce` is listed first among the prerequisites of `all`.
+all: announce
+	
+# With WITH_YAML_CPP set, the bundled yaml-cpp library becomes part of `all`.
+ifdef WITH_YAML_CPP
+all: libyaml-cpp.a
+else
+all:	
+endif
+
+# No TARGET given: fall back to the `example` suite and ask the announce rule
+# to print a hint about target selection.
+ifndef TARGET
+TARGET=example
+ANNOUNCE=1
+endif
+
+# Pull in the suite's makefile fragment(s) and derive the binary name.
+# NOTE(review): TARGET is always defined at this point (see the default just
+# above), so the `else` branch looks unreachable -- confirm before relying on it.
+ifdef TARGET
+include $(TARGET)/Makefile.*.mk
+BINARY:=IMB-$(TARGET)
+else
+include */Makefile.*.mk
+BINARY:=imb
+endif
+
+# Use the Intel MPI compiler wrappers unless CC/CXX were set by the user
+# (origin `default` means make's built-in value is still in effect).
+ifeq ($(origin CC),default)
+CC=mpiicc
+endif
+ifeq ($(origin CXX),default)
+CXX=mpiicpc
+endif
+# `override` appends these flags even when the variable was set on the
+# command line.
+override CPPFLAGS += -I.
+override CFLAGS += -O0 -Wall -Wextra -pedantic -Wno-long-long
+override CXXFLAGS += -O0 -Wall -Wextra -pedantic -Wno-long-long
+# Suites that need OpenMP set WITH_OPENMP in their makefile fragment.
+ifdef WITH_OPENMP
+override CFLAGS += -fopenmp
+override CXXFLAGS += -fopenmp
+endif
+
+#ifeq ($(origin CC),default)
+#CXX=mpicc
+#endif
+#ifeq ($(origin CXX),default)
+#CC=mpicxx
+#endif
+#override CPPFLAGS += -I.
+#override CFLAGS = -g -Wall -Wextra -pedantic -Wno-long-long
+#CXXFLAGS = -g -Wall -Wextra -pedantic -Wno-long-long
+#ifdef WITH_OPENMP
+#override CFLAGS += -qopenmp
+#override CXXFLAGS += -qopenmp
+#endif
+
+
+
+# The benchmark binary is always part of the default goal.
+all: $(BINARY)
+
+# Source lists of the bundled yaml-cpp library and of its test program.
+ifdef WITH_YAML_CPP
+override CPPFLAGS += -Iyamlcpp/include -Iyamlcpp/test
+YAMLCPP_SRC = yamlcpp/src/aliasmanager.cpp \
+yamlcpp/src/binary.cpp \
+yamlcpp/src/conversion.cpp \
+yamlcpp/src/directives.cpp \
+yamlcpp/src/emitfromevents.cpp \
+yamlcpp/src/emitter.cpp \
+yamlcpp/src/emitterstate.cpp \
+yamlcpp/src/emitterutils.cpp \
+yamlcpp/src/exp.cpp \
+yamlcpp/src/iterator.cpp \
+yamlcpp/src/node.cpp \
+yamlcpp/src/nodebuilder.cpp \
+yamlcpp/src/nodeownership.cpp \
+yamlcpp/src/null.cpp \
+yamlcpp/src/ostream.cpp \
+yamlcpp/src/parser.cpp \
+yamlcpp/src/regex.cpp \
+yamlcpp/src/scanner.cpp \
+yamlcpp/src/scanscalar.cpp \
+yamlcpp/src/scantag.cpp \
+yamlcpp/src/scantoken.cpp \
+yamlcpp/src/simplekey.cpp \
+yamlcpp/src/singledocparser.cpp \
+yamlcpp/src/stream.cpp \
+yamlcpp/src/tag.cpp \
+yamlcpp/src/contrib/graphbuilder.cpp \
+yamlcpp/src/contrib/graphbuilderadapter.cpp
+
+
+YAMLCPP_TEST_CPP = yamlcpp/test/emittertests.cpp \
+yamlcpp/test/main.cpp \
+yamlcpp/test/spectests.cpp \
+yamlcpp/test/tests.cpp \
+yamlcpp/test/old-api/parsertests.cpp \
+yamlcpp/test/old-api/spectests.cpp
+
+YAMLCPP_OBJ = $(YAMLCPP_SRC:.cpp=.o)
+# Derive the test objects from YAMLCPP_TEST_CPP, the list defined above.
+# The former reference to the undefined YAMLCPP_TEST_SRC always expanded to
+# nothing, leaving the yamltest target without any test objects.
+YAMLCPP_TEST_OBJ = $(YAMLCPP_TEST_CPP:.cpp=.o)
+endif
+
+# Framework sources shared by every suite binary.
+IMB_SRC = imb.cpp args_parser.cpp args_parser_utests.cpp scope.cpp
+IMB_OBJ = $(IMB_SRC:.cpp=.o)
+
+# Suite sources: the included suite fragment contributes its own files through
+# `+=`; the suites collection is appended here.
+BECHMARK_SUITE_SRC += benchmark_suites_collection.cpp
+BECHMARK_SUITE_OBJ = $(BECHMARK_SUITE_SRC:.cpp=.o)
+
+# Headers every object depends on (see the dependency rules near the end of
+# this file).
+HEADERS += args_parser.h \
+benchmark.h \
+benchmark_suite_base.h \
+benchmark_suite.h \
+benchmark_suites_collection.h \
+smart_ptr.h \
+utils.h \
+scope.h
+
+# Rules for the bundled yaml-cpp library; active only with WITH_YAML_CPP.
+ifdef WITH_YAML_CPP
+# Static library built from the yaml-cpp objects.
+libyaml-cpp.a: $(YAMLCPP_OBJ)
+	ar qc $@ $^
+	ranlib $@
+
+# yaml-cpp test program; it is not a prerequisite of `all`.
+yamltest: $(YAMLCPP_TEST_OBJ) libyaml-cpp.a
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^
+
+# YAML_CPP_LIB is linked into $(BINARY); YAML_TEST is removed by `clean`.
+YAML_CPP_LIB = libyaml-cpp.a
+YAML_TEST = yamltest
+override CPPFLAGS += -DWITH_YAML_CPP
+endif
+
+# Prints a hint about target selection when TARGET was defaulted (ANNOUNCE=1).
+# The comparison uses the POSIX `=` operator: `==` is a bash extension that
+# makes `[` fail under /bin/sh implementations such as dash, which silently
+# suppressed the note there.
+announce:
+	@if [ "$(ANNOUNCE)" = "1" ]; then echo "NOTE: Building target: $(TARGET), binary name: $(BINARY)"; fi
+	@if [ "$(ANNOUNCE)" = "1" ]; then echo "NOTE: Use make TARGET=<DIR_NAME> to select a target suite"; fi
+
+# Link the benchmark binary from the framework objects, the suite objects,
+# any additional objects and, if enabled, the yaml-cpp library.
+$(BINARY): $(IMB_OBJ) $(BECHMARK_SUITE_OBJ) $(ADDITIONAL_OBJ) $(YAML_CPP_LIB)
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)
+
+# Pattern rules for C++ and C sources.
+%.o: %.cpp
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+
+%.o: %.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+
+# Removes objects, the binary and the yaml-cpp artifacts of the selected target.
+clean: announce
+	rm -f $(IMB_OBJ) $(YAMLCPP_OBJ) $(BECHMARK_SUITE_OBJ) $(ADDITIONAL_OBJ) $(BINARY) $(YAML_CPP_LIB) $(YAML_TEST)
+
+
+# Coarse dependencies: every object is rebuilt when any listed header or this
+# Makefile changes.
+$(BECHMARK_SUITE_OBJ): $(HEADERS) Makefile
+$(IMB_OBJ): $(HEADERS) Makefile
+
+ifdef WITH_YAML_CPP
+$(YAMLCPP_OBJ): Makefile
+endif
+
+.PHONY: announce clean all
+
diff --git a/src_cpp/Makefile_win b/src_cpp/Makefile_win
new file mode 100644
index 00000000..be08561b
--- /dev/null
+++ b/src_cpp/Makefile_win
@@ -0,0 +1,101 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# NMAKE build configuration for Windows: cl.exe serves as C and C++ compiler
+# and as linker driver.
+CC  = cl.exe
+CPP = cl.exe
+
+# Keep the incoming flags and add the Windows-specific defines.
+CFLAGS   = $(CFLAGS) -DWIN32 -DWIN_IMB
+CPPFLAGS = $(CPPFLAGS) -DWIN32 -DWIN_IMB
+
+CLINKER     = $(CC)
+CPPLINKER   = $(CPP)
+
+# Intel MPI headers and import library, located through the I_MPI_ROOT
+# environment variable.
+MPI_INCLUDE = %I_MPI_ROOT%\intel64\include
+LIBS = %I_MPI_ROOT%\intel64\lib\release\impi.lib
+
+default: all
+
+# Two modes: with TARGET=<suite> a single suite is built or cleaned; without
+# TARGET the makefile re-invokes itself once per known suite.
+!IFDEF TARGET
+
+# The suite fragment supplies BECHMARK_SUITE_OBJ and suite-specific rules.
+!INCLUDE $(TARGET)/Makefile_win.$(TARGET).mk
+
+# Deletes the objects and the executable inside the suite directory, then the
+# copy of the executable in this directory.
+clean:
+	@cd $(TARGET)
+	del /f /q $(C_OBJ) $(BECHMARK_SUITE_OBJ)
+	del /f /q IMB-$(TARGET).exe
+	@cd ..
+	del /f /q IMB-$(TARGET).exe
+
+# Builds inside the suite directory, then copies the executable up one level.
+all: MOVE_TO_TARGET_DIR $(TARGET)
+	cd ..
+	copy .\$(TARGET)\IMB-$(TARGET).exe
+
+# NOTE(review): LIB_PATH is not defined in this file -- confirm where it is
+# expected to come from.
+$(TARGET): $(C_OBJ) $(BECHMARK_SUITE_OBJ)
+	$(CPPLINKER) $(LDFLAGS) /FeIMB-$(TARGET) $(C_OBJ) $(BECHMARK_SUITE_OBJ) "$(LIB_PATH)" "$(LIBS)"
+
+MOVE_TO_TARGET_DIR:
+	cd ./$(TARGET)
+
+.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I.. $(CPPFLAGS) -c $*.cpp
+!ELSE
+
+# No TARGET given: one recursive nmake run per suite. The IMB- (or C_IMB-)
+# prefix is stripped from the pseudo-target name to obtain the suite name.
+TARGET       = IMB-RMA IMB-MT IMB-MPI1 IMB-NBC IMB-IO IMB-EXT
+CLEAN_TARGET = C_IMB-RMA C_IMB-MT C_IMB-MPI1 C_IMB-NBC C_IMB-IO C_IMB-EXT
+
+all: $(TARGET)
+
+$(TARGET):
+	nmake -f Makefile_win TARGET=$(*:IMB-=)
+
+clean: $(CLEAN_TARGET)
+
+$(CLEAN_TARGET):
+	nmake -f Makefile_win clean TARGET=$(*:C_IMB-=)
+!ENDIF
\ No newline at end of file
diff --git a/src_cpp/NBC/Makefile.NBC.mk b/src_cpp/NBC/Makefile.NBC.mk
new file mode 100644
index 00000000..bd8532d9
--- /dev/null
+++ b/src_cpp/NBC/Makefile.NBC.mk
@@ -0,0 +1,89 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+
+include helpers/Makefile.*.mk
+# The include above pulls in every helpers/Makefile.*.mk fragment; "override" keeps -DNBC even if CPPFLAGS comes from the command line.
+override CPPFLAGS += -DNBC
+# C++ sources of the suite, then the C sources built for it; C_SRC_DIR is not set in this file.
+BECHMARK_SUITE_SRC += NBC/NBC_suite.cpp NBC/NBC_benchmark.cpp
+C_SRC = $(C_SRC_DIR)/IMB_allgather.c \
+$(C_SRC_DIR)/IMB_allgatherv.c \
+$(C_SRC_DIR)/IMB_allreduce.c \
+$(C_SRC_DIR)/IMB_alltoall.c \
+$(C_SRC_DIR)/IMB_alltoallv.c \
+$(C_SRC_DIR)/IMB_barrier.c \
+$(C_SRC_DIR)/IMB_bcast.c \
+$(C_SRC_DIR)/IMB_benchlist.c \
+$(C_SRC_DIR)/IMB_chk_diff.c \
+$(C_SRC_DIR)/IMB_cpu_exploit.c \
+$(C_SRC_DIR)/IMB_declare.c \
+$(C_SRC_DIR)/IMB_err_handler.c \
+$(C_SRC_DIR)/IMB_gather.c \
+$(C_SRC_DIR)/IMB_gatherv.c \
+$(C_SRC_DIR)/IMB_g_info.c \
+$(C_SRC_DIR)/IMB_init.c \
+$(C_SRC_DIR)/IMB_init_transfer.c \
+$(C_SRC_DIR)/IMB_mem_manager.c \
+$(C_SRC_DIR)/IMB_output.c \
+$(C_SRC_DIR)/IMB_parse_name_nbc.c \
+$(C_SRC_DIR)/IMB_reduce.c \
+$(C_SRC_DIR)/IMB_reduce_scatter.c \
+$(C_SRC_DIR)/IMB_scatter.c \
+$(C_SRC_DIR)/IMB_scatterv.c \
+$(C_SRC_DIR)/IMB_sendrecv.c \
+$(C_SRC_DIR)/IMB_strgs.c \
+$(C_SRC_DIR)/IMB_utils.c \
+$(C_SRC_DIR)/IMB_warm_up.c
+C_OBJ=$(subst $(C_SRC_DIR),NBC,$(C_SRC:.c=.o))
+ADDITIONAL_OBJ += $(C_OBJ)
+# C_OBJ maps each C source to NBC/<name>.o, built by the rule below. NOTE(review): -DNBC repeats what CPPFLAGS already carries.
+NBC/%.o: $(C_SRC_DIR)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DNBC -c -o $@ $<
diff --git a/src_cpp/NBC/Makefile_win.NBC.mk b/src_cpp/NBC/Makefile_win.NBC.mk
new file mode 100644
index 00000000..bd5ae2dc
--- /dev/null
+++ b/src_cpp/NBC/Makefile_win.NBC.mk
@@ -0,0 +1,99 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+!INCLUDE  helpers/Makefile_win.helpers.mk
+# Add -DNBC to both flag sets and prefix C_SRC_DIR with ../ (each macro extends its previous value).
+CPPFLAGS = $(CPPFLAGS) -DNBC
+CFLAGS = $(CFLAGS) -DNBC
+C_SRC_DIR = ../$(C_SRC_DIR)
+# Objects of the C sources; the {C_SRC_DIR/}.c.obj rule further down compiles them.
+C_OBJ = IMB_allgather.obj \
+             IMB_allgatherv.obj \
+             IMB_allreduce.obj \
+             IMB_alltoall.obj \
+             IMB_alltoallv.obj \
+             IMB_barrier.obj \
+             IMB_bcast.obj \
+             IMB_benchlist.obj \
+             IMB_chk_diff.obj \
+             IMB_cpu_exploit.obj \
+             IMB_declare.obj \
+             IMB_err_handler.obj \
+             IMB_gather.obj \
+             IMB_gatherv.obj \
+             IMB_g_info.obj \
+             IMB_init.obj \
+             IMB_init_transfer.obj \
+             IMB_mem_manager.obj \
+             IMB_output.obj \
+             IMB_parse_name_nbc.obj \
+             IMB_reduce.obj \
+             IMB_reduce_scatter.obj \
+             IMB_scatter.obj \
+             IMB_scatterv.obj \
+             IMB_sendrecv.obj \
+             IMB_strgs.obj \
+             IMB_utils.obj \
+             IMB_warm_up.obj
+# Objects of the C++ sources: the two NBC_* files plus further .cpp files located by the inference rules below.
+BECHMARK_SUITE_OBJ = NBC_suite.obj \
+                     NBC_benchmark.obj\
+                     imb.obj args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+# Inference rules: build X.obj in the current directory from X.c / X.cpp found in the directory named in braces.
+{$(C_SRC_DIR)/}.c.obj:
+	$(CC) /I"$(MPI_INCLUDE)" $(CFLAGS) -c $(C_SRC_DIR)/$*.c
+
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+# NOTE(review): if the build runs from inside the NBC directory, this NBC/ search path may never match - confirm.
+{NBC/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c NBC/$*.cpp
diff --git a/src_cpp/NBC/NBC_benchmark.cpp b/src_cpp/NBC/NBC_benchmark.cpp
new file mode 100644
index 00000000..9b25a669
--- /dev/null
+++ b/src_cpp/NBC/NBC_benchmark.cpp
@@ -0,0 +1,340 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include "smart_ptr.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "original_benchmark.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+#define BENCHMARK(BMRK_FN, BMRK_NAME) template class OriginalBenchmark<BenchmarkSuite<BS_NBC>, BMRK_FN>; \
+DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME(OriginalBenchmark<BenchmarkSuite<BS_NBC>, BMRK_FN>), BMRK_NAME) \
+template<> smart_ptr<Bmark_descr> OriginalBenchmark<BenchmarkSuite<BS_NBC>, BMRK_FN>::descr = NULL; \
+template<> bool OriginalBenchmark<BenchmarkSuite<BS_NBC>, BMRK_FN>::init_description() // Per use: explicit instantiation, DECLARE_INHERITED_TEMPLATE(..., BMRK_NAME), descr = NULL, then this header; the { ... } written after BENCHMARK(...) becomes the function body.
+
+BENCHMARK(IMB_ibcast, Ibcast) // body below = init_description() of OriginalBenchmark<BenchmarkSuite<BS_NBC>, IMB_ibcast>
+{   // Inserts this benchmark's flags into descr->flags; DEFAULT, NONBLOCKING and NTIMES_3 are the ones Ibcast_pure leaves out.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ibcast_pure, Ibcast_pure) // body below = init_description() of the IMB_ibcast_pure instantiation
+{   // Same flags as Ibcast minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallgather, Iallgather) // body below = init_description() of the IMB_iallgather instantiation
+{   // Inserts this benchmark's flags into descr->flags; no HAS_ROOT here, and the receive side uses RECVBUF_SIZE_NP_I.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallgather_pure, Iallgather_pure) // body below = init_description() of the IMB_iallgather_pure instantiation
+{   // Same flags as Iallgather minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallgatherv, Iallgatherv) // body below = init_description() of the IMB_iallgatherv instantiation
+{   // Inserts exactly the flag list that Iallgather uses.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallgatherv_pure, Iallgatherv_pure) // body below = init_description() of the IMB_iallgatherv_pure instantiation
+{   // Same flags as Iallgatherv minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_igather, Igather) // body below = init_description() of the IMB_igather instantiation
+{   // Inserts this benchmark's flags into descr->flags: the Iallgather list plus HAS_ROOT.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_igather_pure, Igather_pure) // body below = init_description() of the IMB_igather_pure instantiation
+{   // Same flags as Igather minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_igatherv, Igatherv) // body below = init_description() of the IMB_igatherv instantiation
+{   // Inserts exactly the flag list that Igather uses.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_igatherv_pure, Igatherv_pure) // body below = init_description() of the IMB_igatherv_pure instantiation
+{   // Same flags as Igatherv minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iscatter, Iscatter) // body below = init_description() of the IMB_iscatter instantiation
+{   // Inserts this benchmark's flags into descr->flags; compared with Igather the _NP_ size flag sits on the send side.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iscatter_pure, Iscatter_pure) // body below = init_description() of the IMB_iscatter_pure instantiation
+{   // Same flags as Iscatter minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iscatterv, Iscatterv) // body below = init_description() of the IMB_iscatterv instantiation
+{   // Inserts exactly the flag list that Iscatter uses.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iscatterv_pure, Iscatterv_pure) // body below = init_description() of the IMB_iscatterv_pure instantiation
+{   // Same flags as Iscatterv minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ialltoall, Ialltoall) // body below = init_description() of the IMB_ialltoall instantiation
+{   // Inserts this benchmark's flags into descr->flags; both buffer flags are the _NP_ variants and HAS_ROOT is not set.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ialltoall_pure, Ialltoall_pure) // body below = init_description() of the IMB_ialltoall_pure instantiation
+{   // Same flags as Ialltoall minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ialltoallv, Ialltoallv) // body below = init_description() of the IMB_ialltoallv instantiation
+{   // Inserts exactly the flag list that Ialltoall uses.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ialltoallv_pure, Ialltoallv_pure) // body below = init_description() of the IMB_ialltoallv_pure instantiation
+{   // Same flags as Ialltoallv minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_NP_I);
+    descr->flags.insert(RECVBUF_SIZE_NP_I);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ireduce, Ireduce) // body below = init_description() of the IMB_ireduce instantiation
+{   // Inserts this benchmark's flags into descr->flags: the Ibcast list plus REDUCTION.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(HAS_ROOT);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ireduce_pure, Ireduce_pure) // body below = init_description() of the IMB_ireduce_pure instantiation
+{   // Flags of Ireduce minus DEFAULT, NONBLOCKING, NTIMES_3 - and also minus HAS_ROOT.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I); // NOTE(review): Ibcast_pure, Igather(v)_pure and Iscatter(v)_pure keep HAS_ROOT; confirm its absence here is intended.
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ireduce_scatter, Ireduce_scatter) // body below = init_description() of the IMB_ireduce_scatter instantiation
+{   // Inserts this benchmark's flags into descr->flags: the Ireduce list without HAS_ROOT.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ireduce_scatter_pure, Ireduce_scatter_pure) // body below = init_description() of the IMB_ireduce_scatter_pure instantiation
+{   // Same flags as Ireduce_scatter minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallreduce, Iallreduce) // body below = init_description() of the IMB_iallreduce instantiation
+{   // Inserts exactly the flag list that Ireduce_scatter uses.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_iallreduce_pure, Iallreduce_pure) // body below = init_description() of the IMB_iallreduce_pure instantiation
+{   // Same flags as Iallreduce minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(COLLECTIVE);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ibarrier, Ibarrier) // body below = init_description() of the IMB_ibarrier instantiation
+{   // Inserts this benchmark's flags into descr->flags; the only block pair here that uses SYNC and omits COLLECTIVE.
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(NTIMES_3);
+    descr->flags.insert(SYNC);
+    return true; // only exit: always reports success
+}
+
+BENCHMARK(IMB_ibarrier_pure, Ibarrier_pure) // body below = init_description() of the IMB_ibarrier_pure instantiation
+{   // Same flags as Ibarrier minus DEFAULT, NONBLOCKING and NTIMES_3.
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SYNC);
+    return true; // only exit: always reports success
+}
diff --git a/src_cpp/NBC/NBC_suite.cpp b/src_cpp/NBC/NBC_suite.cpp
new file mode 100644
index 00000000..5b2db72c
--- /dev/null
+++ b/src_cpp/NBC/NBC_suite.cpp
@@ -0,0 +1,586 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#if defined MPI1 || defined RMA || defined MPIIO || defined EXT
+#error Legacy benchmark components can't be linked together
+#endif
+
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <algorithm>
+#include <mpi.h>
+#include "args_parser.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "any.h"
+#include "benchmark_suite.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_NBC, IMB-NBC) // macro defined outside this file, invoked here for the BS_NBC suite
+// File-level state of the NBC suite; the struct types are presumably declared by the IMB_*.h headers included above - confirm.
+namespace NS_NBC {
+    struct comm_info c_info;         // load_msg_sizes() below fills its n_lens and msglen members
+    struct iter_schedule ITERATIONS;
+    struct GLOBALS glob;
+    bool prepared = false;           // starts out false; NOTE(review): presumably flipped once the suite has been prepared - confirm
+}
+
+// Reads message lengths from the text file `filename` into NS_NBC::c_info
+// (n_lens, msglen). Lines starting with '#' and lines of at most one character
+// are skipped; any other line must hold a non-negative integer, optionally
+// followed by a k/K (x1024) or m/M (x1024*1024) suffix. Returns false if the file
+// cannot be opened, has no entries, has a malformed entry or no memory is left.
+bool load_msg_sizes(const char *filename)
+{
+    using namespace NS_NBC;
+
+    FILE *t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+
+    // Pass 1: count the entries so that the table can be allocated in one go.
+    int n_lens = 0;
+    char inp_line[72];
+    while (fgets(inp_line, sizeof(inp_line), t)) {
+        if (inp_line[0] != '#' && strlen(inp_line) > 1)
+            n_lens++;
+    }
+    int *lens = (n_lens > 0) ? (int *)malloc(n_lens * sizeof(int)) : NULL;
+    if (lens == NULL) {
+        fclose(t);
+        return false;
+    }
+
+    // The table is published before parsing: callers see it (with the counted
+    // n_lens) even when a malformed entry makes this function return false.
+    c_info.n_lens = n_lens;
+    c_info.msglen = lens;
+
+    // Pass 2: re-read the already open file instead of opening it a second time.
+    rewind(t);
+    const int max_int = (int)(~0u >> 1); // largest int, spelled without <climits>
+    int filled = 0;
+    bool ok = true;
+    while (ok && fgets(inp_line, sizeof(inp_line), t)) {
+        // Exactly the skip test of pass 1, so both passes agree on what an entry is.
+        if (inp_line[0] == '#' || strlen(inp_line) <= 1)
+            continue;
+        char S[32];
+        S[0] = '\0';
+        int sz = 0;
+        // %31s: a long suffix must not overrun S (a line can carry up to 71 chars).
+        const int ierr = sscanf(inp_line, "%d%31s", &sz, S);
+
+        int mult = 1;
+        if (ierr == 2) {
+            if      (S[0] == 'k' || S[0] == 'K') mult = 1024;
+            else if (S[0] == 'm' || S[0] == 'M') mult = 1024 * 1024;
+            else                                 mult = 0; // unknown suffix
+        }
+
+        // Reject: no number (0 or EOF), negative value, unknown suffix, a product
+        // that would overflow int, or more entries than pass 1 made room for.
+        ok = ierr > 0 && sz >= 0 && mult != 0 && sz <= max_int / mult && filled < n_lens;
+        if (ok)
+            lens[filled++] = sz * mult;
+    }
+    fclose(t); // reached on every path, including the malformed-entry one
+    if (!ok)
+        return false;
+    c_info.n_lens = filled;
+    return filled != 0;
+}
+
+template <> bool BenchmarkSuite<BS_NBC>::declare_args(args_parser &parser, std::ostream &output) const {
+    UNUSED(output);
+    parser.set_current_group(get_name());
+    parser.add<int>("npmin", 2).set_caption("NPmin").
+        set_description(
+            "The argument after npmin is NPmin,\n"
+            "the minimum number of processes to run on\n"
+            "(then if IMB is started on NP processes, the process numbers\n"
+            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+            "To run on just NP processes, run IMB on NP and select -npmin NP\n"
+            "\n"
+            "Default:\n"
+            "NPmin=2\n");
+    parser.add<int>("multi", -1).set_caption("MultiMode").
+        set_description(
+            "The argument after -multi is MultiMode (0 or 1)\n"
+            "\n"
+            "If -multi is selected, running the N process version of a benchmark\n"
+            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+            "\n"
+            "MultiMode only controls default (0) or extensive (1) output charts.\n"
+            "0: only lowest performance groups is output\n"
+            "1: all groups are output\n"
+            "\n"
+            "Default:\n"
+            "multi off\n");
+    parser.add_vector<float>("off_cache", "-1.0,0.0", ',', 1, 2).
+           set_caption("cache_size[,cache_line_size]").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+                "the argument after off_cache can be either 1 single number (cache_size),\n"
+                "or 2 comma separated numbers (cache_size,cache_line_size), or just -1\n"
+                "\n"
+                "By default, without this flag, the communications buffer is\n"
+                "the same within all repetitions of one message size sample;\n"
+                "most likely, cache reusage is yielded and thus throughput results\n"
+                "that might be non realistic.\n"
+                "\n"
+                "With -off_cache, it is attempted to avoid cache reusage.\n"
+                "cache_size is a float for an upper bound of the size of the last level cache in MBytes\n"
+                "cache_line_size is assumed to be the size (Bytes) of a last level cache line\n"
+                "(can be an upper estimate).\n"
+                "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );\n"
+                "when repetitively using messages of a particular size, their addresses are advanced within those\n"
+                "buffers so that a single message is at least 2 cache lines after the end of the previous message.\n"
+                "Only when those buffers have been marched through (eventually), they will re-used from the beginning.\n"
+                "\n"
+                "A cache_size and a cache_line_size are assumed as statically defined\n"
+                "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered\n"
+                "\n"
+                "remark: -off_cache is effective for IMB-RMA, IMB-EXT, but not IMB-IO\n"
+                "\n"
+                "Examples:\n"
+                "-off_cache -1 (use defaults of IMB_mem_info.h);\n"
+                "-off_cache 2.5 (2.5 MB last level cache, default line size);\n"
+                "-off_cache 16,128 (16 MB last level cache, line size 128);\n"
+                "\n"
+                "NOTE: the off_cache mode might also be influenced by eventual internal\n"
+                "caching with the MPI library. This could make the interpretation\n"
+                "intricate.\n"
+                "\n"
+                "Default:\n"
+                "no cache control, data likely to come out of cache most of the time\n");
+    parser.add_vector<int>("iter", "1000,40,100", ',', 1, 3).
+           set_caption("msgspersample[,overall_vol[,msgs_nonaggr]]").
+           set_description(
+                "The argument after -iter can contain from 1 to 3 comma separated values\n"
+                "3 integer numbers override the defaults\n"
+                "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of IMB_settings.h\n"
+                "Examples:\n"
+                "-iter 2000        (override MSGSPERSAMPLE by value 2000)\n"
+                "-iter 1000,100    (override OVERALL_VOL by 100)\n"
+                "-iter 1000,40,150 (override MSGS_NONAGGR by 150)\n"
+                "\n"
+                "Default:\n"
+                "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h\n");
+    parser.add<string>("iter_policy", "dynamic").set_caption("iter_policy").
+           set_description(
+                "The argument after -iter_policy is a one from possible strings,\n"
+                "Specifying that policy will be used for auto iteration control:\n"
+                "dynamic,multiple_np,auto,off\n"
+                "\n"
+                "Example:\n"
+                "-iter_policy auto\n"
+                "\n"
+                "Default:\n"
+                "dynamic\n");
+    parser.add<float>("time", 10.0f).set_caption("max_runtime per sample").
+           set_description(
+                "The argument after -time is a float, specifying that\n"
+                "a benchmark will run at most that many seconds per message size\n"
+                "the combination with the -iter flag or its defaults is so that always\n"
+                "the maximum number of repetitions is chosen that fulfills all restrictions\n"
+                "\n"
+                "Example:\n"
+                "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, if\n"
+                "the default (or -iter selected) number of repetitions would take longer than that)\n"
+                "\n"
+                "remark: per sample, the rough number of repetitions to fulfill the -time request\n"
+                "is estimated in preparatory runs that use ~ 1 second overhead\n"
+                "\n"
+                "Default:\n"
+                "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10\n"
+                "(new default in IMB_3.2)\n");
+    parser.add<float>("mem", 1.0f).
+           set_caption("max. per process memory for overall message buffers").
+           set_description(
+               "The argument after -mem is a float, specifying that\n"
+               "at most that many GBytes are allocated per process for the message buffers\n"
+               "if the size is exceeded, a warning will be output, stating how much memory\n"
+               "would have been necessary, but the overall run is not interrupted\n"
+               "\n"
+               "Example:\n"
+               "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process)\n"
+               "\n"
+               "Default:\n"
+               "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h\n");
+    parser.add<string>("msglen", "").set_caption("Lengths_file").
+           set_description(
+               "The argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+               "message lengths, 1 per line\n"
+               "\n"
+               "Default:\n"
+               "no lengths_file, lengths defined by settings.h, settings_io.h\n");
+    parser.add_vector<int>("map", "0x0", 'x', 2, 2).set_caption("PxQ").
+           set_description(
+               "The argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+               "enter PxQ with the 2 numbers separated by letter \"x\" and no blancs\n"
+               "the basic communicator is set up as P by Q process grid\n"
+               "\n"
+               "If, e.g., one runs on N nodes of X processors each, and inserts\n"
+               "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+               "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+               "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+               "first priority)\n"
+               "\n"
+               "Default:\n"
+               "Q=1\n");
+    parser.add_vector<int>("msglog", "0:22", ':', 1, 2).
+           set_caption("min_msglog:max_msglog").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+               "the argument after -msglog is min:max, where min and max are non-negative integer numbers,\n"
+               "min < max, min is such that the second smallest data transfer size is max(unit, 2^min)\n"
+               "(the smallest always being 0), where unit = sizeof(float) for reductions, and unit = 1,\n"
+               "otherwise. max is such that 2^max is largest messages size, and max must be less than 31\n");
+    parser.add<bool>("root_shift", false).set_caption("on or off").
+           set_description(
+               "Controls root change at each iteration step for certain collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.add<bool>("sync", true).set_caption("on or off").
+           set_description(
+               "Controls whether all processes are syncronized at each iteration step in collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)"
+               "\n"
+               "Default:\n"
+               "on\n");
+    parser.add<bool>("imb_barrier", false).set_caption("on or off").
+           set_description(
+               "Use internal MPI-independent barrier syncronization implementation,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");                   
+    parser.set_default_current_group();
+    return true;
+}
+
+// Selects how prepare() below obtains its run settings:
+//   1 - read them from the args_parser options handled in this file,
+//   0 - delegate to IMB_basic_input().
+#define BASIC_INPUT_EXPERIMENT 1
+
+// Replaces the contents of `list` with the result of applying `tolower` to
+// every element. The transformed elements are collected through an inserter
+// into a fresh container of the same type, which is then assigned back.
+// NOTE(review): which `tolower` is selected here (a character function or a
+// project-provided string helper) depends on declarations outside this
+// excerpt -- confirm before instantiating with a new container type.
+template <typename T>
+void preprocess_list(T &list) {
+    T tmp;
+    transform(list.begin(), list.end(), inserter(tmp, tmp.end()), tolower);
+    list = tmp;
+}
+
+// Prepares the NBC suite for a run.
+//
+// Steps:
+//   1. Reports every entry of `unknown_args` on `output` as an invalid
+//      benchmark name.
+//   2. Computes which of the requested `benchs` belong to this suite; when
+//      none do, returns true without touching any state.
+//   3. Initializes c_info / ITERATIONS / glob and fills them from the parsed
+//      command-line options (npmin, multi, off_cache, iter, iter_policy,
+//      time, mem, map, msglen, msglog, root_shift, sync, imb_barrier).
+//   4. On rank 0, prints the run header and the list of benchmarks to `unit`.
+//
+// Returns false when a command-line value is rejected: npmin <= 0, a
+// non-integral off_cache line size, a PxQ map larger than the number of
+// processes, an unreadable msglen file, or msglog bounds outside [0, 31) or
+// with max < min. Returns true otherwise.
+template <> bool BenchmarkSuite<BS_NBC>::prepare(const args_parser &parser, const vector<string> &benchs,
+                                                  const vector<string> &unknown_args, std::ostream &output) {
+    using namespace NS_NBC;
+    for (vector<string>::const_iterator it = unknown_args.begin(); it != unknown_args.end(); ++it) {
+        output << "Invalid benchmark name " << *it << endl;
+    }
+    // spare_benchs  = requested names NOT known to this suite,
+    // intersection  = requested names that ARE known to this suite.
+    vector<string> all_benchs, spare_benchs = benchs, intersection = benchs;
+    BenchmarkSuite<BS_NBC>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);
+    set_operations::exclude(intersection, spare_benchs);
+    if (intersection.empty())
+        return true;
+
+    prepared = true;
+
+    IMB_set_default(&c_info);
+    IMB_init_pointers(&c_info);
+
+#if BASIC_INPUT_EXPERIMENT == 1
+    {
+        /* run time control as default */
+        ITERATIONS.n_sample=0;
+        ITERATIONS.off_cache=0;
+        ITERATIONS.cache_size=-1;
+        ITERATIONS.s_offs = ITERATIONS.r_offs = 0;
+        ITERATIONS.s_cache_iter = ITERATIONS.r_cache_iter = 1;
+        ITERATIONS.msgspersample=MSGSPERSAMPLE;
+        ITERATIONS.msgs_nonaggr=MSGS_NONAGGR;
+        ITERATIONS.overall_vol=OVERALL_VOL;
+        ITERATIONS.secs=SECS_PER_SAMPLE;
+        ITERATIONS.iter_policy=ITER_POLICY;
+        ITERATIONS.numiters=(int*)NULL;
+
+        MPI_Comm_rank(MPI_COMM_WORLD,&c_info.w_rank);
+        MPI_Comm_size(MPI_COMM_WORLD,&c_info.w_num_procs);
+
+        unit = stdout;
+
+        if( c_info.w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 ) {
+            unit = fopen(OUTPUT_FILENAME,"w");
+            // fopen() yields NULL on failure; fall back to stdout so that the
+            // fprintf(unit, ...) calls below never receive a null stream.
+            if (unit == NULL) {
+                output << "Cannot open output file " << OUTPUT_FILENAME << ", using stdout" << endl;
+                unit = stdout;
+            }
+        }
+
+        c_info.group_mode = -1;
+        glob.NP_min=2;
+    }
+    bool cmd_line_error = false;
+
+    // npmin
+    glob.NP_min = parser.get<int>("npmin");
+    if (glob.NP_min <= 0) {
+        cmd_line_error = true;
+    }
+
+    // multi
+    c_info.group_mode = parser.get<int>("multi");
+
+    // off_cache: either "cache_size" alone or "cache_size,cache_line_size"
+    vector<float> csize;
+    parser.get<float>("off_cache", csize);
+    if (csize.size() == 1) {
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = CACHE_LINE_SIZE;
+        // a negative size selects the compiled-in CACHE_SIZE
+        if (ITERATIONS.cache_size < 0.0) {
+            ITERATIONS.cache_size = CACHE_SIZE;
+        }
+    } else {
+        assert(csize.size() == 2);
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = (int)csize[1];
+        // the line size is parsed as float; reject values with a fraction
+        if (csize[1] != floor(csize[1])) {
+            cmd_line_error = true;
+        }
+    }
+    if (ITERATIONS.cache_size > 0.0)
+        ITERATIONS.off_cache = 1;
+
+    // iter: msgspersample, overall_vol (given in MBytes), msgs_nonaggr
+    vector<int> given_iter;
+    parser.get<int>("iter", given_iter);
+    ITERATIONS.msgspersample = given_iter[0];
+    ITERATIONS.overall_vol = given_iter[1] * 1024 * 1024;
+    ITERATIONS.msgs_nonaggr = given_iter[2];
+
+    // iter_policy: an unrecognized string leaves the default ITER_POLICY in place
+    string given_iter_policy = parser.get<string>("iter_policy");
+    if (given_iter_policy == "dynamic") { ITERATIONS.iter_policy = imode_dynamic; }
+    if (given_iter_policy == "off") { ITERATIONS.iter_policy = imode_off; }
+    if (given_iter_policy == "multiple_np") { ITERATIONS.iter_policy = imode_multiple_np; }
+    if (given_iter_policy == "auto") { ITERATIONS.iter_policy = imode_auto; }
+
+    // time
+    ITERATIONS.secs = parser.get<float>("time");
+
+    // mem
+    c_info.max_mem = parser.get<float>("mem");
+
+    // map: PxQ process grid must fit into the available processes
+    vector<int> given_map;
+    parser.get<int>("map", given_map);
+    c_info.px = given_map[0];
+    c_info.py = given_map[1];
+    if (c_info.px * c_info.py > c_info.w_num_procs) {
+        cmd_line_error = true;
+    }
+
+    // msglen
+    string given_msglen_filename = parser.get<string>("msglen");
+    if (given_msglen_filename != "") {
+        if (!load_msg_sizes(given_msglen_filename.c_str())) {
+            output << "Sizes File " << given_msglen_filename << " invalid or doesnt exist" << endl;
+            cmd_line_error = true;
+        }
+    }
+
+    // msglog: a single value is taken as the maximum, with minimum 0
+    vector<int> given_msglog;
+    parser.get<int>("msglog", given_msglog);
+    if (given_msglog.size() == 1) {
+        c_info.min_msg_log = 0;
+        c_info.max_msg_log = given_msglog[0];
+    } else {
+        c_info.min_msg_log = given_msglog[0];
+        c_info.max_msg_log = given_msglog[1];
+    }
+    // 1 << max_msg_log is evaluated as int below, so both logs must stay under 31
+    const int MAX_INT_LOG = 31;
+    if (c_info.min_msg_log < 0 || c_info.min_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < 0 || c_info.max_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < c_info.min_msg_log)
+        cmd_line_error = true;
+
+    // root_shift
+    c_info.root_shift = (parser.get<bool>("root_shift") ? 1 : 0);
+
+    // sync
+    c_info.sync = (parser.get<bool>("sync") ? 1 : 0);
+
+    // imb_barrier
+    IMB_internal_barrier = (parser.get<bool>("imb_barrier") ? 1 : 0);
+
+    if (cmd_line_error)
+        return false;
+
+    // one iteration counter per user-defined message length
+    if (ITERATIONS.iter_policy != imode_off &&
+        ITERATIONS.iter_policy != imode_invalid &&
+        c_info.n_lens > 0) {
+        ITERATIONS.numiters = (int *)malloc(c_info.n_lens * sizeof(int));
+    }
+
+#endif
+
+#if BASIC_INPUT_EXPERIMENT == 0
+    struct Bench *BList;
+    char *argv[] = { "" };
+    int argc = 0;
+    IMB_basic_input(&c_info, &BList, &ITERATIONS, &argc, (char ***)argv, &glob.NP_min);
+#endif
+
+    if (c_info.w_rank == 0 ) {
+        IMB_general_info();
+        fprintf(unit,"\n\n# Calling sequence was: \n\n");
+        string cmd_line;
+        parser.get_command_line(cmd_line);
+        fprintf(unit, "# %s\n\n", cmd_line.c_str());
+        if (c_info.n_lens) {
+            fprintf(unit,"# Message lengths were user defined\n");
+        } else {
+            fprintf(unit,"# Minimum message length in bytes:   %d\n",0);
+            fprintf(unit,"# Maximum message length in bytes:   %d\n", 1<<c_info.max_msg_log);
+        }
+
+        fprintf(unit,"#\n");
+        fprintf(unit,"# MPI_Datatype                   :   MPI_BYTE \n");
+        fprintf(unit,"# MPI_Datatype for reductions    :   MPI_FLOAT\n");
+        fprintf(unit,"# MPI_Op                         :   MPI_SUM  \n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"#\n");
+        fprintf(unit,"\n");
+        fprintf(unit,"# List of Benchmarks to run:\n\n");
+        // Write the list to `unit` like the rest of the header, so that it
+        // ends up in the output file (and not on stdout) when one is in use.
+        for (vector<string>::iterator it = intersection.begin(); it != intersection.end(); ++it) {
+            fprintf(unit, "# %s\n", it->c_str());
+            std::vector<std::string> comments = create(it->c_str())->get_comments();
+            for (vector<string>::iterator it_com = comments.begin(); it_com != comments.end(); ++it_com) {
+                fprintf(unit, "#     %s\n", it_com->c_str());
+            }
+        }
+    }
+    return true;
+}
+
+// Closes out an NBC run. Does nothing unless prepare() marked the suite as
+// prepared. Each requested benchmark is instantiated once (no per-benchmark
+// teardown is performed), then rank 0 writes the closing banner to `unit`.
+// `output` is not used.
+template <> void BenchmarkSuite<BS_NBC>::finalize(const vector<string> &benchs,
+                                                   std::ostream &output) {
+    UNUSED(output);
+    using namespace NS_NBC;
+    if (prepared) {
+        const size_t total = benchs.size();
+        for (size_t idx = 0; idx < total; ++idx) {
+            smart_ptr<Benchmark> instance = get_instance().create(benchs[idx]);
+            if (instance.get() != NULL) {
+                // nothing to clean up for this benchmark
+            }
+        }
+        if (c_info.w_rank == 0) {
+            fprintf(unit,"\n\n# All processes entering MPI_Finalize\n\n");
+        }
+    }
+}
+
+// Fills `benchs` with the names of all NBC benchmarks. With the
+// DEFAULT_BENCHMARKS filter, names whose benchmark reports
+// is_default() == false are removed again; names for which no benchmark
+// instance can be created are kept.
+template <> void BenchmarkSuite<BS_NBC>::get_bench_list(set<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_NBC>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS)
+        return;
+    set<string>::iterator it = benchs.begin();
+    while (it != benchs.end()) {
+        smart_ptr<Benchmark> b = get_instance().create(*it);
+        if (b.get() != NULL && !b->is_default()) {
+            // set::erase invalidates the erased iterator: step past the
+            // element first, then erase the old position.
+            benchs.erase(it++);
+        } else {
+            ++it;
+        }
+    }
+}
+
+// Fills `benchs` with the names of all NBC benchmarks. With the
+// DEFAULT_BENCHMARKS filter, names whose benchmark reports
+// is_default() == false are removed again; names for which no benchmark
+// instance can be created are kept.
+template <> void BenchmarkSuite<BS_NBC>::get_bench_list(vector<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_NBC>::get_full_list(benchs);
+    if (filter != BenchmarkSuiteBase::DEFAULT_BENCHMARKS)
+        return;
+    // Walk from the back so erasing never shifts an element that is still to
+    // be visited. `i-- > 0` also covers index 0 and copes with an empty list.
+    for (size_t i = benchs.size(); i-- > 0; ) {
+        smart_ptr<Benchmark> b = get_instance().create(benchs[i]);
+        if (b.get() == NULL) {
+            continue;
+        }
+        if (!b->is_default())
+            benchs.erase(benchs.begin() + i);
+    }
+}
+
+// Helper for get_parameter(): when `key` equals the spelled-out NAME, stores
+// a smart_ptr<TYPE> wrapping the address of NAME in `result` and then calls
+// result.detach_ptr(). Expects `key` and `result` to exist in the expanding
+// scope.
+// NOTE(review): detach_ptr() presumably releases ownership so that the
+// wrapped object (not heap-allocated here) is never deleted -- confirm
+// against the smart_ptr/any implementation.
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+
+
+// Looks up one of the suite's shared state objects by name. Recognized keys
+// are "c_info" (comm_info), "ITERATIONS" (iter_schedule) and "glob"
+// (GLOBALS); for these the returned `any` wraps a pointer to the
+// corresponding NS_NBC object. Any other key yields the default-constructed
+// `any`.
+template<> any BenchmarkSuite<BS_NBC>::get_parameter(const std::string &key) {
+    using namespace NS_NBC;
+    any result;
+    HANDLE_PARAMETER(comm_info, c_info);
+    HANDLE_PARAMETER(iter_schedule, ITERATIONS);
+    HANDLE_PARAMETER(GLOBALS, glob);
+    return result;
+}
+
+// Windows builds only: request an explicit instantiation of the NBC suite.
+// NOTE(review): standard C++ spells this `template class BenchmarkSuite<BS_NBC>;`
+// -- confirm that omitting the class-key is an intentional compiler-specific form.
+#ifdef WIN32
+template BenchmarkSuite<BS_NBC>;
+#endif
diff --git a/src_cpp/RMA/Makefile.RMA.mk b/src_cpp/RMA/Makefile.RMA.mk
new file mode 100644
index 00000000..8feebcd2
--- /dev/null
+++ b/src_cpp/RMA/Makefile.RMA.mk
@@ -0,0 +1,78 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+# Build fragment for the RMA benchmark suite.
+# Pulls in every helpers/Makefile.*.mk fragment first.
+include helpers/Makefile.*.mk
+
+# Compile everything with RMA defined, even if CPPFLAGS was set on the
+# command line (hence `override`).
+override CPPFLAGS += -DRMA
+
+# C++ sources of the suite itself, followed by the C sources taken from
+# $(C_SRC_DIR) that the suite needs.
+BECHMARK_SUITE_SRC += RMA/RMA_suite.cpp RMA/RMA_benchmark.cpp
+C_SRC = $(C_SRC_DIR)/IMB_utils.c \
+$(C_SRC_DIR)/IMB_declare.c \
+$(C_SRC_DIR)/IMB_init.c \
+$(C_SRC_DIR)/IMB_mem_manager.c \
+$(C_SRC_DIR)/IMB_benchlist.c \
+$(C_SRC_DIR)/IMB_parse_name_rma.c \
+$(C_SRC_DIR)/IMB_strgs.c \
+$(C_SRC_DIR)/IMB_err_handler.c \
+$(C_SRC_DIR)/IMB_g_info.c \
+$(C_SRC_DIR)/IMB_warm_up.c \
+$(C_SRC_DIR)/IMB_output.c \
+$(C_SRC_DIR)/IMB_init_transfer.c \
+$(C_SRC_DIR)/IMB_user_set_info.c \
+$(C_SRC_DIR)/IMB_chk_diff.c \
+$(C_SRC_DIR)/IMB_rma_put.c \
+$(C_SRC_DIR)/IMB_cpu_exploit.c \
+$(C_SRC_DIR)/IMB_rma_get.c \
+$(C_SRC_DIR)/IMB_rma_atomic.c
+# Object names: same list with .c -> .o and the $(C_SRC_DIR) prefix replaced
+# by RMA, so the objects are placed under RMA/.
+C_OBJ=$(subst $(C_SRC_DIR),RMA,$(C_SRC:.c=.o))
+ADDITIONAL_OBJ += $(C_OBJ)
+
+# Compile a C source from $(C_SRC_DIR) into RMA/. -DRMA is passed explicitly
+# here in addition to the copy already appended to CPPFLAGS above.
+RMA/%.o: $(C_SRC_DIR)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DRMA -c -o $@ $<
diff --git a/src_cpp/RMA/Makefile_win.RMA.mk b/src_cpp/RMA/Makefile_win.RMA.mk
new file mode 100644
index 00000000..39777feb
--- /dev/null
+++ b/src_cpp/RMA/Makefile_win.RMA.mk
@@ -0,0 +1,89 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# NMAKE-style build fragment for the RMA benchmark suite on Windows.
+!INCLUDE  helpers/Makefile_win.helpers.mk
+
+# Define RMA for both the C++ and the C compilations; the C sources are
+# looked up one directory above the incoming $(C_SRC_DIR).
+CPPFLAGS = $(CPPFLAGS) -DRMA
+CFLAGS = $(CFLAGS) -DRMA
+C_SRC_DIR = ../$(C_SRC_DIR)
+
+# Objects built from the C sources in $(C_SRC_DIR).
+C_OBJ = IMB_declare.obj \
+             IMB_utils.obj \
+             IMB_init.obj \
+             IMB_mem_manager.obj \
+             IMB_benchlist.obj \
+             IMB_parse_name_rma.obj \
+             IMB_strgs.obj \
+             IMB_err_handler.obj \
+             IMB_g_info.obj \
+             IMB_warm_up.obj \
+             IMB_output.obj \
+             IMB_init_transfer.obj \
+             IMB_user_set_info.obj \
+             IMB_chk_diff.obj \
+             IMB_rma_put.obj \
+             IMB_cpu_exploit.obj \
+             IMB_rma_get.obj \
+             IMB_rma_atomic.obj
+
+# Objects built from the C++ sources: the two RMA suite files plus the
+# shared driver and helper objects.
+BECHMARK_SUITE_OBJ = RMA_suite.obj \
+                     RMA_benchmark.obj\
+                     imb.obj args_parser.obj \
+                     args_parser_utests.obj \
+                     scope.obj \
+                     benchmark_suites_collection.obj
+
+# Inference rule: .c files found in $(C_SRC_DIR) -> .obj
+{$(C_SRC_DIR)/}.c.obj:
+	$(CC) /I"$(MPI_INCLUDE)" $(CFLAGS) -c $(C_SRC_DIR)/$*.c
+
+# Inference rule: .cpp files found in the parent directory -> .obj
+{../}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c ../$*.cpp
+
+# Inference rule: .cpp files found in RMA/ -> .obj
+{RMA/}.cpp.obj:
+	$(CPP) /I"$(MPI_INCLUDE)" /I. $(CPPFLAGS) -c RMA/$*.cpp
diff --git a/src_cpp/RMA/RMA_benchmark.cpp b/src_cpp/RMA/RMA_benchmark.cpp
new file mode 100644
index 00000000..98a1c5f0
--- /dev/null
+++ b/src_cpp/RMA/RMA_benchmark.cpp
@@ -0,0 +1,313 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include "smart_ptr.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+#include "original_benchmark.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+// Declares one RMA benchmark. For the C benchmark function BMRK_FN and the
+// benchmark name BMRK_NAME the macro
+//   - explicitly instantiates OriginalBenchmark<BenchmarkSuite<BS_RMA>, BMRK_FN>,
+//   - passes that type and BMRK_NAME to DECLARE_INHERITED_TEMPLATE,
+//   - defines the specialization's static `descr` member, initialized to NULL,
+//   - opens the definition of its init_description() member.
+// The braces following each BENCHMARK(...) use are therefore the body of
+// init_description(), which fills in `descr` and returns a bool.
+#define BENCHMARK(BMRK_FN, BMRK_NAME) template class OriginalBenchmark<BenchmarkSuite<BS_RMA>, BMRK_FN>; \
+DECLARE_INHERITED_TEMPLATE(GLUE_TYPENAME(OriginalBenchmark<BenchmarkSuite<BS_RMA>, BMRK_FN>), BMRK_NAME) \
+template<> smart_ptr<Bmark_descr> OriginalBenchmark<BenchmarkSuite<BS_RMA>, BMRK_FN>::descr = NULL; \
+template<> bool OriginalBenchmark<BenchmarkSuite<BS_RMA>, BMRK_FN>::init_description() 
+
+
+// Unidir_put: description for the benchmark bound to IMB_rma_single_put.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, SINGLE_TRANSFER, PUT.
+BENCHMARK(IMB_rma_single_put, Unidir_put)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Forwards unchanged to IMB_rma_single_put.
+// NOTE(review): presumably exists so that Bidir_put has a function distinct
+// from Unidir_put's to pass as the BENCHMARK template argument -- confirm.
+void IMB_rma_Bidir_put(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_rma_single_put(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// Bidir_put: same flags as Unidir_put plus BIDIR_1.
+BENCHMARK(IMB_rma_Bidir_put, Bidir_put)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(BIDIR_1);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Unidir_get: description for the benchmark bound to IMB_rma_single_get.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, SINGLE_TRANSFER, GET.
+BENCHMARK(IMB_rma_single_get, Unidir_get)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Forwards unchanged to IMB_rma_single_get.
+// NOTE(review): presumably exists so that Bidir_get has a function distinct
+// from Unidir_get's to pass as the BENCHMARK template argument -- confirm.
+void IMB_rma_Bidir_get(struct comm_info* c_info, int size,
+                      struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_rma_single_get(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// Bidir_get: same flags as Unidir_get plus BIDIR_1.
+BENCHMARK(IMB_rma_Bidir_get, Bidir_get)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(BIDIR_1);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Put_local: description for the benchmark bound to IMB_rma_put_local.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, SINGLE_TRANSFER, PUT.
+BENCHMARK(IMB_rma_put_local, Put_local)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Get_local: description for the benchmark bound to IMB_rma_get_local.
+// Flags set: SENDBUF_SIZE_I, RECVBUF_SIZE_I, SINGLE_TRANSFER, GET.
+// Unlike Put_local, the DEFAULT flag is not set here.
+BENCHMARK(IMB_rma_get_local, Get_local)
+{
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Put_all_local: description for the benchmark bound to IMB_rma_put_all_local.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, MULT_PASSIVE_TRANSFER,
+// N_MODES_1, NON_AGGREGATE, PUT.
+BENCHMARK(IMB_rma_put_all_local, Put_all_local)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(MULT_PASSIVE_TRANSFER);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Get_all_local: description for the benchmark bound to IMB_rma_get_all_local.
+// Flags set: SENDBUF_SIZE_I, RECVBUF_SIZE_I, MULT_PASSIVE_TRANSFER,
+// N_MODES_1, NON_AGGREGATE, GET.
+// Unlike Put_all_local, the DEFAULT flag is not set here.
+BENCHMARK(IMB_rma_get_all_local, Get_all_local)
+{
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(MULT_PASSIVE_TRANSFER);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// One_put_all: description for the benchmark bound to IMB_rma_put_all.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, MULT_PASSIVE_TRANSFER,
+// N_MODES_1, NON_AGGREGATE, PUT.
+BENCHMARK(IMB_rma_put_all, One_put_all)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(MULT_PASSIVE_TRANSFER);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Forwards unchanged to IMB_rma_put_all.
+// NOTE(review): presumably exists so that All_put_all has a function distinct
+// from One_put_all's to pass as the BENCHMARK template argument -- confirm.
+void IMB_rma_All_put_all(struct comm_info* c_info, int size,
+                         struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_rma_put_all(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// All_put_all: same flags as One_put_all except that COLLECTIVE replaces
+// MULT_PASSIVE_TRANSFER.
+BENCHMARK(IMB_rma_All_put_all, All_put_all)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// One_get_all: description for the benchmark bound to IMB_rma_get_all.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, MULT_PASSIVE_TRANSFER,
+// N_MODES_1, NON_AGGREGATE, GET.
+BENCHMARK(IMB_rma_get_all, One_get_all)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(MULT_PASSIVE_TRANSFER);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Forwards unchanged to IMB_rma_get_all.
+// NOTE(review): presumably exists so that All_get_all has a function distinct
+// from One_get_all's to pass as the BENCHMARK template argument -- confirm.
+void IMB_rma_All_get_all(struct comm_info* c_info, int size,
+                         struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
+    IMB_rma_get_all(c_info, size, ITERATIONS, RUN_MODE, time);
+}
+
+// All_get_all: same flags as One_get_all except that COLLECTIVE replaces
+// MULT_PASSIVE_TRANSFER.
+BENCHMARK(IMB_rma_All_get_all, All_get_all)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Exchange_put: description for the benchmark bound to IMB_rma_exchange_put.
+// Flags set: DEFAULT, SENDBUF_SIZE_2I, RECVBUF_SIZE_2I, COLLECTIVE,
+// N_MODES_1, NON_AGGREGATE, PUT.
+BENCHMARK(IMB_rma_exchange_put, Exchange_put)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_2I);
+    descr->flags.insert(RECVBUF_SIZE_2I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Exchange_get: description for the benchmark bound to IMB_rma_exchange_get.
+// Flags set: DEFAULT, SENDBUF_SIZE_2I, RECVBUF_SIZE_2I, COLLECTIVE,
+// N_MODES_1, NON_AGGREGATE, GET.
+BENCHMARK(IMB_rma_exchange_get, Exchange_get)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_2I);
+    descr->flags.insert(RECVBUF_SIZE_2I);
+    descr->flags.insert(COLLECTIVE);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(GET);
+    return true;
+}
+
+// Accumulate: description for the benchmark bound to IMB_rma_accumulate.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I, SINGLE_TRANSFER,
+// REDUCTION, PUT.
+BENCHMARK(IMB_rma_accumulate, Accumulate)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Get_accumulate: description for the benchmark bound to
+// IMB_rma_get_accumulate. Flags set: DEFAULT, SENDBUF_SIZE_I,
+// RECVBUF_SIZE_I, SINGLE_TRANSFER, REDUCTION, PUT.
+// NOTE(review): tagged PUT (not GET), identical to Accumulate -- confirm
+// this is intended.
+BENCHMARK(IMB_rma_get_accumulate, Get_accumulate)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Fetch_and_op: description for the benchmark bound to IMB_rma_fetch_and_op.
+// Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I,
+// SINGLE_ELEMENT_TRANSFER, REDUCTION, PUT.
+BENCHMARK(IMB_rma_fetch_and_op, Fetch_and_op)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_ELEMENT_TRANSFER);
+    descr->flags.insert(REDUCTION);
+    descr->flags.insert(PUT);
+    return true;
+}
+
+// Compare_and_swap: description for the benchmark bound to
+// IMB_rma_compare_and_swap. Flags set: DEFAULT, SENDBUF_SIZE_I,
+// RECVBUF_SIZE_3I, SINGLE_ELEMENT_TRANSFER, PUT. Also attaches one comment
+// line to the description.
+BENCHMARK(IMB_rma_compare_and_swap, Compare_and_swap)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_3I);
+    descr->flags.insert(SINGLE_ELEMENT_TRANSFER);
+    descr->flags.insert(PUT);
+    descr->comments.push_back("Uses MPI_INT data type");
+    return true;
+}
+
+// Truly_passive_put: description for the benchmark bound to
+// IMB_rma_passive_put. Flags set: DEFAULT, SENDBUF_SIZE_I, RECVBUF_SIZE_I,
+// SINGLE_TRANSFER, N_MODES_1, NON_AGGREGATE, NONBLOCKING, PUT. Also attaches
+// three comment lines explaining the two measured cases (t_pure / t_ovrl).
+BENCHMARK(IMB_rma_passive_put, Truly_passive_put)
+{
+    descr->flags.insert(DEFAULT);
+    descr->flags.insert(SENDBUF_SIZE_I);
+    descr->flags.insert(RECVBUF_SIZE_I);
+    descr->flags.insert(SINGLE_TRANSFER);
+    descr->flags.insert(N_MODES_1);
+    descr->flags.insert(NON_AGGREGATE);
+    descr->flags.insert(NONBLOCKING);
+    descr->flags.insert(PUT);
+    descr->comments.push_back("The benchmark measures execution time of MPI_Put for 2 cases:");
+    descr->comments.push_back("1) The target is waiting in MPI_Barrier call (t_pure value)");
+    descr->comments.push_back("2) The target performs computation and then enters MPI_Barrier routine (t_ovrl value)");
+    return true;
+}
diff --git a/src_cpp/RMA/RMA_suite.cpp b/src_cpp/RMA/RMA_suite.cpp
new file mode 100644
index 00000000..d1bbe72e
--- /dev/null
+++ b/src_cpp/RMA/RMA_suite.cpp
@@ -0,0 +1,586 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#if defined MPI1 || defined NBC || defined MPIIO || defined EXT
+#error Legacy benchmark components can't be linked together
+#endif
+
+#include <set>
+#include <vector>
+#include <string>
+#include <map>
+#include <algorithm>
+#include <mpi.h>
+#include "args_parser.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "any.h"
+#include "benchmark_suite.h"
+
+extern "C" {
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+#include "IMB_prototypes.h"
+}
+
+#include "helper_IMB_functions.h"
+
+using namespace std;
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_RMA, IMB-RMA) // NOTE(review): macro defined elsewhere; presumably declares suite BS_RMA under the name IMB-RMA -- confirm
+
+namespace NS_RMA { // State shared by the BenchmarkSuite<BS_RMA> hooks defined in this file.
+    struct comm_info c_info;           // written by load_msg_sizes() and prepare(); returned by get_parameter("c_info")
+    struct iter_schedule ITERATIONS;   // written by prepare(); returned by get_parameter("ITERATIONS")
+    struct GLOBALS glob;               // prepare() stores NP_min here; returned by get_parameter("glob")
+    bool prepared = false;             // set by prepare() once a requested benchmark belongs to this suite; finalize() returns early while false
+}
+
+// Loads user-defined message lengths from the text file `filename` into c_info.msglen and
+// c_info.n_lens. Lines starting with '#' or holding at most one character are skipped; any other
+// line must be a non-negative integer, optionally followed by a k/K (x1024) or m/M (x1024*1024)
+// suffix. Returns false if the file cannot be opened, has no usable line, or has a malformed line.
+bool load_msg_sizes(const char *filename)
+{
+    using namespace NS_RMA;
+
+    // Pass 1: count the candidate lines so the array can be sized.
+    FILE*t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+    int n_lens = 0;
+    char inp_line[72];
+    while(fgets(inp_line,72,t)) {
+        if( inp_line[0] != '#' && strlen(inp_line)>1 )
+            n_lens++;
+    }
+    fclose(t);
+    if (n_lens == 0)
+        return false;
+
+    // Pass 2: re-read the file and convert every counted line.
+    t = fopen(filename, "r");
+    if (t == NULL)
+        return false;
+    c_info.n_lens = n_lens;
+    c_info.msglen = (int *)malloc(n_lens * sizeof(int));
+    if (c_info.msglen == NULL) {
+        fclose(t);
+        return false;
+    }
+    char S[32];
+    int sz, isz;
+    isz=-1;
+    while(fgets(inp_line,72,t)) {
+        S[0]='\0';
+        // Same filter as pass 1, so both passes select the same lines.
+        if( inp_line[0] != '#' && strlen(inp_line)>1 ) {
+            int ierr;
+            sz=0;
+            // %31s keeps the optional suffix within S[32].
+            ierr=sscanf(&inp_line[0],"%d%31s",&sz,&S[0]);
+            if( ierr<=0 || ierr==EOF || sz<0 ) {
+                ierr=-1;
+            } else if(ierr==2) {
+                if      (S[0]=='k' ||  S[0]=='K') {sz=sz*1024;}
+                else if (S[0]=='m' ||  S[0]=='M') {sz=sz*1024*1024;}
+                else {
+                    ierr=-1;
+                }
+            }
+            // Malformed line, or more lines than pass 1 counted (file changed in between).
+            if( ierr<=0 || isz+1>=n_lens ) {
+                fclose(t);
+                return false;
+            }
+            isz++;
+            c_info.msglen[isz]=sz;
+        }
+    }
+    n_lens = c_info.n_lens = isz + 1;
+    fclose(t);
+    return n_lens != 0;
+}
+
+// Registers the IMB-RMA command-line options (defaults, captions, help texts) in `parser`
+// under the group named by get_name(), then switches back to the default group. Always returns true.
+template <> bool BenchmarkSuite<BS_RMA>::declare_args(args_parser &parser, std::ostream &output) const {
+    UNUSED(output);
+    parser.set_current_group(get_name());
+    parser.add<int>("npmin", 2).set_caption("NPmin").set_description(
+            "The argument after npmin is NPmin,\n"
+            "the minimum number of processes to run on\n"
+            "(then if IMB is started on NP processes, the process numbers\n"
+            "NPmin, 2*NPmin, ... ,2^k * NPmin < NP, NP are used)\n"
+            "To run on just NP processes, run IMB on NP and select -npmin NP\n"
+            "\n"
+            "Default:\n"
+            "NPmin=2\n");
+    parser.add<int>("multi", -1).set_caption("MultiMode").set_description(
+            "The argument after -multi is MultiMode (0 or 1)\n"
+            "\n"
+            "If -multi is selected, running the N process version of a benchmark\n"
+            "on NP overall, means running on (NP/N) simultaneous groups of N each.\n"
+            "\n"
+            "MultiMode only controls default (0) or extensive (1) output charts.\n"
+            "0: only lowest performance groups is output\n"
+            "1: all groups are output\n"
+            "\n"
+            "Default:\n"
+            "multi off\n");
+    parser.add_vector<float>("off_cache", "-1.0,0.0", ',', 1, 2).
+           set_caption("cache_size[,cache_line_size]").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+                "the argument after off_cache can be either 1 single number (cache_size),\n"
+                "or 2 comma separated numbers (cache_size,cache_line_size), or just -1\n"
+                "\n"
+                "By default, without this flag, the communications buffer is\n"
+                "the same within all repetitions of one message size sample;\n"
+                "most likely, cache reusage is yielded and thus throughput results\n"
+                "that might be non realistic.\n"
+                "\n"
+                "With -off_cache, it is attempted to avoid cache reusage.\n"
+                "cache_size is a float for an upper bound of the size of the last level cache in MBytes\n"
+                "cache_line_size is assumed to be the size (Bytes) of a last level cache line\n"
+                "(can be an upper estimate).\n"
+                "The sent/recv'd data are stored in buffers of size ~ 2 x MAX( cache_size, message_size );\n"
+                "when repetitively using messages of a particular size, their addresses are advanced within those\n"
+                "buffers so that a single message is at least 2 cache lines after the end of the previous message.\n"
+                "Only when those buffers have been marched through (eventually), they will be re-used from the beginning.\n"
+                "\n"
+                "A cache_size and a cache_line_size are assumed as statically defined\n"
+                "in  => IMB_mem_info.h; these are used when -off_cache -1 is entered\n"
+                "\n"
+                "remark: -off_cache is effective for IMB-RMA, IMB-EXT, but not IMB-IO\n"
+                "\n"
+                "Examples:\n"
+                "-off_cache -1 (use defaults of IMB_mem_info.h);\n"
+                "-off_cache 2.5 (2.5 MB last level cache, default line size);\n"
+                "-off_cache 16,128 (16 MB last level cache, line size 128);\n"
+                "\n"
+                "NOTE: the off_cache mode might also be influenced by eventual internal\n"
+                "caching with the MPI library. This could make the interpretation\n"
+                "intricate.\n"
+                "\n"
+                "Default:\n"
+                "no cache control, data likely to come out of cache most of the time\n");
+    parser.add_vector<int>("iter", "1000,40,100", ',', 1, 3).
+           set_caption("msgspersample[,overall_vol[,msgs_nonaggr]]").
+           set_description(
+                "The argument after -iter can contain from 1 to 3 comma separated values\n"
+                "3 integer numbers override the defaults\n"
+                "MSGSPERSAMPLE, OVERALL_VOL, MSGS_NONAGGR of IMB_settings.h\n"
+                "Examples:\n"
+                "-iter 2000        (override MSGSPERSAMPLE by value 2000)\n"
+                "-iter 1000,100    (override OVERALL_VOL by 100)\n"
+                "-iter 1000,40,150 (override MSGS_NONAGGR by 150)\n"
+                "\n"
+                "Default:\n"
+                "iteration control through parameters MSGSPERSAMPLE,OVERALL_VOL,MSGS_NONAGGR => IMB_settings.h\n");
+    parser.add<string>("iter_policy", "dynamic").set_caption("iter_policy").
+           set_description(
+                "The argument after -iter_policy is a one from possible strings,\n"
+                "Specifying that policy will be used for auto iteration control:\n"
+                "dynamic,multiple_np,auto,off\n"
+                "\n"
+                "Example:\n"
+                "-iter_policy auto\n"
+                "\n"
+                "Default:\n"
+                "dynamic\n");
+    parser.add<float>("time", 10.0f).set_caption("max_runtime per sample").
+           set_description(
+                "The argument after -time is a float, specifying that\n"
+                "a benchmark will run at most that many seconds per message size\n"
+                "the combination with the -iter flag or its defaults is so that always\n"
+                "the maximum number of repetitions is chosen that fulfills all restrictions\n"
+                "\n"
+                "Example:\n"
+                "-time 0.150       (a benchmark will (roughly) run at most 150 milli seconds per message size, if\n"
+                "the default (or -iter selected) number of repetitions would take longer than that)\n"
+                "\n"
+                "remark: per sample, the rough number of repetitions to fulfill the -time request\n"
+                "is estimated in preparatory runs that use ~ 1 second overhead\n"
+                "\n"
+                "Default:\n"
+                "A fixed time limit SECS_PER_SAMPLE =>IMB_settings.h; currently set to 10\n"
+                "(new default in IMB_3.2)\n");
+    parser.add<float>("mem", 1.0f).
+           set_caption("max. per process memory for overall message buffers").
+           set_description(
+               "The argument after -mem is a float, specifying that\n"
+               "at most that many GBytes are allocated per process for the message buffers\n"
+               "if the size is exceeded, a warning will be output, stating how much memory\n"
+               "would have been necessary, but the overall run is not interrupted\n"
+               "\n"
+               "Example:\n"
+               "-mem 0.2         (restrict memory for message buffers to 200 MBytes per process)\n"
+               "\n"
+               "Default:\n"
+               "the memory is restricted by MAX_MEM_USAGE => IMB_mem_info.h\n");
+    parser.add<string>("msglen", "").set_caption("Lengths_file").
+           set_description(
+               "The argument after -msglen is a lengths_file, an ASCII file, containing any set of nonnegative\n"
+               "message lengths, 1 per line\n"
+               "\n"
+               "Default:\n"
+               "no lengths_file, lengths defined by settings.h, settings_io.h\n");
+    parser.add_vector<int>("map", "1x1", 'x', 2, 2).set_caption("PxQ").
+           set_description(
+               "The argument after -map is PxQ, P,Q are integer numbers with P*Q <= NP\n"
+               "enter PxQ with the 2 numbers separated by letter \"x\" and no blanks\n"
+               "the basic communicator is set up as P by Q process grid\n"
+               "\n"
+               "If, e.g., one runs on N nodes of X processors each, and inserts\n"
+               "P=X, Q=N, then the numbering of processes is \"inter node first\"\n"
+               "running PingPong with P=X, Q=2 would measure inter-node performance\n"
+               "(assuming MPI default would apply 'normal' mapping, i.e. fill nodes\n"
+               "first priority)\n"
+               "\n"
+               "Default:\n"
+               "Q=1\n");
+    parser.add_vector<int>("msglog", "0:22", ':', 1, 2).
+           set_caption("min_msglog:max_msglog").
+           set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING).
+           set_description(
+               "the argument after -msglog is min:max, where min and max are non-negative integer numbers,\n"
+               "min < max, min is such that the second smallest data transfer size is max(unit, 2^min)\n"
+               "(the smallest always being 0), where unit = sizeof(float) for reductions, and unit = 1,\n"
+               "otherwise. max is such that 2^max is largest messages size, and max must be less than 31\n");
+    parser.add<bool>("root_shift", false).set_caption("on or off").
+           set_description(
+               "Controls root change at each iteration step for certain collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.add<bool>("sync", true).set_caption("on or off").
+           set_description(
+               "Controls whether all processes are synchronized at each iteration step in collective benchmarks,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "on\n");
+    parser.add<bool>("imb_barrier", false).set_caption("on or off").
+           set_description(
+               "Use internal MPI-independent barrier synchronization implementation,\n"
+               "possible argument values are on (1|enable|yes) or off (0|disable|no)\n"
+               "\n"
+               "Default:\n"
+               "off\n");
+    parser.set_default_current_group();
+    return true;
+}
+
+#define BASIC_INPUT_EXPERIMENT 1 /* 1: prepare() reads its settings from args_parser; 0: prepare() calls IMB_basic_input() instead */
+
+template <typename T>
+void preprocess_list(T &list) { // Replaces `list` with a container built by passing each element through tolower.
+    T tmp;
+    transform(list.begin(), list.end(), inserter(tmp, tmp.end()), tolower); // NOTE(review): tolower acts on single characters, so T presumably has char elements (e.g. std::string) -- confirm intended use
+    list = tmp;
+}
+
+// Suite preparation hook: reports every entry of `unknown_args` on `output`; if a requested benchmark
+// belongs to IMB-RMA, fills NS_RMA::c_info/ITERATIONS/glob from the parsed options and lets rank 0 print
+// the run header to `unit`. Returns false when an option value is rejected, true otherwise.
+template <> bool BenchmarkSuite<BS_RMA>::prepare(const args_parser &parser, const vector<string> &benchs,
+                                                  const vector<string> &unknown_args, std::ostream &output) {
+    using namespace NS_RMA;
+    for (vector<string>::const_iterator it = unknown_args.begin(); it != unknown_args.end(); ++it) {
+        output << "Invalid benchmark name " << *it << endl;
+    }
+    // NOTE(review): presumably leaves in `intersection` only the requested names this suite provides -- confirm set_operations::exclude
+    vector<string> all_benchs, spare_benchs = benchs, intersection = benchs;
+    BenchmarkSuite<BS_RMA>::get_full_list(all_benchs);
+    set_operations::exclude(spare_benchs, all_benchs);
+    set_operations::exclude(intersection, spare_benchs);
+    if (intersection.size() == 0)
+        return true;
+
+    prepared = true;
+    IMB_set_default(&c_info);
+    IMB_init_pointers(&c_info);
+
+#if BASIC_INPUT_EXPERIMENT == 1
+    {
+        /* run time control as default */
+        ITERATIONS.n_sample=0;
+        ITERATIONS.off_cache=0;
+        ITERATIONS.cache_size=-1;
+        ITERATIONS.s_offs = ITERATIONS.r_offs = 0;
+        ITERATIONS.s_cache_iter = ITERATIONS.r_cache_iter = 1;
+        ITERATIONS.msgspersample=MSGSPERSAMPLE;
+        ITERATIONS.msgs_nonaggr=MSGS_NONAGGR;
+        ITERATIONS.overall_vol=OVERALL_VOL;
+        ITERATIONS.secs=SECS_PER_SAMPLE;
+        ITERATIONS.iter_policy=ITER_POLICY;
+        ITERATIONS.numiters=(int*)NULL;
+
+        MPI_Comm_rank(MPI_COMM_WORLD,&c_info.w_rank);
+        MPI_Comm_size(MPI_COMM_WORLD,&c_info.w_num_procs);
+
+        unit = stdout;
+        if( c_info.w_rank == 0 && strlen(OUTPUT_FILENAME) > 0 )
+            unit = fopen(OUTPUT_FILENAME,"w");
+        if (unit == NULL)   /* output file could not be opened: keep reporting on stdout */
+            unit = stdout;
+        c_info.group_mode = -1;
+        glob.NP_min=2;
+    }
+    bool cmd_line_error = false;
+
+    // npmin
+    glob.NP_min = parser.get<int>("npmin");
+    if (glob.NP_min <= 0) {
+        cmd_line_error = true;
+    }
+
+    // multi
+    c_info.group_mode = parser.get<int>("multi");
+
+    // off_cache
+    vector<float> csize;
+    parser.get<float>("off_cache", csize);
+    if (csize.size() == 1) {
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = CACHE_LINE_SIZE;
+        if (ITERATIONS.cache_size < 0.0) {
+            ITERATIONS.cache_size = CACHE_SIZE;
+        }
+    } else {
+        assert(csize.size() == 2);
+        ITERATIONS.cache_size = csize[0];
+        ITERATIONS.cache_line_size = (int)csize[1];
+        if (csize[1] != floor(csize[1])) {
+            cmd_line_error = true;
+        }
+    }
+    if (ITERATIONS.cache_size > 0.0)
+        ITERATIONS.off_cache = 1;
+
+    // iter
+    vector<int> given_iter;
+    parser.get<int>("iter", given_iter);
+    ITERATIONS.msgspersample = given_iter[0];
+    ITERATIONS.overall_vol = given_iter[1] * 1024 * 1024;
+    ITERATIONS.msgs_nonaggr = given_iter[2];
+
+    // iter_policy
+    string given_iter_policy = parser.get<string>("iter_policy");
+    if (given_iter_policy == "dynamic") { ITERATIONS.iter_policy = imode_dynamic; }
+    if (given_iter_policy == "off") { ITERATIONS.iter_policy = imode_off; }
+    if (given_iter_policy == "multiple_np") { ITERATIONS.iter_policy = imode_multiple_np; }
+    if (given_iter_policy == "auto") { ITERATIONS.iter_policy = imode_auto; }
+
+    // time
+    ITERATIONS.secs = parser.get<float>("time");
+
+    // mem
+    c_info.max_mem = parser.get<float>("mem");
+
+    // map
+    vector<int> given_map;
+    parser.get<int>("map", given_map);
+    c_info.px = given_map[0];
+    c_info.py = given_map[1];
+    if (c_info.px * c_info.py > c_info.w_num_procs) {
+        cmd_line_error = true;
+    }
+
+    // msglen
+    string given_msglen_filename = parser.get<string>("msglen");
+    if (given_msglen_filename != "") {
+        if (!load_msg_sizes(given_msglen_filename.c_str())) {
+            output << "Sizes File " << given_msglen_filename << " invalid or doesnt exist" << endl;
+            cmd_line_error = true;
+        }
+    }
+
+    // msglog
+    vector<int> given_msglog;
+    parser.get<int>("msglog", given_msglog);
+    if (given_msglog.size() == 1) {
+        c_info.min_msg_log = 0;
+        c_info.max_msg_log = given_msglog[0];
+    } else {
+        c_info.min_msg_log = given_msglog[0];
+        c_info.max_msg_log = given_msglog[1];
+    }
+    const int MAX_INT_LOG = 31;
+    if (c_info.min_msg_log < 0 || c_info.min_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < 0 || c_info.max_msg_log >= MAX_INT_LOG)
+        cmd_line_error = true;
+    if (c_info.max_msg_log < c_info.min_msg_log)
+        cmd_line_error = true;
+
+    // root_shift
+    c_info.root_shift = (parser.get<bool>("root_shift") ? 1 : 0);
+
+    // sync
+    c_info.sync = (parser.get<bool>("sync") ? 1 : 0);
+
+    // imb_barrier
+    IMB_internal_barrier = (parser.get<bool>("imb_barrier") ? 1 : 0);
+
+    if (cmd_line_error)
+        return false;
+
+    if (ITERATIONS.iter_policy != imode_off &&
+        ITERATIONS.iter_policy != imode_invalid &&
+        c_info.n_lens > 0) {
+        ITERATIONS.numiters = (int *)malloc(c_info.n_lens * sizeof(int));
+    }
+#endif
+#if BASIC_INPUT_EXPERIMENT == 0
+    struct Bench *BList;
+    char *argv[] = { "" };
+    int argc = 0;
+    IMB_basic_input(&c_info, &BList, &ITERATIONS, &argc, (char ***)argv, &glob.NP_min);
+#endif
+
+    if (c_info.w_rank == 0 ) {
+        IMB_general_info();
+        fprintf(unit,"\n\n# Calling sequence was: \n\n");
+        string cmd_line;
+        parser.get_command_line(cmd_line);
+        fprintf(unit, "# %s \n\n", cmd_line.c_str());
+        if (c_info.n_lens) {
+            fprintf(unit,"# Message lengths were user defined\n");
+        } else {
+            fprintf(unit,"# Minimum message length in bytes:   %d\n",0);
+            fprintf(unit,"# Maximum message length in bytes:   %d\n", 1<<c_info.max_msg_log);
+        }
+        fprintf(unit,"#\n");
+        fprintf(unit,"# MPI_Datatype                   :   MPI_BYTE \n");
+        fprintf(unit,"# MPI_Datatype for reductions    :   MPI_FLOAT\n");
+        fprintf(unit,"# MPI_Op                         :   MPI_SUM  \n");
+        fprintf(unit,"# \n");
+        fprintf(unit,"# \n");
+        fprintf(unit,"\n");
+        fprintf(unit,"# List of Benchmarks to run:\n\n");
+        for (vector<string>::iterator it = intersection.begin(); it != intersection.end(); ++it) {
+            fprintf(unit, "# %s\n", it->c_str());   /* same stream as the list header above */
+            std::vector<std::string> comments = create(it->c_str())->get_comments();
+            for (vector<string>::iterator it_com = comments.begin(); it_com != comments.end(); ++it_com) {
+                fprintf(unit, "#     %s\n", it_com->c_str());
+            }
+        }
+    }
+    return true;
+}
+
+template <> void BenchmarkSuite<BS_RMA>::finalize(const vector<string> &benchs,
+                                                   std::ostream &output) {
+    using namespace NS_RMA;
+    UNUSED(output);
+    // Nothing to do unless prepare() found a requested benchmark that belongs to this suite.
+    if (prepared) {
+        for (size_t idx = 0; idx < benchs.size(); ++idx) {
+            // Each requested benchmark is instantiated; no per-benchmark action is taken.
+            smart_ptr<Benchmark> bench = get_instance().create(benchs[idx]);
+            if (bench.get() == NULL)
+                continue;
+        }
+        if (c_info.w_rank == 0)
+            fprintf(unit,"\n\n# All processes entering MPI_Finalize\n\n");
+    }
+}
+
+// Fills `benchs` with the suite's benchmark names; with DEFAULT_BENCHMARKS, names whose benchmark is not is_default() are dropped.
+template <> void BenchmarkSuite<BS_RMA>::get_bench_list(set<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_RMA>::get_full_list(benchs);
+    if (filter == BenchmarkSuiteBase::DEFAULT_BENCHMARKS) {
+        for (set<string>::iterator it = benchs.begin(); it != benchs.end(); ) {
+            set<string>::iterator cur = it++;   // step first: set::erase invalidates only the erased iterator
+            smart_ptr<Benchmark> b = get_instance().create(*cur);
+            if (b.get() != NULL && !b->is_default())
+                benchs.erase(cur);
+        }
+    }
+}
+
+// Fills `benchs` with the suite's benchmark names (via get_full_list). With the
+// DEFAULT_BENCHMARKS filter, every name whose benchmark can be created and is not
+// is_default() is removed again; names that cannot be created are kept.
+template <> void BenchmarkSuite<BS_RMA>::get_bench_list(vector<string> &benchs,
+                                                         BenchmarkSuiteBase::BenchListFilter filter) const {
+    BenchmarkSuite<BS_RMA>::get_full_list(benchs);
+    if (filter == BenchmarkSuiteBase::DEFAULT_BENCHMARKS) {
+        // Walk from the back so erasing never shifts an element that is still to be visited;
+        // `i-- > 0` reaches index 0 and is safe for an empty vector (no unsigned wrap-around).
+        for (size_t i = benchs.size(); i-- > 0; ) {
+            smart_ptr<Benchmark> b = get_instance().create(benchs[i]);
+            if (b.get() != NULL && !b->is_default())
+                benchs.erase(benchs.begin() + i);
+        }
+    }
+}
+
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }
+// HANDLE_PARAMETER (above): when `key` equals the spelling of NAME, stores a smart_ptr to &NAME in `result`
+// and calls detach_ptr() on it. NOTE(review): presumably so the namespace-scope object is never deleted -- confirm in smart_ptr.h
+template<> any BenchmarkSuite<BS_RMA>::get_parameter(const std::string &key) { // looks up one of the NS_RMA objects by name
+    using namespace NS_RMA;
+    any result;                                    // stays default-constructed when `key` matches none of the names below
+    HANDLE_PARAMETER(comm_info, c_info);
+    HANDLE_PARAMETER(iter_schedule, ITERATIONS);
+    HANDLE_PARAMETER(GLOBALS, glob);
+    return result;
+}
+
+#ifdef WIN32 /* only compiled when WIN32 is defined */
+template BenchmarkSuite<BS_RMA>; // explicit instantiation of the suite template for BS_RMA
+#endif
diff --git a/src_cpp/any.h b/src_cpp/any.h
new file mode 100644
index 00000000..dd73972c
--- /dev/null
+++ b/src_cpp/any.h
@@ -0,0 +1,122 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+#include <typeinfo>
+#include "smart_ptr.h"
+
+// Type-erased wrapper around one smart_ptr<T>; as<T>() hands back the raw pointer only for the exact stored type.
+class any
+{
+    // Interface shared by all holders; its defaults describe an untyped payload with no object.
+    struct holder_base
+    {
+        virtual void *get() const { return NULL; }
+        virtual const std::type_info &get_type_id() const { return typeid(void); }
+        virtual ~holder_base() {}
+        int dummy;
+    };
+    // Concrete holder: keeps the smart_ptr<T> and reports typeid(T).
+    template <class T>
+    struct holder : holder_base
+    {
+        smart_ptr<T> payload;
+        holder(smart_ptr<T> source) : payload(source) {}
+        virtual const std::type_info &get_type_id() const { return typeid(T); }
+        virtual void *get() const { return payload.get(); }
+    };
+    smart_ptr<holder_base> held;
+public:
+    any() {} // holds nothing: as<T>() returns NULL for every T
+    template <class T>
+    any(smart_ptr<T> objectToStore) : held(new holder<T>(objectToStore)) {}
+    // Returns the stored object as T*, or NULL when nothing is stored or the stored type is not exactly T.
+    template <class T>
+    T *as() const {
+        if (held.get() == NULL || !(typeid(T) == held->get_type_id()))
+            return NULL;
+        return static_cast<T *>(held->get());
+    }
+    void detach_ptr() { held.detach(); } // forwards to detach() on the holder pointer
+};
+
+
+/*
+#include <iostream>
+#include <vector>
+
+struct A
+{
+  void say() { std::cout << "I am A" << std::endl; }
+  ~A() { std::cout << "Destroyed an A" << std::endl; }
+};
+
+struct B
+{
+  void say() { std::cout << "I am B" << std::endl; }
+  ~B() { std::cout << "Destroyed a B" << std::endl; }
+};
+
+int main()
+{
+    std::vector<any> collection(4);
+    collection[0] = smart_ptr<A>(new A);
+    collection[1] = smart_ptr<B>(new B);
+    collection[2] = smart_ptr<A>(new A);
+    collection[3] = smart_ptr<A>(new A);
+    for (size_t i = 0; i < collection.size(); i++) {
+        A *A_ptr = collection[i].as<A>();
+        B *B_ptr = collection[i].as<B>();
+        if (A_ptr) A_ptr->say();        
+        if (B_ptr) B_ptr->say();
+    }
+    return 0;
+}
+*/
diff --git a/src_cpp/args_parser.cpp b/src_cpp/args_parser.cpp
new file mode 100644
index 00000000..99b39b5a
--- /dev/null
+++ b/src_cpp/args_parser.cpp
@@ -0,0 +1,787 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include "args_parser.h"
+#ifdef WITH_YAML_CPP
+#include "yaml-cpp/yaml.h"
+#endif
+
+#include <stdexcept>
+#include <assert.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <algorithm>
+
+using namespace std;
+
+// FIXME!!! change this to some positive integer value at release time
+const int args_parser::version = 0;
+
+// Copies the payload of `other` into this value. `other` must be initialized and,
+// if this value is already initialized, of the same type; both conditions are
+// checked with assert only.
+args_parser::value &args_parser::value::operator=(const args_parser::value &other) {
+    assert(other.initialized);
+    if (initialized) assert(other.type == type);
+    type = other.type;
+    // copy just the member selected by the type tag
+    if (type == STRING)     str = other.str;
+    else if (type == INT)   i = other.i;
+    else if (type == FLOAT) f = other.f;
+    else if (type == BOOL)  b = other.b;
+    else assert(NULL == "Impossible case in switch(type)");
+    initialized = true;
+    return *this;
+}
+
+// Converts the text `sval` to a value of kind `_type` and stores it.
+// Returns true, and marks the value initialized, only if the conversion succeeded;
+// the type tag is overwritten even on failure.
+bool args_parser::value::parse(const char *sval, arg_t _type) {
+    type = _type;
+    int converted = 0;  // 1 means exactly one item was converted
+    switch(type) {
+        case STRING: str.assign(sval); converted = 1; break;
+        case INT: converted = sscanf(sval, "%d", &i); break;
+        case FLOAT: converted = sscanf(sval, "%f", &f); break;
+        case BOOL: {
+            // accepted spellings are matched exactly (all-lowercase or all-uppercase)
+            static const char *const truthy[] = { "on", "yes", "ON", "YES",
+                "true", "enable", "TRUE", "ENABLE", "1" };
+            static const char *const falsy[] = { "off", "no", "OFF", "NO",
+                "false", "disable", "FALSE", "DISABLE", "0" };
+            const string s(sval);
+            for (size_t k = 0; k < sizeof(truthy) / sizeof(truthy[0]) && converted == 0; k++) {
+                if (s == truthy[k]) { b = true; converted = 1; }
+                else if (s == falsy[k]) { b = false; converted = 1; }
+            }
+            break;
+        }
+        default: assert(NULL == "Impossible case in switch(type)");
+    }
+    if (converted != 1)
+        return false;
+    initialized = true;
+    return true;
+}
+
+void args_parser::value::sanity_check(arg_t _type) const { // debug-only consistency check (asserts vanish under NDEBUG)
+    assert(type == _type); // the stored kind must be the one the caller expects
+    assert(initialized); // and a value must actually have been stored
+}
+
+// Maps a type tag to its upper-case name. An unknown tag trips an assert in
+// debug builds and otherwise yields an empty string.
+const string args_parser::value::get_type_str(arg_t _type) {
+    if (_type == STRING) return "STRING";
+    if (_type == INT)    return "INT";
+    if (_type == FLOAT)  return "FLOAT";
+    if (_type == BOOL)   return "BOOL";
+    assert(NULL == "Impossible case in switch(type)");
+    return "";
+}
+
+#ifdef WITH_YAML_CPP
+// Emits the payload of an initialized value; an uninitialized value emits nothing.
+YAML::Emitter &operator<< (YAML::Emitter& out, const args_parser::value &v) {
+    if (!v.is_initialized()) return out;
+    switch(v.type) {
+        case args_parser::STRING: out << v.str.c_str(); break;
+        case args_parser::INT:    out << v.i; break;
+        case args_parser::FLOAT:  out << v.f; break;
+        case args_parser::BOOL:   out << v.b; break;
+        default: assert(NULL == "Impossible case in switch(type)");
+    }
+    return out;
+}
+
+// Reads a YAML node into the member selected by v.type (which must already be
+// set), then marks the value initialized.
+void operator>> (const YAML::Node& node, args_parser::value &v) {
+    if (v.type == args_parser::STRING)     node >> v.str;
+    else if (v.type == args_parser::INT)   node >> v.i;
+    else if (v.type == args_parser::FLOAT) node >> v.f;
+    else if (v.type == args_parser::BOOL)  node >> v.b;
+    else assert(NULL == "Impossible case in switch(type)");
+    v.initialized = true;
+}
+
+YAML::Emitter &operator<< (YAML::Emitter& out, const args_parser::option &opt) { // emit an option's value(s)
+    opt.to_yaml(out); // delegates to the option's own to_yaml()
+    return out;
+}
+
+void operator>> (const YAML::Node& node, args_parser::option &opt) { // read an option's value(s) from a YAML node
+    opt.from_yaml(node); // delegates to the option's own from_yaml()
+    opt.defaulted = false; // a value read from YAML no longer counts as a default
+}
+
+void args_parser::option_scalar::to_yaml(YAML::Emitter& out) const { out << val; } // emits the single value
+void args_parser::option_scalar::from_yaml(const YAML::Node& node) { node >> val; } // reads the single value
+
+void args_parser::option_vector::to_yaml(YAML::Emitter& out) const { out << val; } // emits the whole vector of values
+void args_parser::option_vector::from_yaml(const YAML::Node& node) // reads the whole vector back from a YAML node
+{
+    node >> val;
+    assert(val.size() >= (size_t)vec_min && val.size() <= (size_t)vec_max); // debug-only length check against [vec_min, vec_max]
+}
+#endif
+
+// Parses `sval` into the scalar value. When the NODUPLICATE flag is set, a value
+// that is already initialized is not overwritten and false is returned.
+bool args_parser::option_scalar::do_parse(const char *sval) {
+    return !(val.initialized && parser.is_flag_set(NODUPLICATE)) && val.parse(sval, type);
+}
+
+bool args_parser::option_vector::do_parse(const char *const_sval) { // parse a vec_delimiter-separated list and append it to val
+    bool res = true;
+    string sval(const_sval); // writable copy: delimiters are overwritten with NUL below
+    std::vector<int> positions; // offset at which each element ends
+    for (const char *s = sval.c_str(); *s; s++) {
+        if (*s == vec_delimiter)
+            positions.push_back(s - sval.c_str());
+    }
+    positions.push_back(sval.size()); // the last element ends at the end of the string
+    size_t nelems = sval.size() ? positions.size() : 0; // an empty string means zero elements, not one empty element
+    size_t max_elem = num_already_initialized_elems + nelems;
+    if (max_elem < (size_t)vec_min || max_elem > (size_t)vec_max) // resulting length must stay within [vec_min, vec_max]
+        return false;
+    val.resize(std::max(max_elem, val.size())); // grow only, never shrink
+    if (nelems == 0) 
+        return true;
+    for (size_t i = 0, j = 0; i < positions.size(); i++) { // j is the start offset of the current element
+        sval[positions[i]] = 0; // terminate the current element in place
+        int n = num_already_initialized_elems + i; // destination slot in val
+        if (val[n].initialized && parser.is_flag_set(NODUPLICATE))
+            return false; // NOTE(review): val is already resized and earlier elements are already stored at this point
+        res = res && val[n].parse(sval.c_str() + j, type); // once res is false, remaining elements are no longer parsed
+        j = positions[i] + 1;
+    }
+    num_already_initialized_elems += positions.size();
+    return res;
+}
+
+// Fills the vector from vec_def, but only if no elements have been parsed so far.
+void args_parser::option_vector::set_default_value() {
+    if (num_already_initialized_elems != 0) return;
+    do_parse(vec_def.c_str());
+    defaulted = true;
+    num_already_initialized_elems = 0;  // so that a later do_parse() starts again at slot 0
+}
+
+args_parser::option &args_parser::add_flag(const char *s) { // registers a boolean option named `s` and marks it as a flag
+    option &opt = add<bool>(s, false); // presumably `false` is the default value -- confirm against add<>()
+    opt.flag = true; // parse() sets a matched flag to "on" without reading a value
+    return opt;
+}
+
+// True if `arg` is option_starter followed by `pattern`. With a ' ' delimiter the
+// argument must end right after the pattern; otherwise a prefix match is enough.
+bool args_parser::match(string &arg, string pattern) const {
+    const size_t starter_len = strlen(option_starter);
+    if (strncmp(arg.c_str(), option_starter, starter_len) != 0) return false;
+    const char *rest = arg.c_str() + starter_len;
+    if (strncmp(rest, pattern.c_str(), pattern.size()) != 0) return false;
+    return !(option_delimiter == ' ' && rest[pattern.size()] != 0);
+}
+
+bool args_parser::match(string &arg, option &opt) const { // convenience overload: match `arg` against the option's name
+    return match(arg, opt.str);
+}
+
+// Obtains the value for the already matched option `opt` from `arg`.
+// - ' ' delimiter: the value is the next command-line argument, so `opt` is only
+//   remembered in prev_option; succeeds if `arg` ends right after the option name.
+// - any other delimiter: the delimiter must follow the option name, and the rest
+//   of `arg` is parsed as the value right away.
+bool args_parser::get_value(const string &arg, option &opt) {
+    assert(prev_option == NULL);
+
+    // first character after "<option_starter><name>"
+    const char *tail = arg.c_str() + strlen(option_starter) + opt.str.size();
+    if (option_delimiter == ' ') {
+        // save the option descriptor -- next arg will be the value
+        prev_option = &opt;
+        return *tail == 0;
+    }
+
+    if (*tail != option_delimiter)
+        return false;
+    return opt.do_parse(tail + 1);
+}
+
+void args_parser::print_err(error_t err, string option, string extra) {
+    if (!is_flag_set(SILENT))
+        switch (err) {
+            case NONE: break;
+            case NO_REQUIRED_OPTION: 
+                sout << "ERROR: The required option missing or can't be parsed: " 
+                     << option_starter << option << endl;
+                break;
+            case NO_REQUIRED_EXTRA_ARG: 
+                sout << "ERROR: The required extra argument missing" << endl;
+                break;
+            case PARSE_ERROR_OPTION: 
+                sout << "ERROR: Parse error on option: "
+                     << option_starter << option << endl;
+                break;
+            case PARSE_ERROR_EXTRA_ARGS: 
+                sout << "ERROR: Parse error on an extra argument" << endl;
+                break;
+            case UNKNOWN_EXTRA_ARGS:
+                sout << "ERROR: Some extra or unknown arguments or options" << endl;
+                break;
+            default: throw logic_error("args_parser: print_err: unknown error");
+        }
+    last_error = err;
+    last_error_option = option;
+    last_error_extra = extra;
+}
+
+#ifdef WIN_IMB
+// Compiled only when WIN_IMB is defined. Returns the part of `name` after the
+// last backslash or slash, or all of `name` if it contains neither.
+std::string basename(const char *name) {
+	const std::string path(name);
+	const size_t last_sep = path.find_last_of("\\/");
+	if (std::string::npos == last_sep) return path;
+	return path.substr(last_sep + 1);
+}
+#endif
+
+void args_parser::print_help_advice() const { // one-line hint telling the user how to request usage information
+    sout << "Try \"" <<  basename(argv[0]) << " " << option_starter << "help\" for usage information" << endl;
+}
+
+// NOTE: This one is just to loop over the expected_args 2-level array in an easier way.
+// The first call with FOREACH_FIRST initializes the walk through all expected args,
+// each next call with FOREACH_NEXT gives a pointer to the next arg from expected_args
+// together with the pointer to the name of the group it belongs to.
+// Two versions are here for ordinary and constant methods, mind the 'const' keyword.
+// Cursor-style walk over expected_args. The cursor (map iterator + index inside
+// the group) lives in function-local statics, so this overload supports only one
+// walk at a time. FOREACH_FIRST rewinds; FOREACH_NEXT yields the next option and
+// its group name, or returns false once everything has been visited.
+bool args_parser::in_expected_args(enum foreach_t t, const string *&group, smart_ptr<option> *&opt) {
+    static map<const string, vector<smart_ptr<option> > >::iterator it;
+    static size_t j = 0;
+    if (t == FOREACH_FIRST) {
+        it = expected_args.begin();
+        j = 0;
+        return true;
+    }
+    if (t != FOREACH_NEXT)
+        return false;
+    // advance past groups that are exhausted (or empty)
+    for (; it != expected_args.end(); ++it, j = 0) {
+        vector<smart_ptr<option> > &group_opts = it->second;
+        if (j < group_opts.size()) {
+            group = &(it->first);
+            opt = &group_opts[j++];
+            return true;
+        }
+    }
+    return false;
+}
+
+// Const flavour of the cursor-style walk over expected_args. It keeps its own
+// cursor (const map iterator + index inside the group) in function-local statics,
+// so this overload supports only one walk at a time. FOREACH_FIRST rewinds;
+// FOREACH_NEXT yields the next option and its group name, or false at the end.
+bool args_parser::in_expected_args(enum foreach_t t, const string *&group, const smart_ptr<option> *&opt) const {
+    static map<const string, vector<smart_ptr<option> > >::const_iterator cit;
+    static size_t j = 0;
+    if (t == FOREACH_FIRST) {
+        cit = expected_args.begin();
+        j = 0;
+        return true;
+    }
+    if (t != FOREACH_NEXT)
+        return false;
+    // advance past groups that are exhausted (or empty)
+    for (; cit != expected_args.end(); ++cit, j = 0) {
+        const vector<smart_ptr<option> > &group_opts = cit->second;
+        if (j < group_opts.size()) {
+            group = &(cit->first);
+            opt = &group_opts[j++];
+            return true;
+        }
+    }
+    return false;
+}
+
+// Prints one usage entry for `opt`. Non-required options are wrapped in [ ];
+// every entry except the first is indented by header_size spaces.
+void args_parser::print_single_option_usage(const smart_ptr<option> &opt, size_t header_size, 
+        bool is_first, bool no_option_name) const {
+    const char *open = opt->required ? "" : "[";
+    const char *close = opt->required ? "" : "]";
+    const string stype = value::get_type_str(opt->type);
+    const string cap = (opt->caption.size() == 0 ? stype : opt->caption);
+    const string indent = (is_first ? string() : string(header_size, ' '));
+    if (no_option_name) {           // caption only, and the line is not ended
+        sout << indent << open << cap << close << " ";
+        return;
+    }
+    sout << indent << open << option_starter << opt->str;
+    if (!opt->flag)                 // flags are shown without delimiter and caption
+        sout << option_delimiter << cap;
+    sout << close << endl;
+}
+
+void args_parser::print_help() const { // prints the complete usage summary to sout
+    if (program_name.size() != 0)
+        sout << program_name << endl;
+    sout << "Usage: " << basename(argv[0]) << " ";
+    string header; // copy of the text just printed, built only to measure its width
+    header +=  "Usage: ";
+    header += basename(argv[0]); 
+    header += " ";
+    size_t size = min(header.size(), (size_t)16); // continuation lines are indented by at most 16 columns
+    string tab(size - 2, ' '); // group titles start two columns left of the option entries
+    bool is_first = true;
+    bool is_there_sys_group = false, is_there_empty_group = false;
+    // temporary "help" entry, printed first as a flag (continues the "Usage:" line)
+    smart_ptr<option> help = new option_scalar(*this, "help", BOOL, value(false)); 
+    help->flag = true;
+    print_single_option_usage(help, size, is_first);
+    // the same entry again, now as an option taking a value captioned "option"
+    is_first = false;
+    help->flag = false;
+    help->set_caption("option");
+    print_single_option_usage(help, size, is_first);
+    // enumerate all groups which are here
+    vector<string> groups;
+    map<const string, vector<smart_ptr<option> > >::const_iterator cit;
+    for (cit = expected_args.begin(); cit != expected_args.end(); ++cit) {
+        groups.push_back(cit->first);
+        if (cit->first == "SYS")
+            is_there_sys_group = true;
+        if (cit->first == "")
+            is_there_empty_group = true;
+    }
+    // "SYS" options go first
+    if (is_there_sys_group) {
+        const vector<smart_ptr<option> > &args = expected_args.find("SYS")->second;
+        for (size_t i = 0; i < args.size(); i++)
+            print_single_option_usage(args[i], size, is_first);
+    }
+    // options from the unnamed group go next
+    if (is_there_empty_group) {
+        const vector<smart_ptr<option> > &args = expected_args.find("")->second;
+        for (size_t i = 0; i < args.size(); i++)
+            print_single_option_usage(args[i], size, is_first);
+    }
+    // options from the remaining named groups, in the map's iteration (key) order
+    for (size_t group = 0; group < groups.size(); group++) {
+        const vector<smart_ptr<option> > &args = expected_args.find(groups[group])->second;
+        if (groups[group] == "EXTRA_ARGS" || groups[group] == "SYS" || groups[group] == "") // handled separately
+            continue;
+        sout << tab << groups[group] << ":" << endl;
+        for (size_t i = 0; i < args.size(); i++)
+            print_single_option_usage(args[i], size, is_first);
+    }
+    // extra args: printed by caption only, all on one line
+    int num_extra_args = 0, num_required_extra_args = 0;
+    const std::vector<smart_ptr<option> > &extra_args = get_extra_args_info(num_extra_args, num_required_extra_args);
+    for (int j = 0; j < num_extra_args; j++) 
+        print_single_option_usage(extra_args[j], size, is_first, true);
+    if (num_extra_args)
+        sout << endl; // end the line of extra-argument captions
+}
+
+// Prints the detailed help for every option named `str`; if there is none, prints
+// a "No such option" notice followed by the generic help advice.
+void args_parser::print_help(string str) const {
+    if (program_name.size() != 0)
+        sout << program_name << endl;
+    bool was_printed = false;
+    const string *pgroup;
+    const smart_ptr<option> *popt;
+    in_expected_args(FOREACH_FIRST, pgroup, popt);
+    while(in_expected_args(FOREACH_NEXT, pgroup, popt)) {
+        const smart_ptr<option> &opt = *popt;
+        if (!(opt->str == str))
+            continue;
+        sout << "Option: ";
+        print_single_option_usage(opt, 0, true);
+        if (*pgroup != "SYS" && *pgroup != "")
+            sout << "Group: " << *pgroup << endl;
+        if (opt->description != "")
+            sout << endl << opt->description << endl;
+        was_printed = true;
+    }
+    if (was_printed) return;
+    sout << "No such option: " << str << endl;
+    print_help_advice();
+}
+
+// Calls print() on every registered option, in in_expected_args() order.
+void args_parser::print() const {
+    const string *group_name;
+    const smart_ptr<option> *current;
+    in_expected_args(FOREACH_FIRST, group_name, current);
+    while(in_expected_args(FOREACH_NEXT, group_name, current))
+        (*current)->print();
+}
+
+// Returns the "EXTRA_ARGS" options, or an empty list if that group was never created (the lookup
+// result is checked instead of dereferencing end()). Sets num_extra_args; adds to num_required_extra_args.
+const vector<smart_ptr<args_parser::option> > &args_parser::get_extra_args_info(int &num_extra_args, int &num_required_extra_args) const {
+    static const vector<smart_ptr<option> > no_extra_args;
+    const map<const string, vector<smart_ptr<option> > >::const_iterator group = expected_args.find("EXTRA_ARGS");
+    const vector<smart_ptr<option> > &extra_args = (group == expected_args.end()) ? no_extra_args : group->second;
+    bool required_args_ended = false;
+    for (size_t j = 0; j < extra_args.size(); j++) {
+        if (!extra_args[j]->required) { required_args_ended = true; continue; }
+        if (required_args_ended) // a required entry after an optional one is a registration error
+            throw logic_error("args_parser: all required extra args must precede non-required args");
+        num_required_extra_args++; // incremented, not reset: callers pass in 0
+    }
+    num_extra_args = extra_args.size();
+    return extra_args;
+}
+
+// Non-const flavour: looks the "EXTRA_ARGS" group up with operator[] (creating it
+// if absent) and does not check that required entries precede optional ones.
+vector<smart_ptr<args_parser::option> > &args_parser::get_extra_args_info(int &num_extra_args, int &num_required_extra_args) {
+    vector<smart_ptr<option> > &extra_args = expected_args["EXTRA_ARGS"];
+    for (size_t k = 0; k != extra_args.size(); ++k)
+        num_required_extra_args += extra_args[k]->required ? 1 : 0;
+    num_extra_args = extra_args.size();
+    return extra_args;
+}
+
+// Appends argv[0] .. argv[argc-1], separated by single spaces, to `result`.
+void args_parser::get_command_line(std::string &result) const {
+    for (int n = 0; n < argc; n++) {
+        if (n > 0)
+            result += " ";
+        result += argv[n];
+    }
+}
+
+bool args_parser::parse() { // processes argv[1..argc-1]; returns true only if no error was reported
+    bool parse_result = true;
+    bool help_printed = false;
+    unknown_args.resize(0);
+    // go through all given args
+    for (int i = 1; i < argc; i++) {
+        string arg(argv[i]);
+        // if there is a pointer to an option descriptor which corresponds to the previous argv[i]
+        if (prev_option) {
+            // the option itself was given as the previous argv[i],
+            // now only parse the option argument
+            option &opt = *prev_option;
+            if (!opt.required && opt.defaultize_before_parsing) 
+                opt.set_default_value();
+            opt.defaulted = false;
+            if (!opt.do_parse(arg.c_str())) {
+                print_err(PARSE_ERROR_OPTION, opt.str, arg);
+                parse_result = false;
+            }
+            prev_option = NULL;
+            continue;
+        }
+        // help is hardcoded as an optional 1st arg
+        if (i == 1 && match(arg, string("help")) && !is_flag_set(NOHELP)) {
+            if (argc == 3) {
+                print_help(string(argv[2])); // "help <name>": describe one option
+            } else {
+                print_help();
+            }
+            parse_result = false; // a help request makes parse() return false
+            help_printed = true; // NOTE(review): no continue/break here, so this argument is still looked up below
+        }
+        // go through all expected_args[] elements to find the option by pattern
+        bool found = false;
+        const string *pgroup;
+        smart_ptr <option> *popt;
+        in_expected_args(FOREACH_FIRST, pgroup, popt);
+        while(in_expected_args(FOREACH_NEXT, pgroup, popt)) {
+            if (*pgroup == "EXTRA_ARGS") // extra args have no name to match; they are handled after the loop
+                continue;
+            if (match(arg, **popt)) {
+                if (!(*popt)->required && (*popt)->defaultize_before_parsing)
+                    (*popt)->set_default_value();
+                (*popt)->defaulted = false;
+                if ((*popt)->flag) {
+                    (*popt)->do_parse("on"); // a flag takes no value: its presence means "on"
+                    found = true;
+                    break;
+                }
+                if (!get_value(arg, **popt)) {
+                    print_err(PARSE_ERROR_OPTION, (*popt)->str, arg);
+                    parse_result = false;
+                }
+                found = true; // the first matching option wins, even if its value failed to parse
+                break;
+            }
+        }
+        // all unmatched args are stored in a separate array to handle them later
+        if (!found)
+            unknown_args.push_back(arg);
+    }
+
+    // the case when cmdline args ended too early
+    if (prev_option != NULL) {
+        print_err(PARSE_ERROR_OPTION, prev_option->str);
+        parse_result = false;
+    }
+
+    // now parse the expected extra args
+    int num_extra_args = 0, num_required_extra_args = 0;
+    std::vector<smart_ptr<option> > &extra_args = get_extra_args_info(num_extra_args, num_required_extra_args);
+    if (unknown_args.size() < (size_t)num_required_extra_args) {
+        print_err(NO_REQUIRED_EXTRA_ARG, "");
+        parse_result = false;
+    } else {
+        int num_processed_extra_args = 0;
+        for (size_t j = 0; j < extra_args.size(); j++) { // the j-th leftover argument feeds the j-th expected extra arg
+            if (j >= unknown_args.size())
+               break;
+            if (match(unknown_args[j], "")) // a leftover that match() accepts with an empty pattern is not consumed
+                continue;
+            if (!extra_args[j]->required && extra_args[j]->defaultize_before_parsing)
+                extra_args[j]->set_default_value();
+            extra_args[j]->defaulted = false;
+            if (!extra_args[j]->do_parse(unknown_args[j].c_str())) {
+                print_err(PARSE_ERROR_EXTRA_ARGS, "", unknown_args[j]);
+                parse_result = false;
+                break;
+            }
+            num_processed_extra_args++;
+        }
+        assert((size_t)num_processed_extra_args <= unknown_args.size());
+        unknown_args.erase(unknown_args.begin(), unknown_args.begin() + num_processed_extra_args); // NOTE(review): drops the first N leftovers, whichever ones were actually consumed
+    }
+
+    // loop again through all in expected_args[] to find options which were not given in cmdline
+    const string *pgroup;
+    smart_ptr<option> *popt;
+    in_expected_args(FOREACH_FIRST, pgroup, popt);
+    while(in_expected_args(FOREACH_NEXT, pgroup, popt)) {
+        if ((*popt)->is_default_setting_required()) {
+            (*popt)->set_default_value();
+            continue;
+        }
+        if ((*popt)->is_required_but_not_set()) {
+            print_err(NO_REQUIRED_OPTION, (*popt)->str);
+            parse_result = false;
+        }
+    }
+    // if there are too many unexpected args, raise an error
+    if (!is_flag_set(ALLOW_UNEXPECTED_ARGS)) {
+        if (parse_result && unknown_args.size()) { // reported only when nothing else has failed
+            print_err(UNKNOWN_EXTRA_ARGS, "");
+            parse_result = false;
+        }
+    }
+    if (!parse_result && !is_flag_set(SILENT) && !help_printed)
+        print_help_advice();
+    return parse_result;
+}
+
+// Sets the caption of the n-th (0-based) extra argument and returns that option; an n outside [0, count) throws.
+args_parser::option &args_parser::set_caption(int n, const char *cap) {
+    int num_extra_args = 0, num_required_extra_args = 0;
+    vector<smart_ptr<option> > &extra_args = get_extra_args_info(num_extra_args, num_required_extra_args);
+    // a negative n is rejected as well: it would index before the start of the vector
+    if (n < 0 || n >= num_extra_args) throw logic_error("args_parser: no such extra argument");
+    extra_args[n]->caption.assign(cap);
+    return *extra_args[n];
+}
+
+// Returns the value(s) of the first option named `s`, as a vector; throws
+// logic_error if no registered option has that name.
+vector<args_parser::value> args_parser::get_result_value(const string &s) const {
+    const string *group_name;
+    const smart_ptr<option> *current;
+    in_expected_args(FOREACH_FIRST, group_name, current);
+    while(in_expected_args(FOREACH_NEXT, group_name, current))
+        if ((*current)->str == s)
+            return (*current)->get_value_as_vector();
+    throw logic_error("args_parser: no such option");
+}
+
+// Appends the current contents of unknown_args to `r`; whatever `r` already
+// holds is kept.
+void args_parser::get_unknown_args(vector<string> &r) const {
+    r.insert(r.end(), unknown_args.begin(), unknown_args.end());
+}
+
+#ifdef WITH_YAML_CPP
+// Loads option values from the YAML document read from `stream`, then runs the
+// regular parse(). Returns false (after printing a message) on a YAML error,
+// otherwise the result of parse().
+bool args_parser::load(istream &stream) {
+    try {
+        YAML::Parser parser(stream);
+        YAML::Node node;
+        parser.GetNextDocument(node);
+        // loop through all in expected_args[] to find each option in file
+        const string *pgroup;
+        smart_ptr<option> *popt;
+        in_expected_args(FOREACH_FIRST, pgroup, popt);
+        while(in_expected_args(FOREACH_NEXT, pgroup, popt)) {
+            if (*pgroup == "SYS" || *pgroup == "EXTRA_ARGS")
+                continue;
+            if(const YAML::Node *pName = node.FindValue((*popt)->str.c_str()))
+                *pName >> **popt;
+        }
+        int num_extra_args = 0, num_required_extra_args = 0;
+        std::vector<smart_ptr<option> > &extra_args = get_extra_args_info(num_extra_args, num_required_extra_args);
+        if(const YAML::Node *pName = node.FindValue("extra_args")) {
+            // the j-th sequence element goes to the j-th expected extra argument, so j must
+            // advance together with the iterator; elements beyond num_extra_args are ignored
+            int j = 0;
+            for(YAML::Iterator it = pName->begin(); it != pName->end() && j < num_extra_args; ++it, ++j)
+                *it >> *extra_args[j];
+        }
+    }
+    catch (const YAML::Exception& e) {
+        sout << "ERROR: input YAML file parsing error: " << e.what() << endl;
+        return false;
+    }
+    // now do the regular parse procedure to complete: 1) cmdline options overlapping:
+    // what is given in cmdline has a priority; 2) filling in non-required options
+    // with defaults.
+    // NOTE: if cmdline parsing is unwanted, you can defeat it with a previous
+    // call to clean_args()
+    return parse();
+}
+
+bool args_parser::load(const string &input) { // convenience overload: `input` holds the YAML text itself
+    stringstream stream(input.c_str()); // wrap the text in a stream for the istream overload
+    return load(stream);
+}
+
+string args_parser::dump() const { // serializes the current option values as a YAML document and returns its text
+    YAML::Emitter out;
+    out << YAML::BeginDoc;
+    if (program_name.size() != 0)
+        out << YAML::Comment(program_name.c_str());
+    out << YAML::BeginMap;
+    out << YAML::Flow;
+    out << YAML::Key << "version";
+    out << YAML::Value << version;
+    const string *pgroup;
+    const smart_ptr<option> *popt;
+    in_expected_args(FOREACH_FIRST, pgroup, popt);
+    while(in_expected_args(FOREACH_NEXT, pgroup, popt)) {
+        if (*pgroup == "SYS" || *pgroup == "EXTRA_ARGS") // "SYS" options are not dumped; extra args are written separately below
+            continue;
+        if ((*popt)->defaulted) {
+            YAML::Emitter comment; // a defaulted option is rendered with a side emitter and written as a comment only
+            comment << YAML::BeginMap;
+            comment << YAML::Flow << YAML::Key << (*popt)->str.c_str();
+            comment << YAML::Flow << YAML::Value << **popt;
+            comment << YAML::EndMap;
+            out << YAML::Flow << YAML::Newline << YAML::Comment(comment.c_str()) << YAML::Comment("(default)");
+        } else {
+            out << YAML::Key << (*popt)->str.c_str(); // explicitly set option: a real key/value pair
+            out << YAML::Value << **popt;
+        }
+    }
+    int num_extra_args = 0, num_required_extra_args = 0;
+    const std::vector<smart_ptr<option> > &extra_args = get_extra_args_info(num_extra_args, num_required_extra_args);
+    if (num_extra_args > 0) {
+        out << YAML::Key << "extra_args"; // the same key that load() looks up
+        out << YAML::Value << YAML::BeginSeq << YAML::Newline;
+        for (int i = 0; i < num_extra_args; i++) {
+            popt = &extra_args[i];
+            if ((*popt)->defaulted) {
+                YAML::Emitter comment; // defaulted extra args also appear only as comments
+                comment << YAML::Flow << **popt;
+                out << YAML::Flow << YAML::Newline << YAML::Comment(comment.c_str()) << YAML::Comment("(default)");
+            } else {
+                out << **popt;
+            }
+        }
+        out << YAML::Newline << YAML::EndSeq;
+    }
+    out << YAML::EndMap;
+    out << YAML::Newline;
+    return string(out.c_str());
+}
+#endif
+
+// True if `str` begins with option_starter.
+bool args_parser::is_option(const string &str) const {
+    return strncmp(str.c_str(), option_starter, strlen(option_starter)) == 0;
+}
+
+
+ostream &operator<<(ostream &s, const args_parser::option &opt) { // stream an option in text form
+    opt.to_ostream(s); // delegates to the option's own to_ostream()
+    return s;
+}
+
+// Streams the member selected by val.type. No initialized check is made here,
+// so the caller is expected to pass a value that has been set.
+ostream &operator<<(ostream &s, const args_parser::value &val) {
+    if (val.type == args_parser::STRING)     s << val.str;
+    else if (val.type == args_parser::INT)   s << val.i;
+    else if (val.type == args_parser::FLOAT) s << val.f;
+    else if (val.type == args_parser::BOOL)  s << val.b;
+    else assert(NULL == "Impossible case in switch(type)");
+    return s;
+}
+
+template <> args_parser::arg_t get_arg_t<int>() { return args_parser::INT; } // C++ type -> type tag: int
+template <> args_parser::arg_t get_arg_t<float>() { return args_parser::FLOAT; } // float
+template <> args_parser::arg_t get_arg_t<std::string>() { return args_parser::STRING; } // std::string
+template <> args_parser::arg_t get_arg_t<bool>() { return args_parser::BOOL; } // bool
+
+template <> int get_val<int>(const args_parser::value &v) { return v.i; } // typed read of the INT member (no type/initialized check here)
+template <> float get_val<float>(const args_parser::value &v) { return v.f; } // FLOAT member
+template <> bool get_val<bool>(const args_parser::value &v) { return v.b; } // BOOL member
+template <> std::string get_val<std::string>(const args_parser::value &v) { return v.str; } // STRING member, returned by copy
+
diff --git a/src_cpp/args_parser.h b/src_cpp/args_parser.h
new file mode 100644
index 00000000..f6006692
--- /dev/null
+++ b/src_cpp/args_parser.h
@@ -0,0 +1,372 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+*/
+
+#pragma once
+
+#include <vector>
+#include <iostream>
+#include <sstream>
+#include <assert.h>
+#include <string>
+#include <map>
+#include <set>
+#include <stdexcept>
+#include "smart_ptr.h"
+#ifdef WITH_YAML_CPP
+#include "yaml-cpp/yaml.h"
+#endif
+
+// TODO:
+// proposed modes to add:
+// SILENT -- no output (DONE)
+// NOHELP -- don't detect help option (DONE)
+// ALLOW_UNEXPECTED_ARGS -- don't regard extra args as an error (DONE)
+// NODUPLICATE -- don't allow duplicate options (DONE)
+// NODEFAULTSDUMP -- don't put defaults into dump
+
+
+class args_parser {
+    protected:
+    int argc;
+    const char * const *argv;
+    const char *option_starter;
+    const char option_delimiter;
+    std::ostream &sout;
+    const static int version;
+    std::string program_name;
+
+    public:
+    // Binds the parser to a command line. `opt_st` is the prefix that introduces an option,
+    // `opt_delim` the character between an option name and its value, `_sout` the stream the
+    // options print to. The argc value and the argv pointer are copied into the parser.
+    args_parser(int &_argc, char * *&_argv, const char *opt_st = "--",
+                char opt_delim = '=', std::ostream &_sout = std::cout)
+        : argc(_argc), argv(_argv), option_starter(opt_st), option_delimiter(opt_delim),
+          sout(_sout), prev_option(NULL), last_error(NONE)
+    {}
+    typedef enum { STRING, INT, FLOAT, BOOL } arg_t;
+    typedef enum { ALLOW_UNEXPECTED_ARGS, SILENT, NOHELP, NODUPLICATE /*, NODEFAULTSDUMP*/ } flag_t;
+    typedef enum { NONE, NO_REQUIRED_OPTION, NO_REQUIRED_EXTRA_ARG, PARSE_ERROR_OPTION, PARSE_ERROR_EXTRA_ARGS, UNKNOWN_EXTRA_ARGS } error_t;
+
+    // Tagged holder for one argument value: `type` tells which of i / f / str / b carries the payload.
+    class value {
+        public:
+            // All members are set so that copying or printing an unset value never reads indeterminate data.
+            value() : initialized(false), i(0), f(0), b(false), type(STRING) {}
+            value(float v) : initialized(true), i(0), str("(none)"), b(false) { type = FLOAT; f = v; }
+            value(int v) : initialized(true), f(0), str("(none)"), b(false) { type = INT; i = v; }
+            value(bool v) : initialized(true), i(0), f(0), str("(none)") { type = BOOL; b = v; }
+            value(std::string v) : initialized(true), i(0), f(0), b(false) { type = STRING; str = v; }
+            value(const char *v) : initialized(true), i(0), f(0), b(false) { type = STRING; str.assign(v); }
+            bool initialized;  // false after default construction, true after construction from a value
+            int i;
+            float f;
+            std::string str;
+            bool b;
+            arg_t type;
+            bool is_initialized() const { return initialized; }
+            value &operator=(const value &other);
+            bool parse(const char *sval, arg_t _type);
+            friend std::ostream &operator<<(std::ostream &s, const args_parser::value &val);
+            void sanity_check(arg_t _type) const;
+            static const std::string get_type_str(arg_t _type);
+    };
+    // Abstract description of one expected argument; option_scalar and option_vector are the concrete kinds.
+    struct option {
+        const args_parser &parser;
+        enum mode { APPLY_DEFAULTS_ONLY_WHEN_MISSING };
+        std::string str;
+        arg_t type;
+        bool required;
+        bool defaultize_before_parsing;
+        bool defaulted;
+        bool flag;
+        std::string caption;
+        std::string description;
+        option(const args_parser &_parser, const std::string _str, arg_t _type, bool _required)
+            : parser(_parser), str(_str), type(_type), required(_required),
+              defaultize_before_parsing(true), defaulted(false), flag(false) {}
+        virtual void print() const = 0;
+        virtual bool do_parse(const char *sval) = 0;
+        virtual bool is_scalar() const = 0;
+        virtual void set_default_value() = 0;
+        virtual option &set_caption(const char *cap) { caption.assign(cap); return *this; }
+        virtual option &set_description(const char *descr) { description.assign(descr); return *this; }
+        virtual option &set_mode(mode m) {
+            if (m == APPLY_DEFAULTS_ONLY_WHEN_MISSING) defaultize_before_parsing = false;
+            return *this;
+        }
+        virtual bool is_default_setting_required() = 0;
+        virtual bool is_required_but_not_set() = 0;
+        virtual std::vector<args_parser::value> get_value_as_vector() const = 0;
+        virtual void to_ostream(std::ostream &s) const = 0;
+        friend std::ostream &operator<<(std::ostream &s, const args_parser::option &d);
+#ifdef WITH_YAML_CPP
+        virtual void to_yaml(YAML::Emitter& out) const = 0;
+        virtual void from_yaml(const YAML::Node& node) = 0;
+#endif
+        virtual ~option() {}
+        // Copy construction and assignment are private, so options cannot be copied from outside.
+        private:
+        option(const option &other) : parser(other.parser) {}
+        option &operator=(const option &) { return *this; }
+    };
+    // Option holding exactly one value; the three-argument form is required, the four-argument form carries a default.
+    struct option_scalar : public option {
+        args_parser::value def;  // default value (left unset by the three-argument constructor)
+        args_parser::value val;  // current value, assigned from `def` by set_default_value()
+        option_scalar(const args_parser &_parser, const std::string _str, arg_t _type) : option(_parser,_str, _type, true) { }
+        option_scalar(const args_parser &_parser, const std::string _str, arg_t _type, value _def) : option(_parser, _str, _type, false), def(_def) { def.sanity_check(type); }
+        virtual ~option_scalar() {}
+        virtual void print() const { parser.sout << str << ": " << val << std::endl; }
+        virtual bool do_parse(const char *sval);
+        virtual bool is_scalar() const { return true; }
+        virtual void to_ostream(std::ostream &s) const { s << val; }
+#ifdef WITH_YAML_CPP
+        virtual void to_yaml(YAML::Emitter& out) const;
+        virtual void from_yaml(const YAML::Node& node);
+#endif
+        virtual void set_default_value() { val = def; defaulted = true; }
+        virtual bool is_default_setting_required() { return !(val.is_initialized() || required); }
+        virtual bool is_required_but_not_set() { return required && !val.is_initialized(); }
+        virtual std::vector<args_parser::value> get_value_as_vector() const { return std::vector<args_parser::value>(1, val); }
+    };
+    // Option holding a list of values split on `vec_delimiter`; vec_min / vec_max are the size limits given at registration.
+    struct option_vector : public option {
+        enum { MAX_VEC_SIZE = 1024 };
+        char vec_delimiter;
+        int vec_min;
+        int vec_max;
+        int num_already_initialized_elems;
+        std::vector<args_parser::value> val;
+        std::string vec_def;
+        option_vector(const args_parser &_parser, const std::string _str, arg_t _type,
+                     char _vec_delimiter, int _vec_min, int _vec_max)
+            : option(_parser, _str, _type, true), vec_delimiter(_vec_delimiter), vec_min(_vec_min),
+              vec_max(_vec_max), num_already_initialized_elems(0) {}
+        option_vector(const args_parser &_parser, std::string _str, arg_t _type,
+                     char _vec_delimiter, int _vec_min, int _vec_max,
+                     const std::string &_vec_def)
+            : option(_parser, _str, _type, false), vec_delimiter(_vec_delimiter), vec_min(_vec_min),
+              vec_max(_vec_max), num_already_initialized_elems(0), vec_def(_vec_def) {}
+        virtual ~option_vector() {}
+        virtual void print() const { parser.sout << str << ": "; to_ostream(parser.sout); parser.sout << std::endl; }
+        virtual bool do_parse(const char *sval);
+        virtual bool is_scalar() const { return false; }
+        // Elements are joined with ", "; nothing is written after the last one.
+        virtual void to_ostream(std::ostream &s) const {
+            for (size_t i = 0; i < val.size(); i++) { if (i != 0) s << ", "; s << val[i]; }
+        }
+#ifdef WITH_YAML_CPP
+        virtual void to_yaml(YAML::Emitter& out) const;
+        virtual void from_yaml(const YAML::Node& node);
+#endif
+        virtual void set_default_value();
+        virtual bool is_default_setting_required() { return val.size() == 0 && !required; }
+        // A required vector counts as "not set" only when it is empty and a non-zero minimum size is demanded.
+        virtual bool is_required_but_not_set() { return required && vec_min != 0 && val.size() == 0; }
+        virtual std::vector<args_parser::value> get_value_as_vector() const { return val; }
+    };
+
+    protected:
+    std::set<flag_t> flags;
+    std::string current_group;
+    std::map<std::string, std::vector<smart_ptr<option> > > expected_args;
+    std::vector<std::string> unknown_args;
+    option *prev_option;
+    error_t last_error;
+    std::string last_error_option;
+    std::string last_error_extra;
+   
+    bool match(std::string &arg, std::string pattern) const;
+    bool match(std::string &arg, option &exp) const;
+    bool get_value(const std::string &arg, option &exp);
+    void get_default_value(option &d);
+    
+    const std::vector<smart_ptr<args_parser::option> > &get_extra_args_info(int &num_extra_args, int &num_required_extra_args) const;
+    std::vector<smart_ptr<args_parser::option> > &get_extra_args_info(int &num_extra_args, int &num_required_extra_args);
+
+    void print_err(error_t err, std::string arg, std::string extra = "");
+    void print_single_option_usage(const smart_ptr<option> &d, size_t header_size, bool is_first, bool no_option_name = false) const;
+
+    std::vector<value> get_result_value(const std::string &s) const;
+
+    public:
+    args_parser &set_program_name(const std::string name) { program_name = name; return *this; }  // stores the name; returns *this for chaining
+    args_parser &set_flag(flag_t flag) { flags.insert(flag); return *this; }  // adds `flag` to the set of enabled flags; returns *this for chaining
+    bool is_flag_set(flag_t flag) const { return flags.count(flag) > 0; }  // true when `flag` was enabled through set_flag()
+    void print_help_advice() const;
+    void print_help() const;
+    void print_help(std::string str) const;
+    void print() const;
+    void get_command_line(std::string &) const;
+    bool parse();
+    template <typename T>
+    option &add(const char *s);
+    template <typename T>
+    option &add(const char *s, T v);
+    option &add_flag(const char *s);
+    template <typename T>
+    option &add_vector(const char *s, char delim = ',', int min = 0, int max = option_vector::MAX_VEC_SIZE);
+    template <typename T>
+    option &add_vector(const char *s, const char *defaults, char delim = ',', int min = 0, int max = option_vector::MAX_VEC_SIZE);
+
+    args_parser &set_current_group(const std::string &g) { current_group = g; return *this; }  // options added afterwards are stored under group `g`
+    args_parser &set_default_current_group() { current_group = ""; return *this; }  // switches back to the unnamed ("") group
+
+    option &set_caption(int n, const char *cap);
+
+    template <typename T>
+    T get(const std::string &s) const;
+    template <typename T>
+    void get(const std::string &s, std::vector<T> &r) const;
+    void get_unknown_args(std::vector<std::string> &r) const;
+
+    template <typename T>
+    bool parse_special(const std::string &s, T &r);
+    template <typename T>
+    bool parse_special_vec(const std::string &s, std::vector<T> &r, char delim = ',', int min = 0, int max = option_vector::MAX_VEC_SIZE);
+
+    void clean_args() { argc = 0; }  // resets the stored argument count to zero; argv itself is left as is
+#ifdef WITH_YAML_CPP    
+    std::string dump() const;
+    bool load(const std::string &input);
+    bool load(std::istream &in);
+#endif    
+    bool is_option(const std::string &str) const;
+
+    error_t get_last_error(std::string &option, std::string &extra) {  // returns the stored error code (NONE right after construction)
+        option = last_error_option;  // out: option name recorded with the last error
+        extra = last_error_extra;    // out: extra text recorded with the last error
+        return last_error;
+    }
+
+    protected:
+    // NOTE: see source for usage comments
+    enum foreach_t { FOREACH_FIRST, FOREACH_NEXT };
+    bool in_expected_args(enum foreach_t t, const std::string *&group, smart_ptr<option> *&arg);    
+    bool in_expected_args(enum foreach_t t, const std::string *&group, const smart_ptr<option> *&arg) const;    
+};
+
+template <typename T> args_parser::arg_t get_arg_t();
+
+template <typename T> T get_val(const args_parser::value &v);
+
+// Appends to `out` (without clearing it) the typed payload of every element of `in`.
+template <typename T> void vresult_to_vector(const std::vector<args_parser::value> &in, std::vector<T> &out) {
+    const size_t total = in.size();
+    for (size_t idx = 0; idx < total; ++idx) out.push_back(get_val<T>(in[idx]));
+}
+
+// Registers a required scalar option named `s` (no default value) in the current group.
+template <typename T> args_parser::option &args_parser::add(const char *s) {
+    smart_ptr<option> registered = new args_parser::option_scalar(*this, s, get_arg_t<T>());
+    expected_args[current_group].push_back(registered);
+    return *registered.get();
+}
+
+// Registers an optional scalar option named `s` whose default value is `v`.
+template <typename T> args_parser::option &args_parser::add(const char *s, T v) {
+    smart_ptr<option> registered = new args_parser::option_scalar(*this, s, get_arg_t<T>(), value(v));
+    expected_args[current_group].push_back(registered);
+    return *registered.get();
+}
+
+// Registers a required vector option named `s`; throws std::logic_error when `max` exceeds MAX_VEC_SIZE.
+template <typename T> args_parser::option &args_parser::add_vector(const char *s, char delim, int min, int max) {
+    if (max > option_vector::MAX_VEC_SIZE)
+        throw std::logic_error("args_parser: maximum allowed vector size for vector argument exceeded");
+    smart_ptr<option> registered = new args_parser::option_vector(*this, s, get_arg_t<T>(), delim, min, max);
+    expected_args[current_group].push_back(registered);
+    return *registered.get();
+}
+
+// Registers an optional vector option whose defaults come from the string `defaults`; throws std::logic_error when `max` exceeds MAX_VEC_SIZE.
+template <typename T> args_parser::option &args_parser::add_vector(const char *s, const char *defaults, char delim, int min, int max) {
+    if (max > option_vector::MAX_VEC_SIZE)
+        throw std::logic_error("args_parser: maximum allowed vector size for vector argument exceeded");
+    smart_ptr<option> registered = new args_parser::option_vector(*this, s, get_arg_t<T>(), delim, min, max, defaults);
+    expected_args[current_group].push_back(registered);
+    return *registered.get();
+}
+
+// Appends the value(s) held by option `s`, converted to T, to `r`.
+template <typename T> void args_parser::get(const std::string &s, std::vector<T> &r) const {
+    const std::vector<value> parsed = get_result_value(s);
+    vresult_to_vector<T>(parsed, r);
+}
+
+// Scalar accessor: throws std::logic_error unless option `s` yields exactly one value.
+// (The message says "zero-sized" even when more than one value came back.)
+template <typename T> T args_parser::get(const std::string &s) const {
+    std::vector<T> values;
+    get<T>(s, values);
+    if (values.size() == 1) return values[0];
+    throw std::logic_error("args_parser: get_result can't get a result: zero-sized vector returned");
+}
+
+// Parses the free-standing string `s` as a single T with the same rules as a scalar option.
+// Returns false and leaves `r` untouched when parsing fails.
+template <typename T> bool args_parser::parse_special(const std::string &s, T &r) {
+    // option_scalar has no constructor without the owning parser, so *this must be passed first.
+    option_scalar d(*this, "[FREE ARG]", get_arg_t<T>());
+    const bool res = d.do_parse(s.c_str());
+    if (res) r = get_val<T>(d.get_value_as_vector()[0]);
+    return res;
+}
+
+// Parses `s` as a `delim`-separated list of T; on success the elements are appended to `r`.
+template <typename T>
+bool args_parser::parse_special_vec(const std::string &s, std::vector<T> &r, char delim, int min, int max) {
+    // option_vector needs the owning parser first; vresult_to_vector returns void and fills `r` itself.
+    option_vector d(*this, "[FREE ARG]", get_arg_t<T>(), delim, min, max);
+    const bool res = d.do_parse(s.c_str());
+    if (res) vresult_to_vector<T>(d.get_value_as_vector(), r);
+    return res;
+}
+
diff --git a/src_cpp/args_parser_utests.cpp b/src_cpp/args_parser_utests.cpp
new file mode 100644
index 00000000..0d5cf219
--- /dev/null
+++ b/src_cpp/args_parser_utests.cpp
@@ -0,0 +1,309 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+*/
+
+#include "args_parser.h"
+#ifdef WITH_YAML_CPP
+#include "yaml-cpp/yaml.h"
+#endif
+
+#include <stdexcept>
+#include <assert.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <sstream>
+#include <algorithm>
+
+using namespace std;
+
+//-- UNIT TESTS ----------------------------------------------------------------------------------
+#if 1
+
+void print_args(int nargs, const char * const *argv) {
+    // Echoes the command line under test to stdout as ":: arg0 arg1 ...".
+    cout << ":: ";
+    for (int pos = 0; pos < nargs; pos++) {
+        cout << (pos > 0 ? " " : "") << argv[pos];
+    }
+    cout << endl;
+}
+
+// Test fixture: builds a parser whose output is captured in `output` and records how parse() ended.
+struct CheckParser {
+    bool result;  // return value of parse() as stored by run()
+    bool except;  // set when parse() threw a std::exception
+    smart_ptr<args_parser> pparser;
+    ostringstream output;
+    CheckParser() : result(false), except(false) {}
+    // `mode` picks the option syntax: 1 -> "-opt val", 2 -> "--opt=val", 3 -> "/opt:val".
+    args_parser &init(int argc, char ** argv, int mode = 1) {
+        print_args(argc, argv);
+        if (mode == 1) pparser = new args_parser(argc, argv, "-", ' ', output);
+        else if (mode == 2) pparser = new args_parser(argc, argv, "--", '=', output);
+        else if (mode == 3) pparser = new args_parser(argc, argv, "/", ':', output);
+        else assert(false);
+        return *pparser;
+    }
+    args_parser &run() {
+        try {
+            result = pparser->parse();
+        }
+        catch(exception &ex) {
+            cout << "EXCEPTION: " << ex.what() << endl;
+            except = true;
+        }
+        return *pparser;
+    }
+    size_t lines_in_output() {
+        const string captured = output.str();
+        return count(captured.begin(), captured.end(), '\n');
+    }
+    bool output_contains(string what) {
+        return output.str().find(what) != string::npos;
+    }
+};
+
+// Renders `val` through operator<< into `str`, replacing its previous content.
+template <typename T>
+void val2str(T val, string &str) {
+    ostringstream formatted;
+    formatted << val;
+    str = formatted.str();
+}
+
+// bool is spelled "true"/"false" rather than the stream default "1"/"0".
+template <>
+void val2str<bool>(bool val, string &str) {
+    str.assign(val ? "true" : "false");
+}
+
+// Appends to `str` the elements of `vals`, each rendered by val2str and joined with `delim`.
+template <typename T>
+void vals2str(vector<T> vals, string delim, string &str) {
+    for (size_t idx = 0; idx < vals.size(); idx++) {
+        if (idx > 0) {
+            str += delim;
+        }
+        string piece;
+        val2str<T>(vals[idx], piece);
+        str += piece;
+    }
+}
+
+// Writes the argv entries for option `opt` with value `sval` starting at argv[start], using the syntax selected by
+// `mode` (1: "-opt" "val", 2: "--opt=val", 3: "/opt:val"); returns how many entries were written (strdup'ed, not freed here).
+template <typename T>
+int make_args(int start, const char *(&argv)[1024], string opt, const string &sval, int mode) {
+    if (mode == 1) {
+        argv[start] = strdup(("-" + opt).c_str());
+        argv[start+1] = strdup(sval.c_str());
+        return 2;
+    }
+    if (mode == 2 || mode == 3) {
+        const string joined = (mode == 2 ? "--" + opt + "=" : "/" + opt + ":") + sval;
+        argv[start] = strdup(joined.c_str());
+        return 1;
+    }
+    assert(false);
+    return 0;
+}
+
+// Builds {"check", <option tokens>} for a scalar value; returns the total argument count.
+template <typename T>
+int make_args_scalar(const char *(&argv)[1024], string opt, int mode, T val) {
+    argv[0] = strdup("check");
+    string rendered;
+    val2str<T>(val, rendered);
+    return 1 + make_args<T>(1, argv, opt, rendered, mode);
+}
+ 
+// Builds {"check", <option tokens>} where the option value is "val1,val2"; returns the total argument count.
+template <typename T>
+int make_args_vector(const char *(&argv)[1024], string opt, int mode, T val1, T val2) {
+    vector<T> both;
+    both.push_back(val1);
+    both.push_back(val2);
+    string joined;
+    vals2str<T>(both, ",", joined);
+    argv[0] = strdup("check");
+    return 1 + make_args<T>(1, argv, opt, joined, mode);
+}
+
+// Round-trips a required scalar option "aaa" holding `val` through all three option syntaxes.
+template <typename T> void basic_scalar_check(T val) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        const int argc = make_args_scalar<T>(argv, "aaa", syntax, val);
+        CheckParser checker;
+        checker.init(argc, (char * *)argv, syntax).add<T>("aaa").set_caption("bbb");
+        const T parsed = checker.run().get<T>("aaa");
+        assert(parsed == val && checker.result && !checker.except);
+    }
+}
+ 
+
+// Passes the unknown option "aaaa" while "aaa" is required, and expects NO_REQUIRED_OPTION reported for "aaa".
+template <typename T> void err_scalar_check(T val) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        string missing_opt, extra;
+        const int argc = make_args_scalar<T>(argv, "aaaa", syntax, val);
+        CheckParser checker;
+        checker.init(argc, (char * *)argv, syntax).add<T>("aaa").set_caption("bbb");
+        const args_parser::error_t err = checker.run().get_last_error(missing_opt, extra);
+        assert(!checker.result && !checker.except && err == args_parser::NO_REQUIRED_OPTION &&
+                missing_opt == "aaa"); // && checker.lines_in_output() == 2 && checker.output_contains("ERROR"));
+//        printf(">> %s, %d, %s\n", checker.output.str().c_str(), checker.lines_in_output(), checker.output_contains("ERROR")?"true":"false");
+    }
+}
+
+// Round-trips a required two-element vector option "aaa" through all three option syntaxes.
+template <typename T> void basic_vector_check(T val1, T val2) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        const int argc = make_args_vector<T>(argv, "aaa", syntax, val1, val2);
+        CheckParser checker;
+        checker.init(argc,  (char * *)argv, syntax).add_vector<T>("aaa", ',').set_caption("bbb");
+        vector<T> parsed;
+        checker.run().get<T>("aaa", parsed);
+        assert(parsed.size() == 2 && parsed[0] == val1 && parsed[1] == val2 && checker.result && !checker.except);
+    }
+}
+
+// With no option on the command line, get<T>("aaa") must return the registered default `def`.
+template <typename T> void default_scalar_check(T def) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        argv[0] = "check";
+        CheckParser checker;
+        checker.init(1,  (char * *)argv, syntax).add<T>("aaa", def).set_caption("bbb");
+        const T parsed = checker.run().get<T>("aaa");
+        assert(parsed == def && checker.result && !checker.except);
+    }
+}
+
+// With no option on the command line, the vector built from the default string `def` must have `n` elements.
+template <typename T> void default_vector_check(const char *def, size_t n) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        argv[0] = "check";
+        CheckParser checker;
+        checker.init(1,  (char * *)argv, syntax).add_vector<T>("aaa", def).set_caption("bbb");
+        vector<T> parsed;
+        checker.run().get<T>("aaa", parsed);
+        assert(parsed.size() == n && checker.result && !checker.except);
+    }
+}
+
+// Passes `sval` on the command line for an option whose default string is `def`; expects `n` elements, the first two being val1 and val2.
+template <typename T> void default_vector_check_ext(const char *def, const char *sval, size_t n, T val1, T val2) {
+    const char *argv[1024];
+    for (int syntax = 1; syntax <= 3; ++syntax) {
+        argv[0] = "check";
+        const int argc = 1 + make_args<string>(1, argv, "aaa", sval, syntax);
+        CheckParser checker;
+        checker.init(argc,  (char * *)argv, syntax).add_vector<T>("aaa", def).set_caption("bbb");
+        vector<T> parsed;
+        checker.run().get<T>("aaa", parsed);
+        assert(parsed.size() == n && checker.result && !checker.except);
+        assert(parsed[0] == val1 && parsed[1] == val2);
+    }
+}
+
+void check_parser()  // Runs every args_parser self-check in this file; a failing check trips an assert().
+{
+    basic_scalar_check<int>(5);
+    basic_scalar_check<int>(-5);
+    basic_scalar_check<float>(5.5);
+    basic_scalar_check<float>(-5.5);
+    basic_scalar_check<bool>(true);
+    basic_scalar_check<bool>(true);  // NOTE(review): exact duplicate of the previous line -- possibly meant `false`; confirm
+    basic_scalar_check<string>("ccc");
+    // Required two-element vector options.
+    basic_vector_check<int>(5, 5);
+    basic_vector_check<int>(5, -5);
+    basic_vector_check<int>(-5, 5);
+    basic_vector_check<float>(5.5, 5.5);
+    basic_vector_check<float>(-5.5, 5.5);
+    basic_vector_check<bool>(true, false);
+    basic_vector_check<bool>(true, false);  // NOTE(review): exact duplicate of the previous line -- possibly meant (false, true); confirm
+    basic_vector_check<string>("ccc", "ddd");
+    // Scalar options that fall back to their default.
+    default_scalar_check<int>(5);
+    default_scalar_check<float>(5.5);
+    default_scalar_check<bool>(true);
+    default_scalar_check<string>("ccc");
+    // Vector options that fall back to their default string (second argument = expected element count).
+    default_vector_check<int>("", 0);
+    default_vector_check<int>("5", 1);
+    default_vector_check<int>("5,-5", 2);
+    default_vector_check<float>("", 0);
+    default_vector_check<float>("5.5", 1);
+    default_vector_check<float>("5.5,.3", 2);
+    default_vector_check<bool>("", 0);
+    default_vector_check<bool>("true", 1);
+    default_vector_check<bool>("true,false", 2);
+    default_vector_check<string>("", 0);
+    default_vector_check<string>("ccc", 1);
+    default_vector_check<string>("ccc,ddd", 2);
+    // Vector defaults combined with command-line values: (default string, given string, expected size, expected [0], expected [1]).
+    default_vector_check_ext<int>("5,-5", "1", 2, 1, -5);
+    default_vector_check_ext<int>("5,-5", "1,1", 2, 1, 1);
+    default_vector_check_ext<float>("5.,-5.0e0", ".1", 2, 0.1, -5.0);
+    default_vector_check_ext<float>("5.", ".1,1e-6", 2, 0.1, 1e-6);
+    default_vector_check_ext<bool>("true,false", "false", 2, false, false);
+    default_vector_check_ext<bool>("true", "false,true", 2, false, true);
+    default_vector_check_ext<string>("aaa,bbbb", "ccc", 2, "ccc", "bbbb");
+    default_vector_check_ext<string>("aaa", "ccc,ddd", 2, "ccc", "ddd");
+    // Error path: a required option is missing.
+    err_scalar_check<string>("ccc");
+
+}
+#endif
+
diff --git a/src_cpp/benchmark.h b/src_cpp/benchmark.h
new file mode 100644
index 00000000..ebdf963c
--- /dev/null
+++ b/src_cpp/benchmark.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+*/
+
+#pragma once
+#include "smart_ptr.h"
+#include "scope.h"
+#include <iostream>
+
+#define UNUSED(expr) do { (void)(expr); } while (0)  // evaluates `expr` and discards the result, as a single statement
+
+class BenchmarkSuiteBase;
+class Benchmark {  // Abstract base of every benchmark: run() must be overridden, the other virtuals have trivial defaults.
+    public:
+        Benchmark() : initialized(false), suite(NULL) {}  // suite is NULL until something assigns it (DEFINE_INHERITED's constructor does)
+        virtual const std::string get_name() const { return std::string(""); }
+        virtual Benchmark* create_myself() const { return NULL; }
+        virtual void allocate_internals() {}
+        virtual bool init_description() { return true; }
+        virtual void init() { }
+        virtual void run(const scope_item &) = 0;
+        virtual void finalize() { }
+        virtual bool is_default() { return true; }
+        virtual std::vector<std::string> get_comments() { return std::vector<std::string>(0); }
+        smart_ptr<Scope> get_scope() { if (scope.get() == NULL) { scope.assign(new Scope); scope->commit(); } return scope; }  // creates and commits a Scope on first use
+        virtual ~Benchmark() { }
+        bool initialized;
+        BenchmarkSuiteBase *suite;
+    protected:
+        smart_ptr<Scope> scope;
+    private:
+        Benchmark &operator=(const Benchmark &) { return *this; }  // private: benchmarks are not assignable from outside
+        Benchmark(const Benchmark &) : initialized(false), suite(NULL) {}  // private: not copyable from outside; members still get defined values
+};
+// For use inside the body of a Benchmark subclass CLASS: declares the static `name`, overrides get_name() and create_myself(), and defines a default constructor that stores SUITE_CLASS::register_elem(this) in `suite` and then calls allocate_internals().
+#define DEFINE_INHERITED(CLASS, SUITE_CLASS) static const char *name; \
+    virtual const std::string get_name() const { return name; } \
+    virtual Benchmark *create_myself() const { return new CLASS; } \
+    CLASS() { Benchmark::suite = SUITE_CLASS::register_elem(this); this->allocate_internals(); }
+// Defines one object `elem_NAME` of CLASS in an unnamed namespace and sets CLASS::name to the string "NAME".
+#define DECLARE_INHERITED(CLASS, NAME) namespace { CLASS elem_ ## NAME; } const char *CLASS::name = #NAME;
+// Same expansion as DECLARE_INHERITED, except that when __GNUC__ is defined the definition of CLASS::name is prefixed with `template<>`.
+#ifdef __GNUC__ 
+#define DECLARE_INHERITED_TEMPLATE(CLASS, NAME) namespace { CLASS elem_ ## NAME; } template<> const char *CLASS::name = #NAME;
+#else
+#define DECLARE_INHERITED_TEMPLATE(CLASS, NAME) namespace { CLASS elem_ ## NAME; } const char *CLASS::name = #NAME;
+#endif
diff --git a/src_cpp/benchmark_suite.h b/src_cpp/benchmark_suite.h
new file mode 100644
index 00000000..e3809f9a
--- /dev/null
+++ b/src_cpp/benchmark_suite.h
@@ -0,0 +1,157 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include "smart_ptr.h"
+#include "any.h"
+
+template <benchmark_suite_t bs>
+class BenchmarkSuite : public BenchmarkSuiteBase { // one lazily created instance per tag bs; keeps a name -> prototype registry of its benchmarks
+    public:
+        typedef std::map<std::string, const Benchmark*, set_operations::case_insens_cmp> pnames_t;
+    protected:
+        static pnames_t *pnames;              // name -> registered prototype; allocated on first use
+        static BenchmarkSuite<bs> *instance;  // created by the first get_instance() call
+        std::vector<std::string> names_list;  // registered names, in registration order
+    public:
+        static BenchmarkSuite<bs> &get_instance() { // creates the instance on first call and registers it in BenchmarkSuitesCollection
+            if (instance == NULL) {
+                instance = new BenchmarkSuite<bs>();
+                BenchmarkSuitesCollection::register_elem(instance);
+            }
+            return *instance;
+        }
+        virtual void init() { // instantiates every registered benchmark once and checks its init_description()
+            std::set<std::string> benchs;
+            get_full_list(benchs);
+            for (std::set<std::string>::iterator it = benchs.begin(); it != benchs.end(); ++it) {
+                smart_ptr<Benchmark> b = get_instance().create(*it);
+                if (b.get() == NULL || !b->init_description()) // create() is virtual and may yield a null pointer: throw instead of dereferencing it
+                    throw std::logic_error("BenchmarkSuite: wrong description of one of benchmarks in suite");
+            }
+        }
+
+        virtual bool declare_args(args_parser &,
+                                  std::ostream &output = std::cout) const {
+            UNUSED(output);
+            return true;
+        }
+        virtual bool prepare(const args_parser &, const std::vector<std::string> &,
+                             const std::vector<std::string> &, std::ostream &output = std::cout) {
+            UNUSED(output);
+            return true;
+        }
+        virtual void finalize(const std::vector<std::string> &,
+                              std::ostream &output = std::cout) { UNUSED(output); }
+        static BenchmarkSuite<bs> *register_elem(const Benchmark *elem) { get_instance().do_register_elem(elem); return instance; }
+        static void get_full_list(std::set<std::string> &all_benchmarks) {
+            get_instance().do_get_full_list(all_benchmarks);
+        }
+        static void get_full_list(std::vector<std::string> &all_benchmarks) {
+            get_instance().do_get_full_list(all_benchmarks);
+        }
+        virtual smart_ptr<Benchmark> create(const std::string &s) { return get_instance().do_create(s); }
+        virtual any get_parameter(const std::string &key) { UNUSED(key); return any(); }
+
+    protected:
+        void do_register_elem(const Benchmark *elem) { // stores elem as the prototype for its get_name()
+            assert(elem != NULL);
+            std::string name = elem->get_name();
+            assert(name != "(none)");
+            if (pnames == NULL) {
+                pnames = new pnames_t();
+            }
+            if (pnames->find(name) == pnames->end()) { // the first registration of a name wins; later ones are ignored
+                (*pnames)[name] = elem;
+                names_list.push_back(name);
+            }
+        }
+        // Returns a new benchmark made by the prototype registered as s, or a null smart_ptr when s is not registered.
+        smart_ptr<Benchmark> do_create(const std::string &s) {
+            if (pnames == NULL) pnames = new pnames_t();
+            // find() rather than operator[]: a failed lookup must not insert a NULL entry that would block a later registration
+            typename pnames_t::const_iterator it = pnames->find(s);
+            if (it == pnames->end() || it->second == NULL)
+                return smart_ptr<Benchmark>((Benchmark *)0);
+            return smart_ptr<Benchmark>(it->second->create_myself());
+        }
+        template <typename T>
+        void do_get_full_list(T &all_benchmarks) { // appends/inserts every registered name into all_benchmarks
+            if (pnames == NULL) {
+                pnames = new pnames_t();
+            }
+            std::insert_iterator<T> insert(all_benchmarks, all_benchmarks.end());
+            for (size_t i = 0; i < names_list.size(); i++) {
+                *insert++ = names_list[i];
+            }
+        }
+    public:
+        virtual void get_bench_list(std::set<std::string> &benchs, BenchListFilter filter = ALL_BENCHMARKS) const {
+            UNUSED(filter); // this generic suite reports the full list for every filter value
+            get_full_list(benchs);
+        }
+        virtual void get_bench_list(std::vector<std::string> &benchs, BenchListFilter filter = ALL_BENCHMARKS) const {
+            UNUSED(filter);
+            get_full_list(benchs);
+        }
+        virtual const std::string get_name() const;
+
+        BenchmarkSuite() { }
+        ~BenchmarkSuite() { delete pnames; pnames = 0; } // pnames is static: reset it so no dangling pointer outlives this object
+    private:
+        BenchmarkSuite &operator=(const BenchmarkSuite &) { return *this; }
+        BenchmarkSuite(const BenchmarkSuite &) {}
+};
+
+#define DECLARE_BENCHMARK_SUITE_STUFF(SUITE, NAME) \
+template<> BenchmarkSuite<SUITE>::pnames_t *BenchmarkSuite<SUITE>::pnames = 0; \
+template<> BenchmarkSuite<SUITE> *BenchmarkSuite<SUITE>::instance = 0; \
+template <> const std::string BenchmarkSuite<SUITE>::get_name() const { return #NAME; } // together: defines both static members (as null) and get_name() for BenchmarkSuite<SUITE>
diff --git a/src_cpp/benchmark_suite_base.h b/src_cpp/benchmark_suite_base.h
new file mode 100644
index 00000000..79516714
--- /dev/null
+++ b/src_cpp/benchmark_suite_base.h
@@ -0,0 +1,81 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include "smart_ptr.h"
+#include "benchmark.h"
+
+class args_parser;
+class Benchmark;
+
+struct BenchmarkSuiteBase { // Interface of a benchmark suite; every hook except get_name() has a do-nothing default.
+    enum BenchListFilter { ALL_BENCHMARKS, DEFAULT_BENCHMARKS };
+    virtual void init() {}
+    virtual bool declare_args(args_parser &,
+                              std::ostream &output = std::cout) const {
+        UNUSED(output);
+        return true;
+    }
+    virtual bool prepare(const args_parser &, const std::vector<std::string> &,
+                         const std::vector<std::string> &, std::ostream &output = std::cout) {
+        UNUSED(output);
+        return true;
+    }
+    virtual ~BenchmarkSuiteBase() {} // polymorphic base: keeps destruction through a base pointer well-defined
+    virtual void finalize(const std::vector<std::string> &,
+                          std::ostream &output = std::cout) { UNUSED(output); }
+    virtual void get_bench_list(std::set<std::string> &, BenchListFilter filter = ALL_BENCHMARKS) const { UNUSED(filter); }
+    virtual void get_bench_list(std::vector<std::string> &, BenchListFilter filter = ALL_BENCHMARKS) const { UNUSED(filter); }
+    virtual smart_ptr<Benchmark> create(const std::string &) { return smart_ptr<Benchmark>(); } // default: no benchmark can be created
+    virtual const std::string get_name() const = 0;
+    virtual any get_parameter(const std::string &key) { UNUSED(key); return any(); }
+};
+
diff --git a/src_cpp/benchmark_suites_collection.cpp b/src_cpp/benchmark_suites_collection.cpp
new file mode 100644
index 00000000..4e1b3c5e
--- /dev/null
+++ b/src_cpp/benchmark_suites_collection.cpp
@@ -0,0 +1,72 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <vector>
+#include <string>
+#include <set>
+#include <map>
+#include <assert.h>
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "benchmark_suite.h"
+
+using namespace std;
+
+map<const string, BenchmarkSuiteBase*> *BenchmarkSuitesCollection::pnames = NULL; // suite registry; allocated by the first register_elem() call
+
+DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, __generic__) // static members and get_name() ("__generic__") for BenchmarkSuite<BS_GENERIC>
+
+namespace { 
+    struct Dummy : public Benchmark { // benchmark with an empty run(); its constructor registers it in BenchmarkSuite<BS_GENERIC>
+        void run(const scope_item &) {}
+        DEFINE_INHERITED(Dummy, BenchmarkSuite<BS_GENERIC>)
+    };
+    DECLARE_INHERITED(Dummy, Dummy) // defines the file-local instance elem_Dummy and sets Dummy::name to "Dummy"
+}
diff --git a/src_cpp/benchmark_suites_collection.h b/src_cpp/benchmark_suites_collection.h
new file mode 100644
index 00000000..4e0001e5
--- /dev/null
+++ b/src_cpp/benchmark_suites_collection.h
@@ -0,0 +1,148 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include "benchmark_suite_base.h"
+#include "utils.h"
+
+//!!! to remove
+#include <iostream>
+
+enum benchmark_suite_t { // tag values used as the template argument of BenchmarkSuite<>
+    BS_MPI1,
+    BS_NBC,
+    BS_MT,
+    BS_RMA,
+    BS_EXT,
+    BS_IO,
+    BS_GENERIC // tag of the generic suite template instantiated in benchmark_suites_collection.cpp
+};
+
+class BenchmarkSuitesCollection { // static-only registry of benchmark suites, keyed by suite name
+    static std::map<const std::string, BenchmarkSuiteBase*> *pnames;
+    public:
+    static void register_elem(BenchmarkSuiteBase *elem) { // adds elem under elem->get_name()
+        assert(elem != NULL);
+        const std::string name = elem->get_name();
+        if (pnames == NULL) {
+            pnames = new std::map<const std::string, BenchmarkSuiteBase*>();
+        }
+        if (pnames->find(name) == pnames->end()) { // a suite name that is already present is left untouched
+            (*pnames)[name] = elem;
+        }
+    }
+    static void get_full_list(std::vector<std::string> &all_benchmarks, // receives the combined list of all suites
+                              std::map<std::string, std::set<std::string> > &by_suite) { // receives one set per suite name
+        assert(pnames != NULL);
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+            std::set<std::string> &benchmarks = by_suite[it->second->get_name()];
+            it->second->get_bench_list(benchmarks, BenchmarkSuiteBase::ALL_BENCHMARKS);
+            set_operations::combine(all_benchmarks, benchmarks);
+        }
+    }
+    static void get_default_list(std::vector<std::string> &default_benchmarks) { // asks every suite for its DEFAULT_BENCHMARKS list
+        assert(pnames != NULL);
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+            it->second->get_bench_list(default_benchmarks, BenchmarkSuiteBase::DEFAULT_BENCHMARKS);
+        }
+    }
+    static void init_registered_suites() { // calls init() on every registered suite
+       assert(pnames != NULL);
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+                it->second->init();
+        }
+    }
+    static bool declare_args(args_parser &parser, std::ostream &output) { // returns false as soon as one suite's declare_args() fails
+        assert(pnames != NULL);
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+                if (!it->second->declare_args(parser, output))
+                    return false;
+        }
+        return true;
+    }
+    static bool prepare(args_parser &parser, const std::vector<std::string> &benchs,
+                        const std::vector<std::string> &unknown_args, std::ostream &output) {
+        assert(pnames != NULL);
+        std::vector<std::string> suites_to_remove; // collected first so the map is not modified while it is iterated
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+                if (!it->second->prepare(parser, benchs, unknown_args, output)) {
+                    suites_to_remove.push_back(it->first);
+                }
+        }
+        for (std::vector<std::string>::iterator it = suites_to_remove.begin();
+                it != suites_to_remove.end(); ++it) {
+            pnames->erase(*it); // a suite whose prepare() returned false is dropped from the registry
+        }
+        return true; // always true: a failing suite is removed rather than reported
+    }
+    static smart_ptr<Benchmark> create(const std::string &name) { // returns the first non-null result of the suites' create(name)
+        assert(pnames != NULL);
+        smart_ptr<Benchmark> b;
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) {
+            b = it->second->create(name);
+            if (b.get() != NULL)
+                break;
+        }
+        return b;
+    }
+    static void finalize(const std::vector<std::string> &benchs, std::ostream &output) { // forwards finalize() to every registered suite
+        assert(pnames != NULL);
+        for (std::map<const std::string, BenchmarkSuiteBase*>::iterator it = pnames->begin();
+             it != pnames->end(); ++it) 
+                it->second->finalize(benchs, output);
+    }    
+};
diff --git a/src_cpp/example/Makefile.example.mk b/src_cpp/example/Makefile.example.mk
new file mode 100644
index 00000000..859c8433
--- /dev/null
+++ b/src_cpp/example/Makefile.example.mk
@@ -0,0 +1,58 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+override CPPFLAGS += -DEXAMPLE
+override CPPFLAGS += -Iexample 
+# Sources of the example suite. NOTE(review): the spelling BECHMARK_SUITE_SRC presumably matches the variable read by the including Makefile - confirm before renaming.
+BECHMARK_SUITE_SRC += example/example_benchmark1.cpp \
+					  example/example_benchmark2.cpp \
+					  example/example_benchmark3.cpp \
+					  example/example_benchmark4.cpp \
+					  example/example_benchmark5.cpp
diff --git a/src_cpp/example/example_benchmark1.cpp b/src_cpp/example/example_benchmark1.cpp
new file mode 100644
index 00000000..9d7b90de
--- /dev/null
+++ b/src_cpp/example/example_benchmark1.cpp
@@ -0,0 +1,89 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "scope.h"
+#include "utils.h"
+
+using namespace std;
+
+// NOTE: The following example uses BS_GENERIC template of BenchmarkSuite.
+// One needs to take care when using it:
+// 1. Put benchmark_suite.h include directive and all its usage in the namespace
+// with a unique name. It is required to avoid the problem of multiple occurrences
+// of static variables if BS_GENERIC is used by several suites as a base.
+// 2. Put DECLARE_BENCHMARK_SUITE_STUFF right after include directive
+// 3. Every benchmark you plan to put in suite must include DEFINE_INHERITED and
+// DECLARE_INHERITED macros. 
+// All other stuff is automated. Enjoy!
+namespace example_suite1 {
+
+    #include "benchmark_suite.h" // included inside the namespace on purpose, so the BenchmarkSuite template is declared within example_suite1
+    DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, example_suite1) // this suite's get_name() returns "example_suite1"
+    // Minimal benchmark: run() prints the registered benchmark name followed by a greeting.
+    class ExampleBenchmark_1 : public Benchmark {
+        public:
+        virtual void run(const scope_item &item) { 
+            UNUSED(item);
+            cout << get_name() << ": Hello world from example" << endl;
+        }
+        DEFINE_INHERITED(ExampleBenchmark_1, BenchmarkSuite<BS_GENERIC>);
+    };
+
+    DECLARE_INHERITED(ExampleBenchmark_1, example1) // creates the registering instance and sets the benchmark name to "example1"
+}
diff --git a/src_cpp/example/example_benchmark2.cpp b/src_cpp/example/example_benchmark2.cpp
new file mode 100644
index 00000000..d18c94dc
--- /dev/null
+++ b/src_cpp/example/example_benchmark2.cpp
@@ -0,0 +1,85 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "scope.h"
+#include "utils.h"
+
+using namespace std;
+
+namespace example_suite2 {
+
+    #include "benchmark_suite.h"
+    DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, example_suite2)
+
+    // EXAMPLE 2: init() is overridden to attach a scope; run() prints each item's len
+    class ExampleBenchmark_2 : public Benchmark {
+    public:
+        virtual void init() {
+            VarLenScope *lengths = new VarLenScope(0, 22);
+            scope = lengths;
+        }
+        virtual void run(const scope_item &item) {
+            std::cout << get_name() << ": Hello world! size=" << item.len << std::endl;
+        }
+        DEFINE_INHERITED(ExampleBenchmark_2, BenchmarkSuite<BS_GENERIC>);
+    };
+
+    DECLARE_INHERITED(ExampleBenchmark_2, example2)
+
+}
diff --git a/src_cpp/example/example_benchmark3.cpp b/src_cpp/example/example_benchmark3.cpp
new file mode 100644
index 00000000..9a3f9840
--- /dev/null
+++ b/src_cpp/example/example_benchmark3.cpp
@@ -0,0 +1,132 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "scope.h"
+#include "utils.h"
+
+using namespace std;
+
+namespace example_suite3 {
+
+    #include "benchmark_suite.h"
+    DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, example_suite3)
+
+    // EXAMPLE 3: PingPong code based on BenchmarkSuite<BS_GENERIC> specialization
+    // - init() installs the scope and sizes both buffers (1 << 22 bytes; presumably the
+    //   largest length VarLenScope(0, 22) yields -- confirm against scope.h)
+    // - run() times one message length, finalize() prints the timings on rank 0
+    class ExampleBenchmark_3 : public Benchmark {
+        std::map<int, double> results;  // message length -> MPI_Wtime() seconds per cycle
+        // Vectors, not malloc'ed pointers: they start empty and free themselves, so an
+        // instance destroyed before init() ran no longer frees indeterminate pointers.
+        std::vector<char> sbuf, rbuf;
+        int np, rank;
+        public:
+        virtual void init() {
+            VarLenScope *sc = new VarLenScope(0, 22);
+            scope = sc;
+            rbuf.assign(1u << 22, '\0');
+            sbuf.assign(1u << 22, '\0');
+            np = 0; rank = -1;  // defined values for finalize() even if run() never executes
+        }
+        virtual void run(const scope_item &item) {
+            MPI_Comm_size(MPI_COMM_WORLD, &np);
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            if (np < 2) {
+                std::cout << get_name() << ": two or more ranks required" << std::endl;
+                return;
+            }
+            MPI_Status stat;
+            double t1 = 0, t2 = 0, time = 0;
+            const int tag = 1;
+            const int ncycles = 1024;
+            if (rank == 0) {  // rank 0 opens every cycle: send, then wait for the echo
+                t1 = MPI_Wtime();
+                for(int i = 0; i < ncycles; i++) {
+                    MPI_Send(&sbuf[0], item.len, MPI_BYTE, 1, tag, MPI_COMM_WORLD);
+                    MPI_Recv(&rbuf[0], item.len, MPI_BYTE, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
+                }
+                t2 = MPI_Wtime();
+                time = (t2 - t1) / ncycles;
+            } else if (rank == 1) {  // rank 1 mirrors it: receive, then answer
+                t1 = MPI_Wtime();
+                for(int i = 0; i < ncycles; i++) {
+                    MPI_Recv(&rbuf[0], item.len, MPI_BYTE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
+                    MPI_Send(&sbuf[0], item.len, MPI_BYTE, 0, tag, MPI_COMM_WORLD);
+                }
+                t2 = MPI_Wtime();
+                time = (t2 - t1) / ncycles;
+            }
+            MPI_Barrier(MPI_COMM_WORLD);  // any further ranks only take part in this barrier
+            results[item.len] = time;     // stays 0 on ranks other than 0 and 1
+        }
+        virtual void finalize() {
+            if (rank == 0) {
+                for (std::map<int, double>::iterator it = results.begin();
+                        it != results.end(); ++it) {
+                    cout << get_name() << ": " << "len=" << it->first << " time=" << it->second << endl;
+                }
+            }
+        }
+        virtual ~ExampleBenchmark_3() {}  // nothing to release by hand: the vectors own the buffers
+        DEFINE_INHERITED(ExampleBenchmark_3, BenchmarkSuite<BS_GENERIC>);
+    };
+
+    DECLARE_INHERITED(ExampleBenchmark_3, example3)
+}
diff --git a/src_cpp/example/example_benchmark4.cpp b/src_cpp/example/example_benchmark4.cpp
new file mode 100644
index 00000000..728ea956
--- /dev/null
+++ b/src_cpp/example/example_benchmark4.cpp
@@ -0,0 +1,191 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "scope.h"
+#include "utils.h"
+#include "args_parser.h"
+
+using namespace std;
+
+namespace example_suite4 {
+
+    #include "benchmark_suite.h"
+
+    DECLARE_BENCHMARK_SUITE_STUFF(BS_GENERIC, example_suite4)
+
+    // EXAMPLE 4: extended PingPong code with some extra features
+    // - declare_args()/prepare() overloaded virtual function to implement some command line parameters
+    // - get_parameter() overloaded virtual function implements the high-level interface 
+    // to pass parameters from a suite to a benchmark
+    // Declares the options of this suite (len, datatype, ncycles) in a group named after the suite.
+    template <> bool BenchmarkSuite<BS_GENERIC>::declare_args(args_parser &parser, ostream &output) const {
+        UNUSED(output);  // nothing is written from here
+        parser.set_current_group(get_name());
+        parser.add_vector<int>("len", "1,2,4,8")
+              .set_mode(args_parser::option::APPLY_DEFAULTS_ONLY_WHEN_MISSING);
+        parser.add<string>("datatype", "int").set_caption("int|char");
+        parser.add<int>("ncycles", 1000);
+        parser.set_default_current_group();  // back to the parser's default group
+        return true;
+    }
+
+    vector<int> len;
+    MPI_Datatype datatype;
+    int ncycles;
+
+    // Stores the parsed options in len/datatype/ncycles; returns false (reason in output) if they are unusable.
+    template <> bool BenchmarkSuite<BS_GENERIC>::prepare(const args_parser &parser, const vector<string> &,
+                                                         const vector<string> &unknown_args, ostream &output) {
+        if (!unknown_args.empty()) {
+            output << "Some unknown options or extra arguments." << endl;
+            return false;
+        }
+        parser.get<int>("len", len);
+        const string dt = parser.get<string>("datatype");
+        if (dt != "int" && dt != "char") {
+            output << get_name() << ": " << "Unknown data type in datatype option" << endl;
+            return false;
+        }
+        datatype = (dt == "int") ? MPI_INT : MPI_CHAR;
+        ncycles = parser.get<int>("ncycles");
+        if (ncycles > 0) return true;  // run() divides the elapsed time by ncycles
+        output << get_name() << ": " << "ncycles option must be a positive number" << endl;
+        return false;
+    }
+
+#define HANDLE_PARAMETER(TYPE, NAME) if (key == #NAME) { /* for get_parameter(): publish variable NAME under the key "NAME" */ \
+                                        result = smart_ptr< TYPE >(&NAME); \
+                                        result.detach_ptr(); }  // NOTE(review): presumably keeps the smart_ptr from deleting NAME -- confirm
+
+#define GET_PARAMETER(TYPE, NAME) TYPE *p_##NAME = suite->get_parameter(#NAME).as< TYPE >(); /* fetch the suite's parameter "NAME" */ \
+                                  assert(p_##NAME != NULL); \
+                                  TYPE &NAME = *p_##NAME;  // NAME becomes a local reference to what the returned pointer points at
+
+    template <> any BenchmarkSuite<BS_GENERIC>::get_parameter(const string &key) {  // hands out the option variable named by key
+        any result;  // returned as default-constructed when key matches no name below
+        HANDLE_PARAMETER(vector<int>, len);
+        HANDLE_PARAMETER(MPI_Datatype, datatype);
+        HANDLE_PARAMETER(int, ncycles);
+        return result;
+    }
+
+    class ExampleBenchmark_4 : public Benchmark {
+        map<int, double> results;  // element count per message -> MPI_Wtime() seconds per cycle
+        // Vectors, not malloc'ed pointers: they start empty and free themselves, so an
+        // instance destroyed before init() ran no longer frees indeterminate pointers.
+        vector<char> sbuf, rbuf;
+        int np, rank;
+        public:
+        virtual void init() {
+            GET_PARAMETER(vector<int>, len);
+            GET_PARAMETER(MPI_Datatype, datatype);
+            VarLenScope *sc = new VarLenScope(len);
+            scope = sc;
+            int idts = 0;  // size of one datatype element in bytes
+            MPI_Type_size(datatype, &idts);
+            const size_t bytes = (size_t)scope->get_max_len() * (size_t)idts;
+            rbuf.assign(bytes ? bytes : 1, '\0');  // never empty, so &rbuf[0] in run() is always valid
+            sbuf.assign(bytes ? bytes : 1, '\0');
+            np = 0; rank = -1;  // defined values for finalize() even if run() never executes
+        }
+        virtual void run(const scope_item &item) {
+            GET_PARAMETER(MPI_Datatype, datatype);
+            GET_PARAMETER(int, ncycles);
+            MPI_Comm_size(MPI_COMM_WORLD, &np);
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            if (np < 2) {
+                cout << get_name() << ": two or more ranks required" << endl;
+                return;
+            }
+            MPI_Status stat;
+            double t1 = 0, t2 = 0, time = 0;
+            const int tag = 1;
+            if (rank == 0) {  // rank 0 opens every cycle: send, then wait for the echo
+                t1 = MPI_Wtime();
+                for(int i = 0; i < ncycles; i++) {
+                    MPI_Send(&sbuf[0], item.len, datatype, 1, tag, MPI_COMM_WORLD);
+                    MPI_Recv(&rbuf[0], item.len, datatype, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
+                }
+                t2 = MPI_Wtime();
+                time = (t2 - t1) / ncycles;
+            } else if (rank == 1) {  // rank 1 mirrors it: receive, then answer
+                t1 = MPI_Wtime();
+                for(int i = 0; i < ncycles; i++) {
+                    MPI_Recv(&rbuf[0], item.len, datatype, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
+                    MPI_Send(&sbuf[0], item.len, datatype, 0, tag, MPI_COMM_WORLD);
+                }
+                t2 = MPI_Wtime();
+                time = (t2 - t1) / ncycles;
+            }
+            MPI_Barrier(MPI_COMM_WORLD);  // any further ranks only take part in this barrier
+            results[item.len] = time;     // stays 0 on ranks other than 0 and 1
+        }
+        virtual void finalize() {
+            if (rank == 0) {
+                for (map<int, double>::iterator it = results.begin(); it != results.end(); ++it) {
+                    cout << get_name() << ": " << "len=" << it->first << " time=" << it->second << endl;
+                }
+            }
+        }
+        virtual ~ExampleBenchmark_4() {}  // nothing to release by hand: the vectors own the buffers
+        DEFINE_INHERITED(ExampleBenchmark_4, BenchmarkSuite<BS_GENERIC>);
+    };
+
+    DECLARE_INHERITED(ExampleBenchmark_4, example4)
+}
diff --git a/src_cpp/example/example_benchmark5.cpp b/src_cpp/example/example_benchmark5.cpp
new file mode 100644
index 00000000..7405c0d2
--- /dev/null
+++ b/src_cpp/example/example_benchmark5.cpp
@@ -0,0 +1,119 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+
+#include <mpi.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "scope.h"
+#include "utils.h"
+
+using namespace std;
+
+
+// EXAMPLE 5: generic code, the benchmark class directly inherits from the base Benchmark class.
+// The suite class directly inherits from the BenchmarkSuiteBase class.
+// Registration is done by a rather ugly direct register_elem() call from a constructor.
+class ExampleBenchmark_5 : public Benchmark {  // smallest possible benchmark: fixed name, fixed greeting
+    // NOTE(review): the suite lists this benchmark as "example_bench5", not "example5" -- confirm that is intended
+    virtual const std::string get_name() const { return std::string("example5"); }
+    virtual void run(const scope_item &) { std::cout << "Hello from example 5"; }  // no newline is written
+};
+
+class BenchmarkSuiteExample5 : public BenchmarkSuiteBase  // hand-written suite offering the single benchmark "example_bench5"
+{
+    static BenchmarkSuiteExample5 *instance;  // the heap-allocated suite that gets registered; NULL before the first construction
+    public:
+    BenchmarkSuiteExample5() {  // the first object constructed creates and registers a second, heap-allocated one
+        if (instance == NULL) {
+            instance = (BenchmarkSuiteExample5 *)0xffff;  // placeholder non-NULL value: the nested constructor call below skips this branch
+            instance = new BenchmarkSuiteExample5(); 
+            BenchmarkSuitesCollection::register_elem(instance);
+        }
+    }
+    virtual ~BenchmarkSuiteExample5() {  // the first object destroyed while instance is set deletes the heap-allocated suite
+        if (instance != NULL) {
+            BenchmarkSuiteExample5 *to_delete;
+            to_delete = instance;
+            instance = NULL;  // cleared before the delete, so the destructor it runs skips this branch
+            delete to_delete;  // NOTE(review): if to_delete is the object being destroyed this deletes it twice -- confirm nothing else deletes the registered suite
+        }
+    }
+    virtual void get_bench_list(std::set<std::string> &result, 
+        BenchListFilter filter = ALL_BENCHMARKS) const 
+    {  // set flavour: adds the one benchmark name; filter is ignored
+        UNUSED(filter);
+        result.insert("example_bench5");
+    }
+    virtual void get_bench_list(std::vector<std::string> &result, 
+        BenchListFilter filter = ALL_BENCHMARKS) const 
+    {  // vector flavour: appends the one benchmark name; filter is ignored
+        UNUSED(filter);
+        result.push_back("example_bench5");
+    }
+    virtual smart_ptr<Benchmark> create(const std::string &name) 
+    {  // a new ExampleBenchmark_5 for "example_bench5", a smart_ptr built from NULL for any other name
+        if (name == "example_bench5") {
+            return smart_ptr<Benchmark>(new ExampleBenchmark_5);
+        }
+        return smart_ptr<Benchmark>(NULL);
+    }
+    virtual const std::string get_name() const { return "example_suite5"; };  // name of the suite itself
+};
+
+BenchmarkSuiteExample5 *BenchmarkSuiteExample5::instance;  // no initializer: static storage, so it starts out as NULL
+namespace { BenchmarkSuiteExample5 bench_suite_ex5; }  // constructing this file-local object runs the registering constructor
+
diff --git a/src_cpp/helpers/Makefile.helpers.mk b/src_cpp/helpers/Makefile.helpers.mk
new file mode 100644
index 00000000..b9c4932f
--- /dev/null
+++ b/src_cpp/helpers/Makefile.helpers.mk
@@ -0,0 +1,56 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+# Helper build settings: add helpers/ and $(C_SRC_DIR) to the include path, and the helper headers to HEADERS.
+C_SRC_DIR=../src_c
+override CPPFLAGS += -Ihelpers
+override CPPFLAGS += -I$(C_SRC_DIR)
+
+HEADERS += helpers/original_benchmark.h \
+helpers/helper_IMB_functions.h
diff --git a/src_cpp/helpers/Makefile_win.helpers.mk b/src_cpp/helpers/Makefile_win.helpers.mk
new file mode 100644
index 00000000..a16a2a9c
--- /dev/null
+++ b/src_cpp/helpers/Makefile_win.helpers.mk
@@ -0,0 +1,55 @@
+#*****************************************************************************
+# *                                                                           *
+# * Copyright 2016-2018 Intel Corporation.                                    *
+# *                                                                           *
+# *****************************************************************************
+#
+# This code is covered by the Community Source License (CPL), version
+# 1.0 as published by IBM and reproduced in the file "license.txt" in the
+# "license" subdirectory. Redistribution in source and binary form, with
+# or without modification, is permitted ONLY within the regulations
+# contained in above mentioned license.
+#
+# Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+# within the regulations of the "License for Use of "Intel(R) MPI
+# Benchmarks" Name and Trademark" as reproduced in the file
+# "use-of-trademark-license.txt" in the "license" subdirectory.
+#
+# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+# solely responsible for determining the appropriateness of using and
+# distributing the Program and assumes all risks associated with its
+# exercise of rights under this Agreement, including but not limited to
+# the risks and costs of program errors, compliance with applicable
+# laws, damage to or loss of data, programs or equipment, and
+# unavailability or interruption of operations.
+#
+# EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+# ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+# WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+# DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+# YOUR JURISDICTION. It is licensee's responsibility to comply with any
+# export regulations applicable in licensee's jurisdiction. Under
+# CURRENT U.S. export regulations this software is eligible for export
+# from the U.S. and can be downloaded by or otherwise exported or
+# reexported worldwide EXCEPT to U.S. embargoed destinations which
+# include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+# Afghanistan and any other country to which the U.S. has embargoed
+# goods and services.
+#
+#  ***************************************************************************
+
+
+# Helper build settings: add ../helpers and $(C_SRC_DIR) to the include paths in CPPFLAGS and CFLAGS.
+C_SRC_DIR= ../src_c
+
+CPPFLAGS = $(CPPFLAGS) -I../helpers -I$(C_SRC_DIR)
+CFLAGS   = $(CFLAGS) -I../helpers -I$(C_SRC_DIR)
diff --git a/src_cpp/helpers/helper_IMB_functions.h b/src_cpp/helpers/helper_IMB_functions.h
new file mode 100644
index 00000000..0abec1f7
--- /dev/null
+++ b/src_cpp/helpers/helper_IMB_functions.h
@@ -0,0 +1,778 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include <algorithm>
+// Set to 1 by Bmark_descr::IMB_set_bmark when the NONBLOCKING flag is present (non-EXT builds only).
+static int do_nonblocking_;
+typedef void (*original_benchmark_func_t)(struct comm_info* c_info, int size,  // stored in Bench::Benchmark by IMB_set_bmark
+                struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time);
+// Descriptor flags stored in Bmark_descr::flags; a std::set<descr_t> iterates them in this declaration order.
+enum descr_t {
+    REDUCTION, SELECT_SOURCE,  // copied to Bench::reduction / Bench::select_source (the latter under MPI1)
+    GET, PUT, NO,  // access mode, see descr2access
+    SINGLE_TRANSFER, PARALLEL_TRANSFER, COLLECTIVE, SINGLE_ELEMENT_TRANSFER, MULT_PASSIVE_TRANSFER,  // benchmark type, see descr2type
+    PARALLEL_TRANSFER_MSG_RATE, SYNC,  // benchmark type (continued)
+    SCALE_TIME_HALF, SCALE_BW_DOUBLE, SCALE_BW_FOUR,  // scale_time = 0.5, scale_bw = 2.0, scale_bw = 4.0
+    SENDBUF_SIZE_I, SENDBUF_SIZE_2I, SENDBUF_SIZE_NP_I, SENDBUF_SIZE_0,  // send buffer length rule, see descr2len
+    RECVBUF_SIZE_I, RECVBUF_SIZE_2I, RECVBUF_SIZE_3I, RECVBUF_SIZE_NP_I, RECVBUF_SIZE_0,  // receive buffer length rule, see descr2len
+    HAS_ROOT,  // root-based benchmark; affects the n_sample choice in IMB_init_buffers_iter
+    BIDIR_1,  // RUN_MODES[0] and [1] get BIDIR = 1 (RMA/EXT/MPIIO builds)
+    N_MODES_1,  // N_Modes = 1 instead of 2 (RMA/EXT/MPIIO builds)
+    NON_AGGREGATE,  // RUN_MODES[0].AGGREGATE = -1 (RMA/EXT/MPIIO builds)
+    NONBLOCKING,  // RUN_MODES[0].NONBLOCKING = 1
+    NTIMES_3, NTIMES_2,  // Bench::Ntimes = 3 / 2 (NTIMES_2 is honoured in MPIIO builds only)
+    INDV_BLOCK, PRIVATE, SHARED, EXPLICIT,  // file pointer positioning, see descr2fpointer (MPIIO)
+    DEFAULT  // queried by Bmark_descr::is_default()
+};
+// Working state that the Bmark_descr helper_* methods receive by reference.
+struct GLOBALS {
+    int NP_min;  // per the note in helper_sync_globals_1, initialized by IMB_basic_input
+    int NP, iter, size, ci_np, imod;  // NP, ci_np: helper_sync_globals_1; iter, size: reset in helper_sync_globals_2; imod: not used in this header
+    int header, MAXMSG;  // header = 1 and MAXMSG (2^max_msg_log rounded down to unit_size) are set in helper_sync_globals_2
+    int x_sample, n_sample;  // not referenced in this header
+    Type_Size unit_size;  // filled by MPI_Type_size in helper_sync_globals_1
+    double time[MAX_TIME_ID];  // not referenced in this header
+};
+// Flag-based description of one benchmark plus the helper state used while running it.
+struct Bmark_descr {
+    Bmark_descr() : stop_iterations(false), sample_time(0) {}  // time_limit is zeroed later, in helper_sync_globals_2
+    typedef std::set<descr_t> descr_set;
+    descr_set flags;  // descriptor flags that define the benchmark
+    std::vector<std::string> comments;  // comment strings, published through cmt by IMB_set_bmark
+    std::vector<const char *> cmt;  // c_str() pointers into comments followed by NULL; backs Bench::bench_comments
+    bool stop_iterations;  // set by helper_time_check when the time limit is reached
+    int time_limit[2];  // [1]: local time-out flag, [0]: its MPI_MAX reduction (helper_time_check)
+    double sample_time;  // MPI_Wtime() stamp taken in helper_sync_globals_2
+    /* Map a benchmark-type descriptor flag to the corresponding BTYPES value;
+     * any flag that does not name a benchmark type yields BTYPE_INVALID. */
+    BTYPES descr2type(descr_t t) {
+        BTYPES type = BTYPE_INVALID;
+        if (t == SINGLE_TRANSFER) {
+            type = SingleTransfer;
+        } else if (t == PARALLEL_TRANSFER) {
+            type = ParallelTransfer;
+        } else if (t == COLLECTIVE) {
+            type = Collective;
+        } else if (t == PARALLEL_TRANSFER_MSG_RATE) {
+            type = ParallelTransferMsgRate;
+        } else if (t == SYNC) {
+            type = Sync;
+        } else if (t == SINGLE_ELEMENT_TRANSFER) {
+            type = SingleElementTransfer;
+        } else if (t == MULT_PASSIVE_TRANSFER) {
+            type = MultPassiveTransfer;
+        }
+        return type;
+    }
+    /* Buffer length selected by a SENDBUF_SIZE_x / RECVBUF_SIZE_x flag.
+     * t: size flag; i: base length; np: process count (used by the NP_I flags).
+     * Returns 0, i, 2*i, 3*i or np*i according to t.
+     * Throws std::logic_error when t is not a buffer-size flag. */
+    size_t descr2len(descr_t t, size_t i, size_t np) {
+        size_t factor = 0;
+        switch(t) {
+            case SENDBUF_SIZE_0:
+            case RECVBUF_SIZE_0:
+                break;
+            case SENDBUF_SIZE_I:
+            case RECVBUF_SIZE_I:
+                factor = 1; break;
+            case SENDBUF_SIZE_2I:
+            case RECVBUF_SIZE_2I:
+                factor = 2; break;
+            case RECVBUF_SIZE_3I:
+                factor = 3; break;
+            case SENDBUF_SIZE_NP_I:
+            case RECVBUF_SIZE_NP_I:
+                factor = np; break;
+            default:
+                throw std::logic_error("descr2len: unknown len");
+        }
+        return factor * i;
+    }
+
+    /* GET / PUT / NO flag -> DIRECTION value; throws std::logic_error for any other flag. */
+    DIRECTION descr2access(descr_t t) {
+        if (t == GET) {
+            return get;
+        }
+        if (t == PUT) {
+            return put;
+        }
+        if (t == NO) {
+            return no;
+        }
+        throw std::logic_error("descr2access: unknown access");
+    }
+#ifdef MPIIO
+    /* Translate a file-pointer descriptor flag into a POSITIONING value (MPIIO builds).
+     * t: descriptor flag to translate.
+     * Throws std::logic_error when t is not INDV_BLOCK, PRIVATE, SHARED or EXPLICIT. */
+    POSITIONING descr2fpointer(descr_t t) {
+        switch(t) {
+            case INDV_BLOCK: return indv_block;
+            case PRIVATE:    return priv;
+            case SHARED:     return shared;
+            case EXPLICIT:   return explic;
+            default:
+                // The message names this function (it used to say "descr2position").
+                throw std::logic_error("descr2fpointer: unknown fpointer");
+        }
+    }
+#endif
+    bool is_default() {  // true when the DEFAULT flag is part of this description
+        return flags.find(DEFAULT) != flags.end();
+    }
+
+    /* Fill *Bmark from the descriptor flags and install fn as its benchmark function.
+     * Returns false when a flag group holds more than one flag, or when a required
+     * group (benchmark type; access mode in RMA/EXT/MPIIO builds) holds none. */
+    bool IMB_set_bmark(struct Bench* Bmark, original_benchmark_func_t fn)
+    {
+        bool result = true;
+        Bmark->N_Modes = 1;
+        Bmark->RUN_MODES[0].AGGREGATE   =-1;
+        Bmark->RUN_MODES[0].NONBLOCKING = 0;
+        Bmark->reduction = (flags.count(REDUCTION) > 0);
+        Bmark->Ntimes = 1;
+        bool found = false;
+#ifdef MPI1
+        Bmark->select_source = (flags.count(SELECT_SOURCE) > 0);
+#endif /*MPI1*/
+#if (defined RMA || defined EXT || defined MPIIO)
+        Bmark->N_Modes = flags.count(N_MODES_1) > 0 ? 1 : 2;
+#ifdef RMA
+        Bmark->RUN_MODES[0].AGGREGATE   = 0;
+        Bmark->RUN_MODES[1].AGGREGATE   = 1;
+#else
+        Bmark->RUN_MODES[0].AGGREGATE   = 1;
+        Bmark->RUN_MODES[1].AGGREGATE   = 0;
+#endif /*RMA*/
+        Bmark->RUN_MODES[0].NONBLOCKING = 0;
+        Bmark->RUN_MODES[1].NONBLOCKING = 0;
+        Bmark->RUN_MODES[0].BIDIR       = 0;
+        Bmark->RUN_MODES[1].BIDIR       = 0;
+#ifdef MPIIO
+        descr_set fpointer;
+        fpointer.insert(INDV_BLOCK);
+        fpointer.insert(SHARED);
+        fpointer.insert(PRIVATE);
+        fpointer.insert(EXPLICIT);
+        for (descr_set::iterator it = fpointer.begin(); it != fpointer.end(); ++it) {
+            if (flags.count(*it)) {
+                if (found)
+                    result = false;
+                Bmark->fpointer = descr2fpointer(*it);
+                found = true;
+            }
+        }
+        if (flags.count(NTIMES_2) > 0) {
+            Bmark->Ntimes = 2;
+        }
+
+#endif /*MPIIO*/
+        if (flags.count(NON_AGGREGATE)) {
+            Bmark->RUN_MODES[0].AGGREGATE = -1;
+        }
+        if (flags.count(BIDIR_1)) {
+            Bmark->RUN_MODES[0].BIDIR = 1;
+            Bmark->RUN_MODES[1].BIDIR = 1;
+        }
+        found = false;  // a file-pointer flag found above must not be counted as an access flag
+        descr_set access;
+        access.insert(GET);
+        access.insert(PUT);
+        access.insert(NO);
+        for (descr_set::iterator it = access.begin(); it != access.end(); ++it) {
+            if (flags.count(*it)) {
+                if (found)
+                    result = false;
+                Bmark->access = descr2access(*it);
+                found = true;
+            }
+        }
+        if (!found)
+            result = false;
+#endif /*RMA || EXT || MPIIO*/
+
+        if (flags.count(NONBLOCKING)) {
+            Bmark->RUN_MODES[0].NONBLOCKING = 1;
+#ifndef EXT
+           do_nonblocking_ = 1;
+#endif
+        }
+        if (flags.count(NTIMES_3) > 0) {
+            Bmark->Ntimes = 3;
+        }
+        Bmark->Benchmark = fn;
+        for (size_t i = 0; i < comments.size(); i++) {
+            cmt.push_back(comments[i].c_str());  // pointers alias the strings held in comments
+        }
+        cmt.push_back(NULL);
+        Bmark->bench_comments = const_cast<char **>(&cmt[0]);
+        found = false;  // the benchmark-type group is validated independently of the access group
+        descr_set types;
+        types.insert(SINGLE_TRANSFER);
+        types.insert(PARALLEL_TRANSFER);
+        types.insert(COLLECTIVE);
+        types.insert(PARALLEL_TRANSFER_MSG_RATE);
+        types.insert(SYNC);
+        types.insert(SINGLE_ELEMENT_TRANSFER);
+        types.insert(MULT_PASSIVE_TRANSFER);
+        for (descr_set::iterator it = types.begin(); it != types.end(); ++it) {
+            if (flags.count(*it)) {
+                if (found)
+                    result = false;
+                Bmark->RUN_MODES[1].type = Bmark->RUN_MODES[0].type = descr2type(*it);
+                found = true;
+            }
+        }
+        if (!found)
+            result = false;
+        Bmark->scale_time = 1.0;
+        Bmark->scale_bw = 1.0;
+        if (flags.count(SCALE_TIME_HALF)) {
+            Bmark->scale_time = 0.5;
+        }
+        if (flags.count(SCALE_BW_DOUBLE)) {
+            Bmark->scale_bw = 2.0;
+        }
+        if (flags.count(SCALE_BW_FOUR)) {
+            Bmark->scale_bw = 4.0;
+        }
+
+#if (defined RMA || defined MPIIO)
+        Bmark->RUN_MODES[1].type = Bmark->RUN_MODES[0].type;
+#endif /*RMA*/
+
+        return result;
+    }
+    // Builds the scope for Bmark: message lengths, process counts and the mode count are added to an NPLenCombinedScope.
+    smart_ptr<Scope> helper_init_scope(struct comm_info &c_info,
+                                       struct Bench* Bmark, GLOBALS &glob) {
+        NPLenCombinedScope &scope = *(new NPLenCombinedScope);  // handed to the smart_ptr returned below
+        int len = 0;
+        int iter = 0;
+        bool stop = false;
+        while (true) {
+            if (stop)
+                break;
+            if (Bmark->RUN_MODES[0].type == SingleElementTransfer) {
+                    /* just one size needs to be tested (the size of one element) */
+                MPI_Type_size(c_info.red_data_type,&len);
+            } else {
+                // --- helper_get_next_size(c_info, glob): lengths come from c_info.msglen or are generated
+                if (c_info.n_lens > 0) {
+                    len = c_info.msglen[iter];
+                } else {
+                    if( iter == 0 ) {
+                        if (!c_info.zero_size) {  // zero-length sample not wanted: go straight to iter 1
+                            iter++;
+                            continue;
+                        }
+                        len = 0;
+                    } else if (iter == 1) {
+                        len = ((1<<c_info.min_msg_log) + glob.unit_size - 1)/glob.unit_size*glob.unit_size;  // 2^min_msg_log rounded up to a multiple of unit_size
+#ifdef EXT
+                        len = std::min(len, asize);
+#endif
+                    } else {
+                        len = std::min(glob.MAXMSG, len + len);  // double the previous length, capped at MAXMSG
+                    }
+                }
+            }
+
+            // --- helper_adjust_size(c_info, glob): clamp to MAXMSG, then round up to a multiple of unit_size
+            if (len > glob.MAXMSG) {
+                len = glob.MAXMSG;
+            }
+            len = (len + glob.unit_size - 1)/glob.unit_size*glob.unit_size;
+            // --- helper_post_step(glob, BMark);
+            iter++;
+            if (Bmark->RUN_MODES[0].type == Sync || Bmark->RUN_MODES[0].type == SingleElementTransfer) {  // these types get exactly one length
+                stop = true;
+            }
+            if( Bmark->RUN_MODES[0].type == Sync ) {
+                len = glob.MAXMSG;
+                iter = c_info.n_lens - 1;
+            }
+            scope.add_len(len);
+            if (!(((c_info.n_lens == 0 && len < glob.MAXMSG ) ||  // go on while generated lengths are below MAXMSG ...
+                 (c_info.n_lens > 0  && iter < c_info.n_lens))))  // ... or while listed lengths remain
+                break;
+        }
+        // Process counts: start from NP and double until ci_np is reached.
+        {
+            int &NP_min = glob.NP_min;
+            int &ci_np = c_info.w_num_procs;  // NOTE(review): a reference, so the adjustment below changes c_info.w_num_procs itself - confirm intended
+            if (Bmark->RUN_MODES[0].type == ParallelTransferMsgRate) {
+                ci_np -= ci_np % 2;
+                NP_min += NP_min % 2;
+            }
+            int NP = std::max(1, std::min(ci_np, NP_min));
+            bool do_it = true;
+            if (Bmark->RUN_MODES[0].type == SingleTransfer) {  // NOTE(review): helper_sync_globals_1 also covers SingleElementTransfer here - confirm
+#ifdef MPIIO
+                    NP = 1;
+#else
+                NP = std::min(2, ci_np);
+#endif
+            }
+            while (do_it) {
+//                std::cout << ">> " << ci_np << " " << NP << std::endl;
+                scope.add_np(NP);
+
+                // CALCULATE THE NUMBER OF PROCESSES FOR NEXT STEP
+                if (NP >= ci_np) { do_it = false; }
+                else {
+                    NP = std::min(NP + NP, ci_np);
+                }
+#ifdef MPIIO
+                if (Bmark->RUN_MODES[0].type == SingleTransfer) {
+                        do_it = false; 
+                }
+#endif
+            }
+        }
+        scope.add_nmodes(Bmark->N_Modes);
+        scope.commit();
+        return smart_ptr<Scope>(&scope);
+    }
+
+    /* Prepare and run one sample of a benchmark for message size `size` in mode BMODE.
+     * Picks ITERATIONS->n_sample, derives the send/recv buffer lengths from the
+     * SENDBUF_SIZE_x / RECVBUF_SIZE_x flags, sets the buffers up through IMB_alloc_buf,
+     * IMB_set_buf and IMB_init_transfer, and calls Bmark->Benchmark. With the dynamic
+     * (or auto, non-root-based) iteration policy, trial runs are timed to settle n_sample.
+     * Returns early when c_info->rank < 0, or with Bmark->sample_failure set when the
+     * buffers would exceed max_mem. Throws std::logic_error on bad buffer-size flags. */
+    void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
+                               struct Bench* Bmark, MODES BMODE, int iter, int size)
+    {
+    /* IMB 3.1 << */
+        size_t s_len, r_len, s_alloc, r_alloc;
+        int init_size, irep, i_s, i_r, x_sample;
+//----------------------------------------------------------------------
+        const bool root_based = (flags.count(HAS_ROOT) > 0);
+//----------------------------------------------------------------------
+// --- STEP 1: set x_sample and ITERATIONS->n_sample
+        x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;
+
+        /* July 2002 fix V2.2.1 (RMA is now tested with `defined`, like EXT and MPIIO): */
+#if (defined EXT || defined MPIIO || defined RMA)
+        if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
+#endif
+        Bmark->sample_failure = 0;
+
+        init_size = std::max(size, asize);
+
+        if (c_info->rank < 0) {
+            return;
+        }
+
+        if (ITERATIONS->iter_policy == imode_off) {
+            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
+        } else if ((ITERATIONS->iter_policy == imode_multiple_np) ||
+                   (ITERATIONS->iter_policy == imode_auto && root_based)) {
+            /* n_sample for benchmarks with uneven distribution of works
+               must be greater or equal and multiple to num_procs.
+               The formula below is a negative leg of hyperbola.
+               It's moved and scaled relative to max message size
+               and initial n_sample subject to multiple to num_procs.
+            */
+            double d_n_sample = ITERATIONS->msgspersample;
+            int max_msg_size = 1<<c_info->max_msg_log;
+            int tmp = (int)(d_n_sample*max_msg_size/(c_info->num_procs*init_size+max_msg_size)+0.5);
+            ITERATIONS->n_sample = x_sample = std::max(tmp-tmp%c_info->num_procs, c_info->num_procs);
+        } else {
+            ITERATIONS->n_sample = (size > 0)
+                             ? std::max(1, std::min(ITERATIONS->overall_vol / size, x_sample))
+                             : x_sample;
+        }
+// --- STEP 2: set s_len and r_len
+//---------------------------------------------------------------------------------------------------
+        bool result = true;
+        {
+            descr_set types;
+            types.insert(SENDBUF_SIZE_I);
+            types.insert(SENDBUF_SIZE_2I);
+            types.insert(SENDBUF_SIZE_NP_I);
+            types.insert(SENDBUF_SIZE_0);
+            bool found = false;
+            for (descr_set::iterator it = types.begin(); it != types.end(); ++it) {
+                if (flags.count(*it)) {
+                    if (found)
+                        result = false;
+                    s_len = descr2len(*it, (size_t)init_size,  (size_t)c_info->num_procs);
+                    found = true;
+                }
+            }
+            if (!found)
+                result = false;
+        }
+        {
+            descr_set types;
+            types.insert(RECVBUF_SIZE_I);
+            types.insert(RECVBUF_SIZE_2I);
+            types.insert(RECVBUF_SIZE_3I);
+            types.insert(RECVBUF_SIZE_NP_I);
+            types.insert(RECVBUF_SIZE_0);
+            bool found = false;
+            for (descr_set::iterator it = types.begin(); it != types.end(); ++it) {
+                if (flags.count(*it)) {
+                    if (found)
+                        result = false;
+                    r_len = descr2len(*it, (size_t)init_size,  (size_t)c_info->num_procs);
+                    found = true;
+                }
+            }
+            if (!found)
+                result = false;
+        }
+        if (!result) {
+            throw std::logic_error("wrong recv or send buffer requirement description on a benchmark");
+        }
+//        printf(">> s_len=%ld, r_len=%ld\n", s_len, r_len);
+//---------------------------------------------------------------------------------------------------
+// --- STEP 3: set s_alloc and r_alloc AND all these ITERATIONS->s_offs,r_offs,...
+//---------------------------------------------------------------------------------------------------
+
+        /* IMB 3.1: new memory management for -off_cache */
+        if (BMODE->type == Sync) {
+            ITERATIONS->use_off_cache=0;
+            ITERATIONS->n_sample=x_sample;
+        } else {
+#ifdef MPIIO
+            ITERATIONS->use_off_cache=0;
+#else
+            ITERATIONS->use_off_cache = ITERATIONS->off_cache;
+#endif
+            if (ITERATIONS->off_cache) {
+                if ( ITERATIONS->cache_size > 0) {
+                    size_t cls = (size_t) ITERATIONS->cache_line_size;
+                    size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
+                    ITERATIONS->s_offs = ofs;
+                    ITERATIONS->s_cache_iter = std::min((int)(ITERATIONS->n_sample), (int)((2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs));
+                    ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
+                    ITERATIONS->r_offs = ofs;
+                    ITERATIONS->r_cache_iter = std::min((int)(ITERATIONS->n_sample), (int)((2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs));
+                } else {
+                    ITERATIONS->s_offs=ITERATIONS->r_offs=0;
+                    ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
+                }
+            }
+        }
+
+#ifdef MPIIO
+        s_alloc = s_len;
+        r_alloc = r_len;
+#else
+        if( ITERATIONS->use_off_cache ) {
+            s_alloc = std::max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
+            r_alloc = std::max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
+        } else {
+            s_alloc = s_len;
+            r_alloc = r_len;
+        }
+#endif
+
+// --- STEP 4: detect too much memory situation
+//--------------------------------------------------------------------------------
+        c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;
+
+#ifdef DEBUG
+        {
+            size_t mx, mu;
+
+            mx = (size_t) MEM_UNIT*c_info->max_mem;
+            mu = (size_t) MEM_UNIT*c_info->used_mem;
+
+            DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
+            DBG_I2("max  / used memory ",mx,mu);
+            DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
+            DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
+            DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
+            DBGF_I2("Got send / recv lengths ",s_len,r_len);
+            DBGF_I2("max  / used memory ",mx,mu);
+            DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
+            DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
+            DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
+        }
+#endif
+
+        if( c_info->used_mem > c_info->max_mem ) {
+            Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
+            return;
+        }
+
+
+// --- call IMB_set_buf, IMB_init_transfer
+// -------------------------------------------------------------------------------------
+        if (s_alloc > 0  && r_alloc > 0) {
+            if (ITERATIONS->use_off_cache) {
+                IMB_alloc_buf(c_info, (char *)"IMB_init_buffers_iter 1", s_alloc, r_alloc);
+                IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);
+
+                for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
+                    i_s = irep % ITERATIONS->s_cache_iter;
+                    memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs), c_info->s_buffer, s_len);
+                }
+
+                for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
+                    i_r = irep % ITERATIONS->r_cache_iter;
+                    memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
+                }
+            } else {
+                IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
+            }
+        }
+
+        IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) std::max(s_alloc, r_alloc));
+
+
+// --- change  ITERATIONS->n_sample
+// -------------------------------------------------------------------------------------
+//
+        /* Determine #iterations if dynamic adaptation requested */
+        if ((ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based)) {
+            double time[MAX_TIME_ID];
+            int acc_rep_test, t_sample;
+            int selected_n_sample = ITERATIONS->n_sample;
+
+            memset(time, 0, sizeof(time));  // memset counts bytes: clear the whole array, not MAX_TIME_ID bytes
+            if (iter == 0 || BMODE->type == Sync) {
+                ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
+                if (c_info->n_lens > 0) {
+                    memset(ITERATIONS->numiters, 0, c_info->n_lens * sizeof(ITERATIONS->numiters[0]));  // all n_lens entries, not n_lens bytes
+                }
+            }
+
+            /* first, run 1 iteration only */
+            ITERATIONS->n_sample=1;
+#ifdef MPI1
+            c_info->select_source = Bmark->select_source;
+#endif
+            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+
+            time[1] = time[0];
+
+#ifdef MPIIO
+            if( Bmark->access != no) {
+                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
+                MPI_ERRHAND(ierr);
+
+                if( Bmark->fpointer == shared) {
+                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
+                    MPI_ERRHAND(ierr);
+                }
+            }
+#endif /*MPIIO*/
+
+            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
+
+            { /* determine rough #repetitions for a run time of 1 sec */
+                int rep_test = 1;
+                if (time[0] < (1.0 / MSGSPERSAMPLE)) {
+                    rep_test = MSGSPERSAMPLE;
+                } else if ((time[0] < 1.0)) {
+                    rep_test = (int)(1.0 / time[0] + 0.5);
+                }
+
+                MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
+            }
+
+            ITERATIONS->n_sample = std::min(selected_n_sample, acc_rep_test);
+
+            if (ITERATIONS->n_sample > 1) {
+#ifdef MPI1
+                c_info->select_source = Bmark->select_source;
+#endif
+                Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+                time[1] = time[0];
+#ifdef MPIIO
+                if( Bmark->access != no) {
+                    ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
+                    MPI_ERRHAND(ierr);
+
+                    if ( Bmark->fpointer == shared) {
+                        ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
+                        MPI_ERRHAND(ierr);
+                    }
+                }
+#endif /*MPIIO*/
+
+                MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
+            }
+
+            {
+                float val = (float) (1+ITERATIONS->secs/time[0]);
+                t_sample = (time[0] > 1.e-8 && (val <= (float) 0x7fffffff))
+                            ? (int)val
+                            : selected_n_sample;
+            }
+
+            if (c_info->n_lens>0 && BMODE->type != Sync) {
+                // check monotonicity with msg sizes
+                int i;
+                for (i = 0; i < iter; i++) {
+                    t_sample = ( c_info->msglen[i] < size )
+                                ? std::min(t_sample,ITERATIONS->numiters[i])
+                                : std::max(t_sample,ITERATIONS->numiters[i]);
+                }
+                ITERATIONS->n_sample = ITERATIONS->numiters[iter] = std::min(selected_n_sample, t_sample);
+            } else {
+                ITERATIONS->n_sample = std::min(selected_n_sample,
+                                            std::min(ITERATIONS->n_sample_prev, t_sample));
+            }
+
+            MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);
+
+#ifdef DEBUG
+            {
+                int usec=(int)(time[0]*1000000);  // time is an array here; use the reduced value in time[0]
+
+                DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
+                DBGF_I1("=> # samples, aligned with previous ",t_sample);
+                DBGF_I1("final #samples ",ITERATIONS->n_sample);
+            }
+#endif
+
+// --- call Benchmark
+// -------------------------------------------------------------------------------------
+//
+        } else { /*if( (ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based) )*/
+            double time[MAX_TIME_ID];
+            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
+        }
+
+// --- save n_sample_prev
+// -------------------------------------------------------------------------------------
+//
+         ITERATIONS->n_sample_prev=ITERATIONS->n_sample;
+
+    /* >> IMB 3.1  */
+
+    }
+
+    /* First part of the per-benchmark GLOBALS setup: process counts, RMA bandwidth scale, unit size.
+     * glob.NP_min is expected to be initialized already (by IMB_basic_input). */
+    void helper_sync_globals_1(comm_info &c_info, GLOBALS &glob, struct Bench *Bmark) {
+        const bool msg_rate = (Bmark->RUN_MODES[0].type == ParallelTransferMsgRate);
+        const bool single = (Bmark->RUN_MODES[0].type == SingleTransfer || Bmark->RUN_MODES[0].type == SingleElementTransfer);
+        glob.ci_np = c_info.w_num_procs;
+        if (msg_rate) {
+            glob.ci_np = glob.ci_np - glob.ci_np % 2;     // an odd process count loses one process
+            glob.NP_min = glob.NP_min + glob.NP_min % 2;  // an odd NP_min moves to the next even value
+        }
+        if (!single) {
+            glob.NP = std::max(1, std::min(glob.ci_np, glob.NP_min));
+        } else {
+#ifdef MPIIO
+            glob.NP = 1;
+#else
+            glob.NP = std::min(2, glob.ci_np);
+#endif /*MPIIO*/
+        }
+#ifdef RMA
+        /* Just sanity check */
+        if (Bmark->RUN_MODES[0].type == MultPassiveTransfer && c_info.num_procs > 1) {
+            Bmark->scale_bw = (double)c_info.num_procs - 1;
+        }
+#endif /*RMA*/
+#ifdef EXT
+        MPI_Type_size(c_info.red_data_type, &glob.unit_size);
+#else
+        if (!Bmark->reduction && Bmark->RUN_MODES[0].type != SingleElementTransfer) {
+            MPI_Type_size(c_info.s_data_type, &glob.unit_size);
+        } else {
+            MPI_Type_size(c_info.red_data_type, &glob.unit_size);
+        }
+#endif /*EXT*/
+    }
+
+    /* Second part of the per-benchmark setup: size cap, counters, failure and timing state. */
+    void helper_sync_globals_2(comm_info &c_info, GLOBALS &glob, struct Bench *Bmark) {
+        const int max_len = 1 << c_info.max_msg_log;
+        glob.MAXMSG = max_len - max_len % glob.unit_size;  // largest multiple of unit_size <= 2^max_msg_log
+        glob.header = 1;
+        glob.iter = 0;
+        glob.size = 0;
+        Bmark->sample_failure = 0;
+        Bmark->success = 1;
+#ifdef MPI1
+        c_info.select_source = Bmark->select_source;
+#endif
+        time_limit[1] = 0;
+        time_limit[0] = 0;
+        stop_iterations = false;
+        sample_time = MPI_Wtime();
+        if (Bmark->RUN_MODES[0].type == SingleElementTransfer) {
+            MPI_Type_size(c_info.red_data_type, &glob.size);  // only the size of one element is tested
+        }
+    }
+
+    /* Time-out check: if any rank exceeded the time budget since helper_sync_globals_2, the MPI_MAX reduction makes every rank stop. */
+    void helper_time_check(comm_info &c_info, GLOBALS &, Bench *Bmark, iter_schedule &ITERATIONS) {
+        if (!Bmark->sample_failure) {
+            time_limit[1] = 0;
+            if (c_info.rank >= 0) {
+                const int n_steps = std::max(std::max(c_info.n_lens, c_info.max_msg_log - c_info.min_msg_log) - 1, 1);
+                time_limit[1] = (MPI_Wtime() - sample_time < n_steps * ITERATIONS.secs) ? 0 : 1;
+            }
+        }
+        MPI_Allreduce(&time_limit[1], &time_limit[0], 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+        if (time_limit[0]) {
+            Bmark->sample_failure = SAMPLE_FAILED_TIME_OUT;
+            stop_iterations = true;
+        }
+    }
+};
diff --git a/src_cpp/helpers/original_benchmark.h b/src_cpp/helpers/original_benchmark.h
new file mode 100644
index 00000000..e760ee95
--- /dev/null
+++ b/src_cpp/helpers/original_benchmark.h
@@ -0,0 +1,213 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+#include <map>
+#include "IMB_benchmark.h"
+#include "IMB_comm_info.h"
+
+
+extern "C" {
+#include "IMB_prototypes.h"
+}
+// Re-joins a template-id split at its comma so it reaches another macro as ONE argument (see DEFINE_INHERITED use below).
+#define GLUE_TYPENAME(A,B) A,B
+
+#include "helper_IMB_functions.h"
+// Copies the suite parameter named #NAME bytewise into the variable NAME; asserts that the parameter exists.
+#define GET_GLOBAL(TYPE, NAME) { TYPE *p = suite->get_parameter(#NAME).as<TYPE>(); \
+                                 assert(p != NULL); \
+                                 memcpy(&NAME, p, sizeof(TYPE)); }
+
+// C-linkage declaration. NOTE(review): no call to IMB_Barrier is visible in this header -- confirm it is still needed.
+extern "C" { void IMB_Barrier(MPI_Comm comm); }
+
+// Wraps one legacy IMB benchmark routine (fn_ptr) behind the Benchmark interface.
+// 'bs' is only forwarded to DEFINE_INHERITED here (presumably the owning suite type -- confirm).
+template <class bs, original_benchmark_func_t fn_ptr>
+class OriginalBenchmark : public Benchmark {
+    protected:
+        static smart_ptr<Bmark_descr> descr;  // shared by all objects of one <bs, fn_ptr> instantiation
+        comm_info c_info;
+        iter_schedule ITERATIONS;
+        MODES BMODE;
+        double time[MAX_TIME_ID];
+        Bench BMark[1];
+        int FULL_NP;  // size of MPI_COMM_WORLD, filled in by init()
+        int RANK;     // rank in MPI_COMM_WORLD, filled in by init()
+        GLOBALS glob;
+
+        // Saves 'dt' in 'base' and replaces it by a committed vector of size / (byte size of base) elements.
+        static void wrap_in_vector(MPI_Datatype &dt, MPI_Datatype &base, int size) {
+            int size_dt;
+            base = dt;
+            MPI_Type_size(base, &size_dt);
+            MPI_Type_vector(size / size_dt, 1, 1, base, &dt);
+            MPI_Type_commit(&dt);
+        }
+        // Frees the vector datatypes created by run() and reinstalls the saved base datatypes.
+        void restore_base_datatypes(MPI_Datatype s_dt, MPI_Datatype r_dt, MPI_Datatype red_dt) {
+            MPI_Type_free(&(c_info.s_data_type));
+            c_info.s_data_type = s_dt;
+            MPI_Type_free(&(c_info.r_data_type));
+            c_info.r_data_type = r_dt;
+            MPI_Type_free(&(c_info.red_data_type));
+            c_info.red_data_type = red_dt;
+        }
+    public:
+        using Benchmark::scope;
+        // Clears the name pointer and creates the shared descriptor if it does not exist yet.
+        virtual void allocate_internals() {
+            BMark[0].name = NULL;
+            if (descr.get() == NULL)
+                descr = new Bmark_descr;
+        }
+        virtual bool init_description();
+        // Copies the suite-wide settings, binds fn_ptr to BMark and builds the scope to iterate over.
+        virtual void init() {
+            MPI_Comm_size(MPI_COMM_WORLD, &FULL_NP);
+            MPI_Comm_rank(MPI_COMM_WORLD, &RANK);
+
+            // Copy some global data from BenchmarkSuite
+            GET_GLOBAL(comm_info, c_info);
+            GET_GLOBAL(iter_schedule, ITERATIONS);
+            GET_GLOBAL(GLOBALS, glob);
+
+            assert(RANK == c_info.w_rank);
+            assert(FULL_NP == c_info.w_num_procs);
+
+            BMark->name = strdup(name);
+            descr->IMB_set_bmark(BMark, fn_ptr);
+            descr->helper_sync_globals_1(c_info, glob, BMark);
+            descr->helper_sync_globals_2(c_info, glob, BMark);
+
+            scope = descr->helper_init_scope(c_info, BMark, glob);
+
+            // FIXME glob.NP is used inside helper_init_scope, it's easy to mess it up
+            glob.NP = 0;
+            initialized = true;
+        }
+        // Runs one sample of item.len bytes on item.np processes in run mode *item.extra_fields.
+        // Returns at once before init() or after the shared descriptor has requested a stop.
+        virtual void run(const scope_item &item) {
+            int size = item.len;
+            int np = item.np;
+            int imod = *(item.extra_fields.as<int>());
+            MPI_Datatype base_s_dt = MPI_DATATYPE_NULL, base_r_dt = MPI_DATATYPE_NULL, base_red_dt = MPI_DATATYPE_NULL;
+            if (!initialized)
+                return;
+            if (descr->stop_iterations)
+                return;
+            // Decided once, so the datatypes are released exactly when they were created.
+            const bool vector_types = size != 0 && (c_info.contig_type == CT_BASE_VEC || c_info.contig_type == CT_RESIZE_VEC);
+            if (vector_types) {
+                wrap_in_vector(c_info.s_data_type, base_s_dt, size);
+                wrap_in_vector(c_info.r_data_type, base_r_dt, size);
+                wrap_in_vector(c_info.red_data_type, base_red_dt, size);
+            }
+
+            if (np != glob.NP || imod != glob.imod) {
+                glob.NP = np;
+                glob.imod = imod;
+                if (!IMB_valid(&c_info, BMark, glob.NP)) {
+                    descr->stop_iterations = true;
+                    // Early exit: do not leak the datatypes committed above.
+                    if (vector_types) restore_base_datatypes(base_s_dt, base_r_dt, base_red_dt);
+                    return;
+                }
+                IMB_init_communicator(&c_info, glob.NP);
+#ifdef MPIIO
+                if (IMB_init_file(&c_info, BMark, &ITERATIONS, glob.NP) != 0) IMB_err_hand(0, -1);
+#endif /*MPIIO*/
+                descr->helper_sync_globals_2(c_info, glob, BMark);
+            }
+            if( BMark->RUN_MODES[0].type == Sync ) {
+                glob.iter = c_info.n_lens - 1;
+            }
+#ifdef MPIIO
+            if(c_info.w_rank == 0 &&
+               do_nonblocking_) {
+                double MFlops = IMB_cpu_exploit_reworked(TARGET_CPU_SECS, 1);
+                printf("\n\n# For nonblocking benchmarks:\n\n");
+                printf("# Function CPU_Exploit obtains an undisturbed\n");
+                printf("# performance of %7.2f MFlops\n",MFlops);
+                do_nonblocking_ = 0;
+            }
+#endif
+            glob.size = size;
+            BMODE = &(BMark->RUN_MODES[imod]);
+            descr->IMB_init_buffers_iter(&c_info, &ITERATIONS, BMark, BMODE, glob.iter, size);
+            descr->helper_time_check(c_info, glob, BMark, ITERATIONS);
+            bool failed = (descr->stop_iterations || (BMark->sample_failure));
+            if (!failed) {
+                IMB_warm_up(BMark, &c_info, size, &ITERATIONS, glob.iter);
+                fn_ptr(&c_info, size, &ITERATIONS, BMODE, time);
+            }
+            MPI_Barrier(MPI_COMM_WORLD);
+            IMB_output(&c_info, BMark, BMODE, glob.header, size, &ITERATIONS, time);
+            IMB_close_transfer(&c_info, BMark, size);
+            if (vector_types) restore_base_datatypes(base_s_dt, base_r_dt, base_red_dt);
+            IMB_del_s_buf(&c_info);
+            IMB_del_r_buf(&c_info);
+            glob.header = 0;
+            glob.iter++;
+        }
+        virtual bool is_default() {
+            return descr->is_default();
+        }
+        virtual std::vector<std::string> get_comments() {
+            return descr->comments;
+        }
+        ~OriginalBenchmark() {
+            free(BMark[0].name);
+        }
+        DEFINE_INHERITED(GLUE_TYPENAME(OriginalBenchmark<bs, fn_ptr>), bs);
+};
+
diff --git a/src_cpp/imb.cpp b/src_cpp/imb.cpp
new file mode 100644
index 00000000..68534145
--- /dev/null
+++ b/src_cpp/imb.cpp
@@ -0,0 +1,382 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include <mpi.h>
+#include <stdexcept>
+#include <fstream>
+#include <algorithm>
+
+#include "smart_ptr.h"
+#include "args_parser.h"
+#include "benchmark.h"
+#include "benchmark_suites_collection.h"
+#include "utils.h"
+#include "scope.h"
+#include "benchmark_suite.h"
+
+using namespace std;
+
+extern void check_parser();
+
+// Entry point: parses the command line, selects the benchmarks to run, initialises MPI in the
+// requested threading mode and drives the prepare / init / run / finalize cycle.
+// Returns 0 on success, 1 after a std::exception, otherwise the thrown int (0 for -list, 1 for bad args).
+int main(int argc, char * *argv)
+{
+    bool no_mpi_init_flag = true;
+    int return_value = 0;
+    int rank = 0, size = 0;
+    const char *program_name = "Intel(R) MPI Benchmarks 2019";
+    std::ostringstream output;
+
+    // Some unit tests for args parser
+#if 0
+    check_parser();
+    return 1;
+#endif
+
+    try {
+        // Allow very first init steps for each suite -- each benchmark
+        // is allowed to init flags and do other fundamental things before MPI_Init and
+        // even args parsing
+        BenchmarkSuitesCollection::init_registered_suites();
+
+        // Do basic initialisation of expected args
+        //args_parser parser(argc, argv, "/", ':');
+        //args_parser parser(argc, argv, "--", '=');
+        args_parser parser(argc, argv, "-", ' ', output);
+
+        parser.set_program_name(program_name);
+        parser.set_flag(args_parser::ALLOW_UNEXPECTED_ARGS);
+
+        parser.add<string>("thread_level", "single").
+               set_caption("single|funneled|serialized|multiple|nompiinit").
+               set_description(
+                   "Sets up the type of MPI_Init call to use:\n"
+                   "single: MPI_Init\n"
+                   "funneled: MPI_Init_thread with MPI_THREAD_FUNNELED\n"
+                   "serialized: MPI_Init_thread with MPI_THREAD_SERIALIZED\n"
+                   "multiple: MPI_Init_thread with MPI_THREAD_MULTIPLE\n"
+                   "nompiinit: don't call MPI_Init (the MPI_Init call may be made then in error case\n"
+                   "to prevent rubbish output\n");
+        parser.add<string>("input", "").set_caption("filename").
+               set_description(
+                   "The argument after -input is a filename is any text file containing, line by line,\n"
+                   "benchmark names facilitates running particular benchmarks as compared to\n"
+                   "using the command line.\n"
+                   "\n"
+                   "default:\n"
+                   "no input file exists\n");
+
+        parser.add_vector<string>("include", "").set_caption("benchmark[,benchmark,[...]").
+               set_description("The argument after -include is one or more benchmark names separated by comma");
+        parser.add_vector<string>("exclude", "").set_caption("benchmark[,benchmark,[...]").
+               set_description("The argument after -exclude is one or more benchmark names separated by comma");
+
+        // Extra non-option arguments
+        parser.set_current_group("EXTRA_ARGS");
+        parser.add_vector<string>("(benchmarks)", "").set_caption("benchmark[,benchmark,[...]]");
+        parser.set_default_current_group();
+
+        // Now fill in benchmark suite related args
+        if (!BenchmarkSuitesCollection::declare_args(parser, output)) {
+            throw runtime_error("one or more benchmark suites failed on options declaration stage");
+        }
+
+        // "system" option args to do special things, not dumped to files
+        parser.set_current_group("SYS");
+#ifdef WITH_YAML_CPP
+        parser.add<string>("dump", "").set_caption("config.yaml").
+               set_description(
+                   "Dump the YAML config file with the set of actual options for\n"
+                   "the benchmark session. Parameter sets up the config file name\n");
+        parser.add<string>("load", "").set_caption("config.yaml").
+               set_description(
+                   "Load session options from YAML config file given as a parameter\n");
+#endif
+        parser.add_flag("list").
+               set_description(
+                   "Prints out all the benchmark names available in this IMB build.\n"
+                   "The information about the benchmarks suite each benchmark belongs to\n"
+                   "and the benchmark description (if available) is printed out also\n");
+        parser.set_default_current_group();
+
+        if (!parser.parse()) {
+            throw 1;
+        }
+
+#ifdef WITH_YAML_CPP
+        string infile = parser.get<string>("load");
+        if (infile != "") {
+            ifstream in(infile.c_str(), ios_base::in);
+            parser.load(in);
+            if (!parser.parse()) {
+                throw runtime_error("input config file parse error");
+            }
+        }
+        string outfile = parser.get<string>("dump");
+        if (outfile != "") {
+            string out = parser.dump();
+            ofstream of(outfile.c_str(), ios_base::out);
+            of << out;
+        }
+#endif
+
+        vector<string> requested_benchmarks, to_include, to_exclude;
+        parser.get<string>("(benchmarks)", requested_benchmarks);
+        parser.get_unknown_args(requested_benchmarks);
+        parser.get<string>("include", to_include);
+        parser.get<string>("exclude", to_exclude);
+
+        string filename = parser.get<string>("input");
+        if (filename != "") {
+            FILE *t = fopen(filename.c_str(), "r");
+            if (t == NULL) {
+                throw runtime_error("can't open a file given in -input option");
+            }
+            char input_line[72+1], name[32+1];
+            while (fgets(input_line, 72, t)) {
+                // Skip comments and blank lines: sscanf() leaves 'name' untouched when it finds no token.
+                if (input_line[0] != '#' && sscanf(input_line, "%32s", name) == 1) {
+                    requested_benchmarks.push_back(name);
+                }
+            }
+            fclose(t);
+        }
+
+        // Complete benchmark list filling in: combine -input, -include, -exclude options,
+        // make sure all requested benchmarks are found
+        vector<string> default_benchmarks, all_benchmarks;
+        vector<string> actual_benchmark_list;
+        vector<string> benchmarks_to_run;
+        vector<string> missing;
+        map<string, set<string> > by_suite;
+        BenchmarkSuitesCollection::get_full_list(all_benchmarks, by_suite);
+        BenchmarkSuitesCollection::get_default_list(default_benchmarks);
+        if (parser.get<bool>("list")) {
+            output << program_name << endl;
+            output << "List of benchmarks:" << endl;
+            for (map<string, set<string> >::iterator it_s = by_suite.begin();
+                 it_s != by_suite.end(); ++it_s) {
+                set<string> &benchmarks = it_s->second;
+                string sn = it_s->first;
+                if (sn == "__generic__")
+                    continue;
+                output << sn << ":" << endl;
+                for (set<string>::iterator it_b = benchmarks.begin();
+                     it_b != benchmarks.end(); ++it_b) {
+                    smart_ptr<Benchmark> b = BenchmarkSuitesCollection::create(*it_b);
+                    string bn = b->get_name();
+                    vector<string> comments = b->get_comments();
+                    output << "    " << bn;
+                    if (!b->is_default()) output << " (non-default)";
+                    output << endl;
+                    for (size_t i = 0; i < comments.size(); i++)
+                       output << "        " << comments[i] << endl;
+                }
+            }
+            throw 0;
+        }
+        {
+            using namespace set_operations;
+
+            preprocess_list(requested_benchmarks);
+            preprocess_list(to_include);
+            preprocess_list(to_exclude);
+            preprocess_list(all_benchmarks);
+            preprocess_list(default_benchmarks);
+
+            if (requested_benchmarks.size() != 0) {
+                combine(to_include, requested_benchmarks);
+            } else {
+                combine(actual_benchmark_list, default_benchmarks);
+            }
+            exclude(to_include, to_exclude);
+            exclude(actual_benchmark_list, to_exclude);
+            combine(to_include, actual_benchmark_list);
+            actual_benchmark_list = to_include;
+            missing = actual_benchmark_list;
+            exclude(missing, all_benchmarks);
+            if (missing.size() != 0) {
+                exclude(actual_benchmark_list, missing);
+                if (actual_benchmark_list.size() == 0) {
+                    combine(actual_benchmark_list, default_benchmarks);
+                }
+            }
+
+            // Change benchmark names to their canonical form
+            all_benchmarks.resize(0);
+            by_suite.clear();
+            BenchmarkSuitesCollection::get_full_list(all_benchmarks, by_suite);
+            for (size_t i = 0; i < actual_benchmark_list.size(); i++) {
+                string b = to_lower(actual_benchmark_list[i]);
+                for (size_t j = 0; j < all_benchmarks.size(); j++) {
+                    if (to_lower(all_benchmarks[j]) == b) {
+                        benchmarks_to_run.push_back(all_benchmarks[j]);
+                    }
+                }
+            }
+        }
+
+        // Do appropriate MPI_Init call
+        string mpi_init_mode = parser.get<string>("thread_level");
+        int required_mode = MPI_THREAD_SINGLE, provided_mode = MPI_THREAD_SINGLE;
+        if (mpi_init_mode == "single") {
+            no_mpi_init_flag = false;
+            required_mode = MPI_THREAD_SINGLE;
+        } else if (mpi_init_mode == "funneled") {
+            no_mpi_init_flag = false;
+            required_mode = MPI_THREAD_FUNNELED;
+        } else if (mpi_init_mode == "serialized") {
+            no_mpi_init_flag = false;
+            required_mode = MPI_THREAD_SERIALIZED;
+        } else if (mpi_init_mode == "multiple") {
+            no_mpi_init_flag = false;
+            required_mode = MPI_THREAD_MULTIPLE;
+        } else if (mpi_init_mode == "nompiinit") {
+            ;  // MPI is deliberately not initialised here
+        } else {
+            throw logic_error("wrong value of `thread_level' option");
+        }
+        if (!no_mpi_init_flag) {
+            MPI_Init_thread(&argc, &argv, required_mode, &provided_mode);
+            MPI_Comm_size(MPI_COMM_WORLD, &size);
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            if (provided_mode < required_mode) {  // levels are ordered; a higher one than requested is fine
+                throw logic_error("can't setup a required MPI threading mode");
+            }
+        }
+
+        //---------------------------------------------------------------------
+        // ACTUAL BENCHMARKING
+        //
+        // 1. Preparation phase on suite level
+        if (!BenchmarkSuitesCollection::prepare(parser, benchmarks_to_run, missing, output)) {
+            throw logic_error("One or more benchmark suites failed at preparation stage");
+        }
+        {
+            using namespace set_operations;
+            vector<string> benchmarks_to_exclude = benchmarks_to_run;
+            all_benchmarks.clear();
+            BenchmarkSuitesCollection::get_full_list(all_benchmarks, by_suite);
+            exclude(benchmarks_to_exclude, all_benchmarks);
+            exclude(benchmarks_to_run, benchmarks_to_exclude);
+        }
+        if (rank == 0) {
+           cout << output.str();
+           output.str("");
+           output.clear();
+        }
+
+        // 2. All benchmarks wrappers constructors, initializers and scope definition
+        typedef pair<smart_ptr<Benchmark>, smart_ptr<Scope> > item;
+        typedef vector<item> running_sequence;
+        running_sequence sequence;
+        for (vector<string>::iterator it = benchmarks_to_run.begin();
+             it != benchmarks_to_run.end(); ++it) {
+            smart_ptr<Benchmark> b = BenchmarkSuitesCollection::create(*it);
+            if (b.get() == NULL) {
+                throw logic_error("benchmark creator failed!");
+            }
+            b->init();
+            smart_ptr<Scope> scope = b->get_scope();
+            sequence.push_back(item(b, scope));
+        }
+
+        // 3. Actual running cycle
+        for (running_sequence::iterator it = sequence.begin(); it != sequence.end(); ++it) {
+            smart_ptr<Benchmark> &b = it->first;
+            smart_ptr<Scope> &scope = it->second;
+            for (Scope::iterator s = scope->begin(); s != scope->end(); ++s)
+                b->run(*s);
+        }
+
+        // 4. Finalize cycle
+        for (running_sequence::iterator it = sequence.begin(); it != sequence.end(); ++it) {
+            smart_ptr<Benchmark> &b = it->first;
+            b->finalize();
+        }
+
+        // 5. Final steps on suite-level
+        BenchmarkSuitesCollection::finalize(benchmarks_to_run, output);
+        if (rank == 0) {
+           cout << output.str();
+           output.str("");
+           output.clear();
+        }
+    }
+    catch(const exception &ex) {
+        if (no_mpi_init_flag) {
+            // MPI is brought up only for the report; ask for the real rank so a single process prints it.
+            MPI_Init(NULL, NULL);
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            no_mpi_init_flag = false;
+        }
+        if (!no_mpi_init_flag && rank == 0) {
+            cout << "EXCEPTION: " << ex.what() << endl;
+            cout << output.str();
+        }
+        return_value = 1;
+    }
+    catch(int ret) {
+        if (no_mpi_init_flag) {
+            MPI_Init(NULL, NULL);
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            no_mpi_init_flag = false;
+        }
+        if (!no_mpi_init_flag && rank == 0) {
+            cout << output.str();
+        }
+        return_value = ret;
+    }
+    if (!no_mpi_init_flag)
+        MPI_Finalize();
+    return return_value;
+}
diff --git a/src_cpp/scope.cpp b/src_cpp/scope.cpp
new file mode 100644
index 00000000..1cacf98e
--- /dev/null
+++ b/src_cpp/scope.cpp
@@ -0,0 +1,88 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#include "scope.h"
+
+using namespace std;
+
+bool ScopeIterator::operator==(const ScopeIterator &other) {  // same position, equally long sequences
+    return n == other.n && scope.sequence.size() == other.scope.sequence.size();
+}
+
+bool ScopeIterator::operator!=(const ScopeIterator &other) {  // plain negation of operator==
+    return !(*this == other);
+}
+
+ScopeIterator &ScopeIterator::operator++() {  // pre-increment: advance the position by one
+    ++n;
+    return *this;
+}
+
+ScopeIterator ScopeIterator::operator++(int) {  // post-increment: advances, returns the old state
+    ScopeIterator before(*this);
+    ++(*this);
+    return before;
+}
+
+scope_item ScopeIterator::operator*() { // returns a copy of the item at the current position
+    return scope.sequence[n]; 
+}
+
+ScopeIterator Scope::begin() { // iterator at position 0; asserts that the scope has been formed
+    assert(formed); 
+    return ScopeIterator(*this, 0); 
+}
+
+ScopeIterator Scope::end() { // iterator at position sequence.size(); asserts that the scope has been formed
+    assert(formed); 
+    return ScopeIterator(*this, sequence.size()); 
+}
+
+
diff --git a/src_cpp/scope.h b/src_cpp/scope.h
new file mode 100644
index 00000000..5d1f13a3
--- /dev/null
+++ b/src_cpp/scope.h
@@ -0,0 +1,158 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <set>
+#include <assert.h>
+#include <iostream>
+#include <algorithm>
+
+#include "any.h"
+
+class Scope;
+
+struct scope_item { // one scope entry: np, len and an optional extra payload
+    int np;            // stays 0 when only a length is given
+    size_t len;        // constructors take an int, which is converted to size_t
+    any extra_fields;  // default-constructed unless the 3-argument form is used
+    scope_item(int _np, int _len, any _extra) : np(_np), len(static_cast<size_t>(_len)), extra_fields(_extra) { }
+    scope_item(int _np, int _len) : np(_np), len(static_cast<size_t>(_len)) { }
+    scope_item(int _len) : np(0), len(static_cast<size_t>(_len)) { }
+};
+
+struct ScopeIterator { // index-based cursor bound to one Scope
+    Scope &scope; // scope being walked; held by reference
+    int n;        // current position
+    ScopeIterator(Scope &_scope, int _n) : scope(_scope), n(_n) { }
+    bool operator!=(const ScopeIterator &other);
+    bool operator==(const ScopeIterator &other);
+    ScopeIterator operator++(int);   // post-increment
+    ScopeIterator &operator++();     // pre-increment
+    scope_item operator*();          // yields a scope_item by value
+};
+
+// Base scope: an ordered list of scope_item entries, usable once commit() marked it formed.
+struct Scope {
+    friend struct ScopeIterator; // class-key matches the struct declaration of ScopeIterator
+    typedef ScopeIterator iterator;
+    Scope() : formed(false) {}
+    bool formed;                      // set by commit(); asserted by the accessors
+    std::vector<scope_item> sequence; // items in iteration order
+    ScopeIterator begin();
+    ScopeIterator end();
+    virtual void commit() { formed = true; }
+    // Largest item length, converted to int; 0 for an empty sequence. Scans in place, no copy.
+    int get_max_len() {
+        assert(formed);
+        if (sequence.empty())
+            return 0;
+        int longest = static_cast<int>(sequence.front().len);
+        for (size_t i = 1; i < sequence.size(); i++)
+            longest = std::max(longest, static_cast<int>(sequence[i].len));
+        return longest;
+    }
+    virtual ~Scope() {}
+};
+
+// Scope whose items carry only a length (np stays 0), built from a log2 range or a list.
+struct VarLenScope : public Scope {
+    int first_log, last_log; // log2 bounds of the range form; 0 when built from a list
+    std::vector<int> lens;   // lengths that commit() turns into sequence items
+    // Lengths are 1 << _first .. 1 << _last, both ends included.
+    VarLenScope(int _first, int _last) : first_log(_first), last_log(_last) {
+        for (int i = first_log; i <= last_log; i++)
+            lens.push_back(1 << i);
+        commit();
+    }
+    // Lengths are copied from an existing vector.
+    VarLenScope(const std::vector<int> &alens) : first_log(0), last_log(0), lens(alens) {
+        commit();
+    }
+    // Lengths are copied from the first n entries of a plain array.
+    VarLenScope(int *alens, size_t n) : first_log(0), last_log(0), lens(alens, alens + n) {
+        commit();
+    }
+    // Rebuilds sequence from lens; clearing first keeps a repeated call from duplicating items.
+    virtual void commit() {
+        sequence.clear();
+        for (size_t i = 0; i < lens.size(); i++)
+            sequence.push_back(scope_item(lens[i]));
+        formed = true;
+    }
+    virtual ~VarLenScope() {}
+};
+
+// Scope listing every (np, mode, len) combination; np varies slowest, len fastest.
+struct NPLenCombinedScope : public Scope {
+    std::vector<int> lens;  // lengths to combine
+    std::vector<int> nps;   // np values to combine
+    size_t nmodes;          // number of mode indices generated per np (starts at 1)
+    NPLenCombinedScope() : nmodes(1) {}
+    void fill_lens(std::vector<int> _lens) { lens = _lens; }  // replaces all lengths
+    void add_len(int len) { lens.push_back(len); }
+    void add_np(int np) { nps.push_back(np); }
+    void add_nmodes(size_t _nmodes) { nmodes = _nmodes; }     // overwrites the stored count
+    virtual void commit() {
+        formed = true;
+        assert(sequence.size() == 0);  // the sequence must still be empty here
+        for (std::vector<int>::const_iterator np_it = nps.begin(); np_it != nps.end(); ++np_it)
+            for (size_t mode_idx = 0; mode_idx < nmodes; ++mode_idx)
+                for (std::vector<int>::const_iterator len_it = lens.begin(); len_it != lens.end(); ++len_it) {
+                    // every item gets its own freshly allocated int holding the mode index
+                    any mode_tag = smart_ptr<int>(new int(mode_idx));
+                    sequence.push_back(scope_item(*np_it, *len_it, mode_tag));
+                }
+    }
+    virtual ~NPLenCombinedScope() {}
+};
+
diff --git a/src_cpp/smart_ptr.h b/src_cpp/smart_ptr.h
new file mode 100644
index 00000000..34ef09e6
--- /dev/null
+++ b/src_cpp/smart_ptr.h
@@ -0,0 +1,97 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once
+
+#include <stdexcept>
+
+// Minimal reference-counted owning pointer; the counter is a plain int shared by all copies.
+template <typename T>
+class smart_ptr {
+    typedef int counter_t;
+public:
+    smart_ptr() : pointer(NULL), rc(NULL), detached(false) { }
+    smart_ptr(T * p) : pointer(p), rc(p ? new counter_t(0) : NULL), detached(false) { increment(); } 
+    smart_ptr(const smart_ptr& rhs) : pointer(rhs.pointer), rc(rhs.rc), detached(rhs.detached) { increment(); }
+    // The last owner frees the counter, and the object too unless this handle is detached.
+    ~smart_ptr() {
+        if(rc && decrement() == 0) { if (!detached) delete pointer; delete rc; }
+    }
+    void assign(T *p) { // adopts p; only valid while this smart_ptr holds nothing
+        if(rc != NULL || pointer != NULL)
+            throw std::logic_error("smart_ptr: assign: bad usage");
+        pointer = p;
+        rc = (p ? new counter_t(0) : NULL);
+        increment();
+    }
+    // Exchanges the whole state; the flag travels too, so operator= cannot leave a stale one.
+    void swap(smart_ptr& rhs) {
+        std::swap(pointer, rhs.pointer);
+        std::swap(rc, rhs.rc);
+        std::swap(detached, rhs.detached);
+    }
+    void detach() { detached = true; } // this handle will no longer delete the object
+    T *get() const { return pointer; }
+    smart_ptr& operator=(const smart_ptr& rhs) { // copy-and-swap
+        smart_ptr tmp(rhs);
+        this->swap(tmp);  // tmp leaves with the old state and releases it on destruction
+        return *this;
+    }
+    T& operator*() { return *pointer; }
+    const T& operator*() const { return *pointer; }
+    T* operator->() { return pointer; }
+    const T* operator->() const { return pointer; }
+private:
+    void increment() { if(rc) ++(*rc); }
+    counter_t decrement() { return --(*rc); }
+    T *pointer;
+    counter_t *rc;
+    bool detached;
+};
diff --git a/src_cpp/utils.h b/src_cpp/utils.h
new file mode 100644
index 00000000..94def3b0
--- /dev/null
+++ b/src_cpp/utils.h
@@ -0,0 +1,103 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright 2016-2018 Intel Corporation.                                    *
+ *                                                                           *
+ *****************************************************************************
+
+This code is covered by the Community Source License (CPL), version
+1.0 as published by IBM and reproduced in the file "license.txt" in the
+"license" subdirectory. Redistribution in source and binary form, with
+or without modification, is permitted ONLY within the regulations
+contained in above mentioned license.
+
+Use of the name and trademark "Intel(R) MPI Benchmarks" is allowed ONLY
+within the regulations of the "License for Use of "Intel(R) MPI
+Benchmarks" Name and Trademark" as reproduced in the file
+"use-of-trademark-license.txt" in the "license" subdirectory.
+
+THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+solely responsible for determining the appropriateness of using and
+distributing the Program and assumes all risks associated with its
+exercise of rights under this Agreement, including but not limited to
+the risks and costs of program errors, compliance with applicable
+laws, damage to or loss of data, programs or equipment, and
+unavailability or interruption of operations.
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
+ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
+WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
+DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+EXPORT LAWS: THIS LICENSE ADDS NO RESTRICTIONS TO THE EXPORT LAWS OF
+YOUR JURISDICTION. It is licensee's responsibility to comply with any
+export regulations applicable in licensee's jurisdiction. Under
+CURRENT U.S. export regulations this software is eligible for export
+from the U.S. and can be downloaded by or otherwise exported or
+reexported worldwide EXCEPT to U.S. embargoed destinations which
+include Cuba, Iraq, Libya, North Korea, Iran, Syria, Sudan,
+Afghanistan and any other country to which the U.S. has embargoed
+goods and services.
+
+ ***************************************************************************
+*/
+
+#pragma once 
+
+#include <iterator>
+#include <algorithm>
+#include <string.h>
+
+namespace set_operations {
+    // to = to \/ from      (in place set union)
+    template <typename T1, typename T2>
+    void combine(T1 &to, T2 &from) {
+        copy(from.begin(), from.end(), inserter(to, to.end()));
+    }
+    // from = from \ what   (in place set difference, all occurrences are erased)
+    template <typename T1, typename T2>
+    void exclude(T1 &from, T2 &what) {
+        for (typename T2::iterator what_it = what.begin(); what_it != what.end(); ++what_it) {
+            typename T1::iterator it;
+            while ((it = find(from.begin(), from.end(), *what_it)) != from.end())
+                from.erase(it);
+        }
+    }
+    // result = one \ two    (set difference; set_difference expects sorted inputs)
+    template <typename T1, typename T2, typename T3>
+    void diff(T1 &one, T2 &two, T3 &result) {
+        set_difference(one.begin(), one.end(), two.begin(), two.end(), inserter(result, result.end()));
+    }
+    // Lower-cased copy of in; chars pass through unsigned char since tolower(negative) is undefined.
+    static const std::string to_lower(const std::string &in) {
+        std::string out = in;
+        for (size_t i = 0; i < out.size(); i++)
+            out[i] = static_cast<char>(::tolower(static_cast<unsigned char>(out[i])));
+        return out;
+    }
+    template <typename T>
+    void preprocess_list(T &list) { // replaces every element by to_lower(element)
+        T tmp;
+        transform(list.begin(), list.end(), inserter(tmp, tmp.end()), to_lower);
+        list = tmp;
+    }
+    // Case-insensitive "less"; typedefs replace the std::binary_function base removed in C++17.
+    struct case_insens_cmp {
+        typedef std::string first_argument_type, second_argument_type;
+        typedef bool result_type;
+        bool operator()(const std::string &lhs, const std::string &rhs) const {
+#ifdef WIN_IMB
+            return stricmp(lhs.c_str(), rhs.c_str()) < 0;
+#else
+            return ::strcasecmp(lhs.c_str(), rhs.c_str()) < 0 ;
+#endif
+        }
+    };
+}
+