diff --git a/jmh_full_get_java21.json b/jmh_full_get_java21.json new file mode 100644 index 0000000..1445fff --- /dev/null +++ b/jmh_full_get_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "GetJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [10, 50, 512, 1024, 4096, 8192, 16384, 32768, 65536], + "cacheMB": [1], + "checksum": ["none", "copyout"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 20, + "warmuptime": "50ms", + "iterations": 50, + "time": "500ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", + "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmh_full_put_java21.json b/jmh_full_put_java21.json new file mode 100644 index 0000000..592c75d --- /dev/null +++ b/jmh_full_put_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "PutJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [10, 50, 512, 1024, 4096, 8192, 16384, 32768, 65536], + "cacheMB": [1], + "checksum": ["none", "copyin"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 20, + "warmuptime": "50ms", + "iterations": 50, + "time": "500ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", + "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmh_plot.json b/jmh_plot.json index da9d398..cb7aa90 100644 --- a/jmh_plot.json +++ b/jmh_plot.json @@ -5,7 +5,8 @@ "name": "valueSize", "min": 1024 }, - "label": "allbig" + "label": "allbig", + "valueSizeTitle": ">= 1024" }, { "xaxisparam": { @@ -13,7 +14,8 @@ "min": 1, "max": 4096 }, - "label": "allsmall" + "label": "allsmall", + "valueSizeTitle": "<= 4096" # defaults to 
include_patterns of all matching }, { @@ -23,7 +25,8 @@ "max": 4096 }, "exclude_patterns": ["Pooled"], - "label": "nopoolsmall" + "label": "nopoolsmall", + "valueSizeTitle": "<= 4096" }, { "xaxisparam": { @@ -31,7 +34,8 @@ "min": 1024 }, "exclude_patterns": ["Pooled"], - "label": "nopoolbig" + "label": "nopoolbig", + "valueSizeTitle": ">= 1024" }, { "xaxisparam": { @@ -84,4 +88,4 @@ } ], "result.path": "./analysis/testplots" -} \ No newline at end of file +} diff --git a/jmh_small_get_java21.json b/jmh_small_get_java21.json new file mode 100644 index 0000000..a13c410 --- /dev/null +++ b/jmh_small_get_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "GetJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [10, 50, 512, 1024, 4096, 8192, 16384, 32768, 65536], + "cacheMB": [1], + "checksum": ["none", "copyout"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 10, + "warmuptime": "20ms", + "iterations": 20, + "time": "200ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", + "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmh_small_put_java21.json b/jmh_small_put_java21.json new file mode 100644 index 0000000..e92e8a0 --- /dev/null +++ b/jmh_small_put_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "PutJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [10, 50, 512, 1024, 4096, 8192, 16384, 32768, 65536], + "cacheMB": [1], + "checksum": ["none", "copyin"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 5, + "warmuptime": "20ms", + "iterations": 10, + "time": "100ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", 
+ "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmh_tiny_get_java21.json b/jmh_tiny_get_java21.json new file mode 100644 index 0000000..9d3b78e --- /dev/null +++ b/jmh_tiny_get_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "GetJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [50, 1024, 4096, 16384], + "cacheMB": [1], + "checksum": ["none", "copyout"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 5, + "warmuptime": "10ms", + "iterations": 5, + "time": "50ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", + "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmh_tiny_put_java21.json b/jmh_tiny_put_java21.json new file mode 100644 index 0000000..e390dd5 --- /dev/null +++ b/jmh_tiny_put_java21.json @@ -0,0 +1,19 @@ +{ + "benchmark": "PutJNIBenchmark", + "jvmargs": ["Xmx4G", "XX:ErrorFile=./results/hs_err_pid%p.log", "XX:+HeapDumpOnOutOfMemoryError", "-enable-preview"], + "params": { + "valueSize": [50, 1024, 4096, 16384], + "cacheMB": [1], + "checksum": ["none", "copyin"] + }, + "options": { + "batchsize": 1, + "warmupiterations": 5, + "warmuptime": "10ms", + "iterations": 5, + "time": "50ms" + }, + "result.path": "./results", + "java.library.path": "target/jni-benchmarks-1.0.1-SNAPSHOT-application/jni-benchmarks-1.0.1-SNAPSHOT/lib", + "jar": "target/jni-benchmarks-1.0.1-SNAPSHOT-benchmarks.nar" +} diff --git a/jmhplot.py b/jmhplot.py index bf4089a..c626c88 100755 --- a/jmhplot.py +++ b/jmhplot.py @@ -35,6 +35,7 @@ import numpy as np import matplotlib.pyplot as plt +import matplotlib.ticker as ticker import pandas as pd from pandas.core.frame import DataFrame import re @@ -116,8 +117,8 @@ def normalize_data_frame_from_path(path: pathlib.Path): except pd.errors.EmptyDataError: break - # every 
9th line is the interesting one, discard the rest - df = df.iloc[::9, :] + # df = df.iloc[::9, :] + df = df[~df['Benchmark'].str.contains(':')] df["Benchmark"] = df["Benchmark"].apply(lambda x: x.split('.')[-1]) if normalized is None: normalized = df @@ -193,11 +194,11 @@ def tuple_of_secondary_keys(params: BMParams) -> Tuple: return tuple(secondaryKeys) -def plot_all_results(params: BMParams, resultSets: ResultSets, path, include_benchmarks: str, exclude_benchmarks: str, label: str) -> None: +def plot_all_results(params: BMParams, xaxisparam:Dict, result_sets: ResultSets, path, include_benchmarks: str, exclude_benchmarks: str, label: str, value_size_title: str, system_info: str) -> None: indexKeys = tuple_of_secondary_keys(params) - for indexTuple, resultSet in resultSets.items(): - plot_result_set(indexKeys, indexTuple, resultSet, - path, include_benchmarks, exclude_benchmarks, label) + for indexTuple, resultSet in result_sets.items(): + plot_result_set(xaxisparam, indexKeys, indexTuple, resultSet, + path, include_benchmarks, exclude_benchmarks, label, value_size_title, system_info) def plot_result_axis_errorbars(ax, resultSet: ResultSet) -> None: @@ -256,25 +257,42 @@ def plot_result_axis_bars(ax, resultSet: ResultSet) -> None: bmIndex = bmIndex + 1 -def plot_result_set(indexKeys: Tuple, indexTuple: Tuple, resultSet: ResultSet, path: pathlib.Path, include_benchmarks: str, exclude_benchmarks: str, label: str): +def plot_result_set(xaxisparam:Dict, indexKeys: Tuple, indexTuple: Tuple, resultSet: ResultSet, path: pathlib.Path, include_benchmarks: str, exclude_benchmarks: str, label: str, value_size_title: str, system_info: str): + # Determine how many colors we need + num_benchmarks = len(resultSet) + + # Sample gist_ncar (or nipy_spectral) at discrete intervals + cmap = plt.get_cmap('gist_ncar') + colors = [cmap(i / num_benchmarks) for i in range(num_benchmarks)] + + # Set the property cycle with these colors + plt.rc('axes', prop_cycle=plt.cycler('color', 
colors)) + fig = plt.figure(num=None, figsize=(18, 12), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot() plot_result_axis_bars(ax, resultSet) - plt.title( - f'{str(indexKeys)}={str(indexTuple)} include={include_benchmarks} exclude={exclude_benchmarks}') - plt.xlabel("X") + # Ensure more marks on the x-axis for log scale + ax.xaxis.set_major_locator(ticker.LogLocator(base=10.0, numticks=15)) + ax.xaxis.set_minor_locator(ticker.LogLocator(base=10.0, subs='auto', numticks=15)) + ax.xaxis.set_major_formatter(ticker.ScalarFormatter()) + ax.xaxis.set_minor_formatter(ticker.NullFormatter()) + + plt.suptitle(system_info) + title = f'{str(indexKeys)}={str(indexTuple)} include={include_benchmarks} exclude={exclude_benchmarks} Value Size="{value_size_title}"' + plt.title(title) + plt.xlabel(extract_parameter_name(xaxisparam)) plt.ylabel("t (ns)") - plt.legend(loc='lower right') - plt.grid(b='True', which='both') + plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) + plt.grid(visible='True', which='both') name = f'fig_{"_".join([str(t) for t in indexTuple])}_{label}.png' if path.is_file(): - path = path.parent() - fig.savefig(path.joinpath(name)) + path = path.parent + fig.savefig(path.joinpath(name), bbox_inches='tight') alpha_pattern = re.compile(f'[A-Za-z0-9_\-+]') @@ -311,7 +329,7 @@ def filter_for_benchmarks(dataframe: DataFrame, include_benchmarks, exclude_benc def filter_for_range(dataframe: DataFrame, xaxisparam: Dict) -> DataFrame: - param_name = required('name', xaxisparam) + param_name = extract_parameter_name(xaxisparam) xmin = optional('min', xaxisparam, lambda x: int(x)) xmax = optional('max', xaxisparam, lambda x: int(x)) if xmax is None and xmin is None: @@ -329,6 +347,14 @@ def filter_for_range(dataframe: DataFrame, xaxisparam: Dict) -> DataFrame: lambda x: int(x) >= xmin and int(x) <= xmax)] +def extract_parameter_name(xaxisparam): + return required('name', xaxisparam) + + +def default_if_none(optional_string, default_value: str) -> str: + return 
default_value if optional_string is None else optional_string + + def process_some_plots(path: pathlib.Path, plot: Dict) -> None: xaxisparam = required('xaxisparam', plot) @@ -337,6 +363,26 @@ def process_some_plots(path: pathlib.Path, plot: Dict) -> None: include_benchmarks = optional('include_patterns', plot) exclude_benchmarks = optional('exclude_patterns', plot) label = required('label', plot) + value_size_title = default_if_none(optional('valueSizeTitle', plot), "All") + + # Check for system_info.json in the path + system_info = None + system_info_file = None + if path.is_dir(): + system_info_file = path.joinpath('system_info.json') + if path.is_file(): + system_info_file = path.parent.joinpath('system_info.json') + + if system_info_file and system_info_file.exists(): + try: + with system_info_file.open(mode='r', encoding='UTF-8') as f: + info_json = json.load(f) + system_info = info_json.get('system_info') + except Exception: + pass + + if system_info is None: + system_info = "System Info unavailable" dataframe = normalize_data_frame_from_path(path) if len(dataframe) == 0: @@ -357,8 +403,8 @@ def process_some_plots(path: pathlib.Path, plot: Dict) -> None: params: BMParams = split_params( extract_params(dataframe), primary_param_name) resultSets = extract_results_per_param(dataframe, params) - plot_all_results(params, resultSets, path, - include_benchmarks, exclude_benchmarks, label) + plot_all_results(params, xaxisparam, resultSets, path, + include_benchmarks, exclude_benchmarks, label, value_size_title, system_info) def process_benchmarks(config: Dict) -> None: diff --git a/jmhrun.py b/jmhrun.py index ebc9bd7..771c026 100755 --- a/jmhrun.py +++ b/jmhrun.py @@ -32,6 +32,7 @@ import pathlib import json import subprocess +import platform from typing import Dict @@ -112,6 +113,81 @@ def output_options(config: Dict) -> list: return ['-rff', str(path.joinpath(pathlib.Path(f'jmh_{const_datetime_str}.csv')))] +def get_system_info() -> str: + try: + arch = 
platform.machine() + system = platform.system() + kernel = platform.release() + + cpu_model = "" + ram_info = "" + os_info = "" + java_info = "" + + try: + java_version_out = subprocess.check_output(['java', '-version'], stderr=subprocess.STDOUT).decode().strip() + # The first line usually contains the version information + java_info = java_version_out.splitlines()[0] + except Exception: + java_info = "Unknown Java" + + if system == "Darwin": + try: + cpu_model = subprocess.check_output(['sysctl', '-n', 'machdep.cpu.brand_string']).decode().strip() + except Exception: + cpu_model = platform.processor() + + try: + mem_bytes = int(subprocess.check_output(['sysctl', '-n', 'hw.memsize']).decode().strip()) + ram_info = f"{mem_bytes // (1024**3)}GB RAM" + except Exception: + ram_info = "Unknown RAM" + + os_info = f"macOS {platform.mac_ver()[0]}" + + elif system == "Linux": + try: + with open("/proc/cpuinfo", "r") as f: + for line in f: + if "model name" in line: + cpu_model = line.split(":")[1].strip() + break + except Exception: + cpu_model = platform.processor() + + try: + with open("/proc/meminfo", "r") as f: + for line in f: + if "MemTotal" in line: + mem_kb = int(line.split(":")[1].strip().split()[0]) + ram_info = f"{mem_kb // (1024**2)}GB RAM" + break + except Exception: + ram_info = "Unknown RAM" + + try: + import lsb_release + os_info = lsb_release.get_distro_information()['DESCRIPTION'] + except Exception: + try: + with open("/etc/os-release", "r") as f: + for line in f: + if line.startswith("PRETTY_NAME="): + os_info = line.split("=")[1].strip().strip('"') + break + except Exception: + os_info = f"Linux {platform.release()}" + + else: + cpu_model = platform.processor() + os_info = f"{system} {platform.release()}" + + return f"{arch} - {cpu_model} - {ram_info} - {os_info} - Kernel: {kernel} - {java_info}" + + except Exception as e: + return f"Unknown System - {str(e)}" + + def build_jmh_command(config: Dict) -> list: cmd = ["java"] @@ -192,6 +268,11 @@ def 
log_jmh_session(cmd: list, config: Dict, config_file: str): log.writelines(line + '\n' for line in ['```', '#### Command', 'The java command executed to run the tests', '```', ' '.join(cmd), '```']) + # Save system info + system_info_file = output_dir_path(config).joinpath('system_info.json') + with system_info_file.open(mode='w', encoding='UTF-8') as f: + json.dump({"system_info": get_system_info()}, f, indent=4) + def exec_jmh_cmd(cmd: list, help_requested): cmd_str = ' '.join(cmd) diff --git a/pom.xml b/pom.xml index f67ad03..b07a44b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 com.evolvedbinary.jni @@ -195,7 +196,8 @@ ${project.artifactId}-${project.version}-${uberjar.name} - + org.openjdk.jmh.Main @@ -277,6 +279,21 @@ 21 21 + + + + maven-compiler-plugin + + + -proc:full + -h + ${project.build.directory}/nar/javah-include + --enable-preview + + + + + java25 diff --git a/src/main/c++/getputjni/GetPutJNI.cpp b/src/main/c++/getputjni/GetPutJNI.cpp index 810e74e..e4b67c8 100644 --- a/src/main/c++/getputjni/GetPutJNI.cpp +++ b/src/main/c++/getputjni/GetPutJNI.cpp @@ -377,6 +377,19 @@ jint Java_com_evolvedbinary_jnibench_common_getputjni_GetPutJNI_getIntoIndirectB return get_size; } +extern "C" int getIntoMemorySegment(const char* key, char* dest, int dest_len) { + std::string value = GetByteArrayInternal(key); + int size = std::min((int)value.size(), dest_len); + memcpy(dest, value.c_str(), size); + return size; +} + +extern "C" int putFromMemorySegment(const char* key, const char* src, int src_len) { + char *db_buf = GetByteArrayInternalForWrite(key, src_len); + memcpy(db_buf, src, src_len); + return src_len; +} + /* * Class: com_evolvedbinary_jnibench_common_getputjni_GetPutJNI * Method: putFromIndirectByteBufferGetRegion diff --git a/src/main/java/com/evolvedbinary/jnibench/jmhbench/GetJNIBenchmark.java b/src/main/java/com/evolvedbinary/jnibench/jmhbench/GetJNIBenchmark.java index 7887ecb..e665cd0 100644 --- 
a/src/main/java/com/evolvedbinary/jnibench/jmhbench/GetJNIBenchmark.java +++ b/src/main/java/com/evolvedbinary/jnibench/jmhbench/GetJNIBenchmark.java @@ -1,18 +1,18 @@ /** * Copyright © 2021, Evolved Binary Ltd * All rights reserved. - * + *

* Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + *

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -28,22 +28,44 @@ import com.evolvedbinary.jnibench.common.getputjni.GetPutJNI; import com.evolvedbinary.jnibench.consbench.NarSystem; -import com.evolvedbinary.jnibench.jmhbench.cache.*; -import com.evolvedbinary.jnibench.jmhbench.common.*; -import io.netty.buffer.PooledByteBufAllocator; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; +import com.evolvedbinary.jnibench.jmhbench.cache.AllocationCache; +import com.evolvedbinary.jnibench.jmhbench.cache.ByteArrayCache; +import com.evolvedbinary.jnibench.jmhbench.cache.DirectByteBufferCache; +import com.evolvedbinary.jnibench.jmhbench.cache.IndirectByteBufferCache; +import com.evolvedbinary.jnibench.jmhbench.cache.MemorySegmentCache; +import com.evolvedbinary.jnibench.jmhbench.cache.NettyByteBufCache; +import com.evolvedbinary.jnibench.jmhbench.cache.UnsafeBufferCache; +import com.evolvedbinary.jnibench.jmhbench.common.JMHCaller; import io.netty.buffer.ByteBuf; - +import io.netty.buffer.PooledByteBufAllocator; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; import java.nio.ByteBuffer; import java.text.SimpleDateFormat; import java.util.Date; import java.util.concurrent.TimeUnit; import java.util.logging.Logger; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import 
org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; /** * Benchmark getting byte arrays from native methods. @@ -56,288 +78,371 @@ //@Measurement(iterations = 500, time = 2000, timeUnit = TimeUnit.NANOSECONDS) public class GetJNIBenchmark { - private static final Logger LOG = Logger.getLogger(GetJNIBenchmark.class.getName()); - - static { - NarSystem.loadLibrary(); - } - - @State(Scope.Benchmark) - public static class GetJNIBenchmarkState { - - @Param({ - "10", - "50", - "128", - "512", - "1024", - "4096", - "8192", - "16384", - "32768", - "65536", - "131072"}) - int valueSize; - - @Param({"4", "16"}) int cacheMB; - final static int MB = 1024 * 1024; - @Param({"1024"}) int cacheEntryOverhead; - - @Param({"none", "copyout", "bytesum", "longsum"}) String checksum; - AllocationCache.Checksum readChecksum; - - String keyBase; - byte[] keyBytes; - - JMHCaller caller; - - @Setup - public void setup() { - this.caller = JMHCaller.fromStack(); - - keyBase = "testKeyWithReturnValueSize" + String.format("%07d", valueSize) + "Bytes"; - - keyBytes = keyBase.getBytes(); - - readChecksum = AllocationCache.Checksum.valueOf(checksum); - } - - @TearDown - public void tearDown() { - - } - } - - @State(Scope.Thread) - public static class GetJNIThreadState { - - private DirectByteBufferCache directByteBufferCache = new DirectByteBufferCache(); - private UnsafeBufferCache unsafeBufferCache = new UnsafeBufferCache(); - 
private ByteArrayCache byteArrayCache = new ByteArrayCache(); - private IndirectByteBufferCache indirectByteBufferCache = new IndirectByteBufferCache(); - private PooledByteBufAllocator pooledByteBufAllocator; - private NettyByteBufCache nettyByteBufCache = new NettyByteBufCache(); - - int valueSize; - int cacheSize; - - @Setup - public void setup(GetJNIBenchmarkState benchmarkState, Blackhole blackhole) { - valueSize = benchmarkState.valueSize; - cacheSize = benchmarkState.cacheMB * GetJNIBenchmarkState.MB; - - switch (benchmarkState.caller.benchmarkMethod) { - case "getIntoPooledNettyByteBuf": - pooledByteBufAllocator = PooledByteBufAllocator.DEFAULT; - //create a 0-sized cache so that we can use it to do checksum - nettyByteBufCache.setup(valueSize, 0/*cacheSize*/, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - case "getIntoNettyByteBuf": - nettyByteBufCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - case "getIntoDirectByteBuffer": - directByteBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - case "getIntoIndirectByteBufferSetRegion": - case "getIntoIndirectByteBufferGetElements": - case "getIntoIndirectByteBufferGetCritical": - indirectByteBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - case "getIntoDirectByteBufferFromUnsafe": - case "buffersOnlyDirectByteBufferFromUnsafe": - case "getIntoUnsafe": - unsafeBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - case "getIntoByteArraySetRegion": - case "getIntoByteArrayGetElements": - case "getIntoByteArrayCritical": - byteArrayCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, blackhole); - break; - default: - throw new 
RuntimeException("Don't know how to setup() for benchmark: " + benchmarkState.caller.benchmarkMethod); - } - } - - @TearDown - public void tearDown(GetJNIBenchmarkState benchmarkState) { - - switch (benchmarkState.caller.benchmarkMethod) { - case "getIntoPooledNettyByteBuf": - pooledByteBufAllocator = null; - break; - case "getIntoNettyByteBuf": - nettyByteBufCache.tearDown(); - break; - case "getIntoDirectByteBuffer": - directByteBufferCache.tearDown(); - break; - case "getIntoIndirectByteBufferSetRegion": - case "getIntoIndirectByteBufferGetElements": - case "getIntoIndirectByteBufferGetCritical": - indirectByteBufferCache.tearDown(); - break; - case "getIntoDirectByteBufferFromUnsafe": - case "buffersOnlyDirectByteBufferFromUnsafe": - case "getIntoUnsafe": - unsafeBufferCache.tearDown(); - break; - case "getIntoByteArraySetRegion": - case "getIntoByteArrayGetElements": - case "getIntoByteArrayCritical": - byteArrayCache.tearDown(); - break; - default: - throw new RuntimeException("Don't know how to tearDown() for benchmark: " + benchmarkState.caller.benchmarkMethod); - } - } + private static final Logger LOG = Logger.getLogger(GetJNIBenchmark.class.getName()); + + private static final MethodHandle GET_INTO_MEMORY_SEGMENT_HANDLE; + + static { + NarSystem.loadLibrary(); + +// 2. Initialize the Linker and Lookup + Linker linker = Linker.nativeLinker(); + SymbolLookup loaderLookup = SymbolLookup.loaderLookup(); + + // 3. 
Find the symbol and create the Downcall Handle once + GET_INTO_MEMORY_SEGMENT_HANDLE = loaderLookup.find("getIntoMemorySegment") + .map(symbol -> linker.downcallHandle(symbol, + FunctionDescriptor.of( + ValueLayout.JAVA_INT, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.JAVA_INT))) + .orElseThrow(); + } + + @State(Scope.Benchmark) + public static class GetJNIBenchmarkState { + + @Param({ + "10", + "50", + "128", + "512", + "1024", + "4096", + "8192", + "16384", + "32768", + "65536", + "131072"}) + int valueSize; + + @Param({"4", "16"}) + int cacheMB; + final static int MB = 1024 * 1024; + @Param({"1024"}) + int cacheEntryOverhead; + + @Param({"none", "copyout", "bytesum", "longsum"}) + String checksum; + AllocationCache.Checksum readChecksum; + + String keyBase; + byte[] keyBytes; + private Arena arena; + private MemorySegment keyMemorySegment; + + JMHCaller caller; + + @Setup + public void setup() { + this.caller = JMHCaller.fromStack(); + arena = Arena.ofShared(); + + keyBase = "testKeyWithReturnValueSize" + String.format("%07d", valueSize) + "Bytes"; + + keyBytes = keyBase.getBytes(); + keyMemorySegment = arena.allocateArray(ValueLayout.JAVA_BYTE, keyBytes); + + readChecksum = AllocationCache.Checksum.valueOf(checksum); } - //@Benchmark - public void buffersOnlyDirectByteBufferFromUnsafe(GetJNIThreadState threadState) { - UnsafeBufferCache.UnsafeBuffer unsafeBuffer = threadState.unsafeBufferCache.acquire(); - threadState.unsafeBufferCache.release(unsafeBuffer); + @TearDown + public void tearDown() { + if (arena != null) { + arena.close(); + } } - - @Benchmark - public void getIntoDirectByteBuffer(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - ByteBuffer byteBuffer = threadState.directByteBufferCache.acquire(); - byteBuffer.clear(); - GetPutJNI.getIntoDirectByteBuffer(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, benchmarkState.valueSize); - 
threadState.directByteBufferCache.checksumBuffer(byteBuffer); - threadState.directByteBufferCache.release(byteBuffer); - } - - @Benchmark - public void getIntoUnsafe(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - UnsafeBufferCache.UnsafeBuffer unsafeBuffer = threadState.unsafeBufferCache.acquire(); - int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, unsafeBuffer.handle, benchmarkState.valueSize); - threadState.unsafeBufferCache.checksumBuffer(unsafeBuffer); - threadState.unsafeBufferCache.release(unsafeBuffer); + } + + @State(Scope.Thread) + public static class GetJNIThreadState { + + private DirectByteBufferCache directByteBufferCache = new DirectByteBufferCache(); + private UnsafeBufferCache unsafeBufferCache = new UnsafeBufferCache(); + private ByteArrayCache byteArrayCache = new ByteArrayCache(); + private IndirectByteBufferCache indirectByteBufferCache = new IndirectByteBufferCache(); + private PooledByteBufAllocator pooledByteBufAllocator; + private NettyByteBufCache nettyByteBufCache = new NettyByteBufCache(); + private MemorySegmentCache memorySegmentCache = new MemorySegmentCache(); + + int valueSize; + int cacheSize; + + @Setup + public void setup(GetJNIBenchmarkState benchmarkState, Blackhole blackhole) { + valueSize = benchmarkState.valueSize; + cacheSize = benchmarkState.cacheMB * GetJNIBenchmarkState.MB; + + switch (benchmarkState.caller.benchmarkMethod) { + case "getIntoPooledNettyByteBuf": + pooledByteBufAllocator = PooledByteBufAllocator.DEFAULT; + //create a 0-sized cache so that we can use it to do checksum + nettyByteBufCache.setup(valueSize, 0/*cacheSize*/, benchmarkState.cacheEntryOverhead, + benchmarkState.readChecksum, blackhole); + break; + case "getIntoNettyByteBuf": + nettyByteBufCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, + blackhole); + break; + case "getIntoDirectByteBuffer": + 
directByteBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, + benchmarkState.readChecksum, blackhole); + break; + case "getIntoIndirectByteBufferSetRegion": + case "getIntoIndirectByteBufferGetElements": + case "getIntoIndirectByteBufferGetCritical": + indirectByteBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, + benchmarkState.readChecksum, blackhole); + break; + case "getIntoDirectByteBufferFromUnsafe": + case "buffersOnlyDirectByteBufferFromUnsafe": + case "getIntoUnsafe": + unsafeBufferCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, + blackhole); + break; + case "getIntoByteArraySetRegion": + case "getIntoByteArrayGetElements": + case "getIntoByteArrayCritical": + byteArrayCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, + blackhole); + break; + case "getIntoMemorySegment": + memorySegmentCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.readChecksum, + blackhole); + break; + default: + throw new RuntimeException( + "Don't know how to setup() for benchmark: " + benchmarkState.caller.benchmarkMethod); + } } - @Benchmark - public void getIntoPooledNettyByteBuf(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - ByteBuf byteBuf = threadState.pooledByteBufAllocator.directBuffer(benchmarkState.valueSize); - byteBuf.readerIndex(0); - int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuf.memoryAddress(), benchmarkState.valueSize); - byteBuf.writerIndex(size); - //Use 0-sized cache which we created specially to do checksumBuffer operation - threadState.nettyByteBufCache.checksumBuffer(byteBuf); - // Allocated buffer already has retain count of 1 - byteBuf.release(); + @TearDown + public void tearDown(GetJNIBenchmarkState benchmarkState) { + + switch (benchmarkState.caller.benchmarkMethod) { + case 
"getIntoPooledNettyByteBuf": + pooledByteBufAllocator = null; + break; + case "getIntoNettyByteBuf": + nettyByteBufCache.tearDown(); + break; + case "getIntoDirectByteBuffer": + directByteBufferCache.tearDown(); + break; + case "getIntoIndirectByteBufferSetRegion": + case "getIntoIndirectByteBufferGetElements": + case "getIntoIndirectByteBufferGetCritical": + indirectByteBufferCache.tearDown(); + break; + case "getIntoDirectByteBufferFromUnsafe": + case "buffersOnlyDirectByteBufferFromUnsafe": + case "getIntoUnsafe": + unsafeBufferCache.tearDown(); + break; + case "getIntoByteArraySetRegion": + case "getIntoByteArrayGetElements": + case "getIntoByteArrayCritical": + byteArrayCache.tearDown(); + break; + case "getIntoMemorySegment": + memorySegmentCache.tearDown(); + break; + default: + throw new RuntimeException( + "Don't know how to tearDown() for benchmark: " + benchmarkState.caller.benchmarkMethod); + } } - - @Benchmark - public void getIntoNettyByteBuf(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - ByteBuf byteBuf = threadState.nettyByteBufCache.acquire(); - byteBuf.readerIndex(0); - int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuf.memoryAddress(), benchmarkState.valueSize); - byteBuf.writerIndex(size); - threadState.nettyByteBufCache.checksumBuffer(byteBuf); - threadState.nettyByteBufCache.release(byteBuf); - } - - @Benchmark - public void getIntoByteArraySetRegion(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - byte[] array = threadState.byteArrayCache.acquire(); - int size = GetPutJNI.getIntoByteArraySetRegion(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, benchmarkState.valueSize); - threadState.byteArrayCache.checksumBuffer(array); - threadState.byteArrayCache.release(array); + } + + //@Benchmark + public void buffersOnlyDirectByteBufferFromUnsafe(GetJNIThreadState threadState) { + 
UnsafeBufferCache.UnsafeBuffer unsafeBuffer = threadState.unsafeBufferCache.acquire(); + threadState.unsafeBufferCache.release(unsafeBuffer); + } + + @Benchmark + public void getIntoDirectByteBuffer(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuffer byteBuffer = threadState.directByteBufferCache.acquire(); + byteBuffer.clear(); + GetPutJNI.getIntoDirectByteBuffer(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, + benchmarkState.valueSize); + threadState.directByteBufferCache.checksumBuffer(byteBuffer); + threadState.directByteBufferCache.release(byteBuffer); + } + + @Benchmark + public void getIntoUnsafe(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { + UnsafeBufferCache.UnsafeBuffer unsafeBuffer = threadState.unsafeBufferCache.acquire(); + int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, unsafeBuffer.handle, + benchmarkState.valueSize); + threadState.unsafeBufferCache.checksumBuffer(unsafeBuffer); + threadState.unsafeBufferCache.release(unsafeBuffer); + } + + @Benchmark + public void getIntoMemorySegment(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + final var segment = threadState.memorySegmentCache.acquire(); + + try { + final var size = (int) GET_INTO_MEMORY_SEGMENT_HANDLE.invokeExact( + benchmarkState.keyMemorySegment, // Pre-allocated segment for key + segment, + benchmarkState.valueSize + ); + blackhole.consume(size); + } catch (Throwable e) { + throw new RuntimeException(e); } - @Benchmark - public void getIntoByteArrayGetElements(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - byte[] array = threadState.byteArrayCache.acquire(); - int size = GetPutJNI.getIntoByteArrayGetElements(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, benchmarkState.valueSize); - 
threadState.byteArrayCache.checksumBuffer(array); - threadState.byteArrayCache.release(array); - } - - @Benchmark - public void getIntoByteArrayCritical(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - byte[] array = threadState.byteArrayCache.acquire(); - int size = GetPutJNI.getIntoByteArrayCritical(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, benchmarkState.valueSize); - threadState.byteArrayCache.checksumBuffer(array); - threadState.byteArrayCache.release(array); - } - - //final supplied buffer(s) - //TODO this can be done in as many different ways as supplying a byte[] - //But why shouldn't we just expect the same performance as byte[] ? - //Start with one instance (one that seems good in the byte[] case), and check for surprises... - @Benchmark - public void getIntoIndirectByteBufferSetRegion(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); - byteBuffer.clear(); - GetPutJNI.getIntoIndirectByteBufferSetRegion(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, benchmarkState.valueSize); - threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); - threadState.indirectByteBufferCache.release(byteBuffer); - } - - @Benchmark - public void getIntoIndirectByteBufferGetElements(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { - ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); - byteBuffer.clear(); - int size = GetPutJNI.getIntoIndirectByteBufferGetElements(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, benchmarkState.valueSize); - threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); - threadState.indirectByteBufferCache.release(byteBuffer); - } - - @Benchmark - public void getIntoIndirectByteBufferGetCritical(GetJNIBenchmarkState benchmarkState, GetJNIThreadState 
threadState, Blackhole blackhole) { - ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); - byteBuffer.clear(); - int size = GetPutJNI.getIntoIndirectByteBufferGetCritical(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, benchmarkState.valueSize); - threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); - threadState.indirectByteBufferCache.release(byteBuffer); - } - - //create/allocate the result buffers, analogous to the "into" methods (but no unsafe ones here) - //TODO getReturnDirectByteBuffer - //TODO getReturnIndirectByteBuffer - //TODO getReturnByteArrayCritical - //TODO getReturnByteArrayGetElements - //TODO getReturnByteArraySetRegion - - //TODO env->NewDirectByteBuffer() - what aree the ownership rules ? - //TODO track whether the byte[] copying/sharing methods we are using are doing copies - //env->GetByteArrayElements(..., &is_copy) - - //TODO graphing - dig into the Python stuff a bit more - - /** - * Run from the IDE - * - * You will need this in the VM args of the run configuration, - * in order for NAR to find at runtime the native lib it has built: - * - * -Djava.library.path=PATH_TO_REPO/target/jni-benchmarks-1.0.0-SNAPSHOT-application/jni-benchmarks-1.0.0-SNAPSHOT/lib - * - * The parameters we set here configure for debugging, - * typically we want a much shorter runs than is needed for accurate benchmarking - * SO DON'T TRUST THE NUMBERS GENERATED BY THIS RUN - * fork(0) runs everything is in a single process so we don't need to configure JDWP - * Again this affects JMH - * {@link https://github.com/openjdk/jmh/blob/master/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_12_Forking.java} - * It's a convenience for debugging the tests so that they actually run, that is all. 
- * - * @param args - * @throws RunnerException - */ - public static void main(String[] args) throws RunnerException { - SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSS"); - Options opt = new OptionsBuilder() - .forks(0) - .param("checksum", "none", "copyout") - .param("valueSize", "50", "4096", "16384", "65536") - .param("cacheMB", "4") - .warmupIterations(10) - .measurementIterations(50) - .include(GetJNIBenchmark.class.getSimpleName()) - .result("analysis/testplots/" + simpleDateFormat.format(new Date()) + "_" + GetJNIBenchmark.class.getSimpleName() + ".csv") - .build(); - - new Runner(opt).run(); - } + threadState.memorySegmentCache.checksumBuffer(segment); + threadState.memorySegmentCache.release(segment); + } + + @Benchmark + public void getIntoPooledNettyByteBuf(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuf byteBuf = threadState.pooledByteBufAllocator.directBuffer(benchmarkState.valueSize); + byteBuf.readerIndex(0); + int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, + byteBuf.memoryAddress(), benchmarkState.valueSize); + byteBuf.writerIndex(size); + //Use 0-sized cache which we created specially to do checksumBuffer operation + threadState.nettyByteBufCache.checksumBuffer(byteBuf); + // Allocated buffer already has retain count of 1 + byteBuf.release(); + } + + @Benchmark + public void getIntoNettyByteBuf(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuf byteBuf = threadState.nettyByteBufCache.acquire(); + byteBuf.readerIndex(0); + int size = GetPutJNI.getIntoUnsafe(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, + byteBuf.memoryAddress(), benchmarkState.valueSize); + byteBuf.writerIndex(size); + threadState.nettyByteBufCache.checksumBuffer(byteBuf); + threadState.nettyByteBufCache.release(byteBuf); + } + + @Benchmark + public void 
getIntoByteArraySetRegion(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + byte[] array = threadState.byteArrayCache.acquire(); + int size = GetPutJNI.getIntoByteArraySetRegion(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, + benchmarkState.valueSize); + threadState.byteArrayCache.checksumBuffer(array); + threadState.byteArrayCache.release(array); + } + + @Benchmark + public void getIntoByteArrayGetElements(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + byte[] array = threadState.byteArrayCache.acquire(); + int size = GetPutJNI.getIntoByteArrayGetElements(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, + benchmarkState.valueSize); + threadState.byteArrayCache.checksumBuffer(array); + threadState.byteArrayCache.release(array); + } + + @Benchmark + public void getIntoByteArrayCritical(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + byte[] array = threadState.byteArrayCache.acquire(); + int size = GetPutJNI.getIntoByteArrayCritical(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, array, + benchmarkState.valueSize); + threadState.byteArrayCache.checksumBuffer(array); + threadState.byteArrayCache.release(array); + } + + //final supplied buffer(s) + //TODO this can be done in as many different ways as supplying a byte[] + //But why shouldn't we just expect the same performance as byte[] ? + //Start with one instance (one that seems good in the byte[] case), and check for surprises... 
+ @Benchmark + public void getIntoIndirectByteBufferSetRegion(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); + byteBuffer.clear(); + GetPutJNI.getIntoIndirectByteBufferSetRegion(benchmarkState.keyBytes, 0, benchmarkState.keyBytes.length, byteBuffer, + benchmarkState.valueSize); + threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); + threadState.indirectByteBufferCache.release(byteBuffer); + } + + @Benchmark + public void getIntoIndirectByteBufferGetElements(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); + byteBuffer.clear(); + int size = GetPutJNI.getIntoIndirectByteBufferGetElements(benchmarkState.keyBytes, 0, + benchmarkState.keyBytes.length, byteBuffer, + benchmarkState.valueSize); + threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); + threadState.indirectByteBufferCache.release(byteBuffer); + } + + @Benchmark + public void getIntoIndirectByteBufferGetCritical(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + ByteBuffer byteBuffer = threadState.indirectByteBufferCache.acquire(); + byteBuffer.clear(); + int size = GetPutJNI.getIntoIndirectByteBufferGetCritical(benchmarkState.keyBytes, 0, + benchmarkState.keyBytes.length, byteBuffer, + benchmarkState.valueSize); + threadState.indirectByteBufferCache.checksumBuffer(byteBuffer); + threadState.indirectByteBufferCache.release(byteBuffer); + } + + //create/allocate the result buffers, analogous to the "into" methods (but no unsafe ones here) + //TODO getReturnDirectByteBuffer + //TODO getReturnIndirectByteBuffer + //TODO getReturnByteArrayCritical + //TODO getReturnByteArrayGetElements + //TODO getReturnByteArraySetRegion + + //TODO env->NewDirectByteBuffer() - what are the ownership rules ?
+ //TODO track whether the byte[] copying/sharing methods we are using are doing copies + //env->GetByteArrayElements(..., &is_copy) + + //TODO graphing - dig into the Python stuff a bit more + + /** + * Run from the IDE + *

+ * You will need this in the VM args of the run configuration, + * in order for NAR to find at runtime the native lib it has built: + *

+ * -Djava.library.path=PATH_TO_REPO/target/jni-benchmarks-1.0.0-SNAPSHOT-application/jni-benchmarks-1.0.0-SNAPSHOT/lib + *

+ * The parameters we set here configure for debugging, + * typically we want much shorter runs than are needed for accurate benchmarking + * SO DON'T TRUST THE NUMBERS GENERATED BY THIS RUN + * fork(0) runs everything in a single process so we don't need to configure JDWP + * Again this affects JMH + * {@link https://github.com/openjdk/jmh/blob/master/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_12_Forking.java} + * It's a convenience for debugging the tests so that they actually run, that is all. + * + * @param args + * @throws RunnerException + */ + public static void main(String[] args) throws RunnerException { + SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSS"); + Options opt = new OptionsBuilder() + .forks(0) + .param("checksum", "none", "copyout") + .param("valueSize", "50", "4096", "16384", "65536") + .param("cacheMB", "4") + .warmupIterations(10) + .measurementIterations(50) + .include(GetJNIBenchmark.class.getSimpleName()) + .result("analysis/testplots/" + simpleDateFormat.format( + new Date()) + "_" + GetJNIBenchmark.class.getSimpleName() + ".csv") + .build(); + + new Runner(opt).run(); + } } diff --git a/src/main/java/com/evolvedbinary/jnibench/jmhbench/PutJNIBenchmark.java b/src/main/java/com/evolvedbinary/jnibench/jmhbench/PutJNIBenchmark.java index e3b3355..d2f242a 100644 --- a/src/main/java/com/evolvedbinary/jnibench/jmhbench/PutJNIBenchmark.java +++ b/src/main/java/com/evolvedbinary/jnibench/jmhbench/PutJNIBenchmark.java @@ -30,20 +30,26 @@ import com.evolvedbinary.jnibench.consbench.NarSystem; import com.evolvedbinary.jnibench.jmhbench.cache.*; import com.evolvedbinary.jnibench.jmhbench.common.*; +import io.netty.buffer.ByteBuf; import io.netty.buffer.PooledByteBufAllocator; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import
java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.nio.ByteBuffer; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import io.netty.buffer.ByteBuf; - -import java.nio.ByteBuffer; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.concurrent.TimeUnit; -import java.util.logging.Logger; /** * Benchmark getting byte arrays from native methods. @@ -58,8 +64,24 @@ public class PutJNIBenchmark { private static final Logger LOG = Logger.getLogger(GetJNIBenchmark.class.getName()); + private static final MethodHandle PUT_FROM_MEMORY_SEGMENT_HANDLE; + static { NarSystem.loadLibrary(); + + // 2. Initialize the Linker and Lookup + Linker linker = Linker.nativeLinker(); + SymbolLookup loaderLookup = SymbolLookup.loaderLookup(); + + // 3. 
Find the symbol and create the Downcall Handle once + PUT_FROM_MEMORY_SEGMENT_HANDLE = loaderLookup.find("putFromMemorySegment") + .map(symbol -> linker.downcallHandle(symbol, + FunctionDescriptor.of( + ValueLayout.JAVA_INT, + ValueLayout.ADDRESS, + ValueLayout.ADDRESS, + ValueLayout.JAVA_INT))) + .orElseThrow(); } @State(Scope.Benchmark) @@ -90,6 +112,8 @@ public static class GetJNIBenchmarkState { String keyBase; byte[] keyBytes; + MemorySegment keyMemorySegment; + private Arena benchmarkArena; JMHCaller caller; @@ -99,14 +123,19 @@ public void setup() { keyBase = "testKeyWithReturnValueSize" + String.format("%07d", valueSize) + "Bytes"; + benchmarkArena = Arena.ofShared(); + keyBytes = keyBase.getBytes(); + keyMemorySegment = benchmarkArena.allocateArray(ValueLayout.JAVA_BYTE, keyBytes); writePreparation = AllocationCache.Prepare.valueOf(preparation); } @TearDown public void tearDown() { - + if (benchmarkArena != null) { + benchmarkArena.close(); + } } } @@ -117,6 +146,7 @@ public static class GetJNIThreadState { private final UnsafeBufferCache unsafeBufferCache = new UnsafeBufferCache(); private final ByteArrayCache byteArrayCache = new ByteArrayCache(); private final IndirectByteBufferCache indirectByteBufferCache = new IndirectByteBufferCache(); + private final MemorySegmentCache memorySegmentCache = new MemorySegmentCache(); private final PooledByteBufAllocator pooledByteBufAllocator = PooledByteBufAllocator.DEFAULT; private final NettyByteBufCache nettyByteBufCache = new NettyByteBufCache(); @@ -152,6 +182,9 @@ public void setup(GetJNIBenchmarkState benchmarkState, Blackhole blackhole) { case "putFromByteArrayCritical": byteArrayCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.writePreparation, blackhole); break; + case "putFromMemorySegment": + memorySegmentCache.setup(valueSize, cacheSize, benchmarkState.cacheEntryOverhead, benchmarkState.writePreparation, blackhole); + break; default: throw new RuntimeException("Don't 
know how to setup() for benchmark: " + benchmarkState.caller.benchmarkMethod); } @@ -184,6 +217,9 @@ public void tearDown(GetJNIBenchmarkState benchmarkState) { case "putFromByteArrayCritical": byteArrayCache.tearDown(); break; + case "putFromMemorySegment": + memorySegmentCache.tearDown(); + break; default: throw new RuntimeException("Don't know how to tearDown() for benchmark: " + benchmarkState.caller.benchmarkMethod); } @@ -196,6 +232,26 @@ public void buffersOnlyDirectByteBufferFromUnsafe(GetJNIThreadState threadState) threadState.unsafeBufferCache.release(unsafeBuffer); } + @Benchmark + public void putFromMemorySegment(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, + Blackhole blackhole) { + final var segment = threadState.memorySegmentCache.acquire(); + threadState.memorySegmentCache.prepareBuffer(segment, benchmarkState.fillByte); + + try { + final var size = (int) PUT_FROM_MEMORY_SEGMENT_HANDLE.invokeExact( + benchmarkState.keyMemorySegment, // Pre-allocated segment for key + segment, + benchmarkState.valueSize + ); + blackhole.consume(size); + } catch (Throwable e) { + throw new RuntimeException(e); + } + + threadState.memorySegmentCache.release(segment); + } + @Benchmark public void putFromDirectByteBuffer(GetJNIBenchmarkState benchmarkState, GetJNIThreadState threadState, Blackhole blackhole) { ByteBuffer byteBuffer = threadState.directByteBufferCache.acquire(); diff --git a/src/main/java/com/evolvedbinary/jnibench/jmhbench/cache/MemorySegmentCache.java b/src/main/java/com/evolvedbinary/jnibench/jmhbench/cache/MemorySegmentCache.java new file mode 100644 index 0000000..9c6079f --- /dev/null +++ b/src/main/java/com/evolvedbinary/jnibench/jmhbench/cache/MemorySegmentCache.java @@ -0,0 +1,62 @@ +package com.evolvedbinary.jnibench.jmhbench.cache; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import 
java.util.stream.IntStream; + +public class MemorySegmentCache extends LinkedListAllocationCache { + private final Arena arena; + + public MemorySegmentCache() { + arena = Arena.ofShared(); + } + + + @Override + MemorySegment allocate(final int valueSize) { + return arena.allocate(valueSize); + } + + @Override + void free(final MemorySegment buffer) { + // Nothing to do here, as we override tearDown() directly. + } + + @Override + public void tearDown() { + super.tearDown(); + arena.close(); + } + + @Override + protected int byteChecksum(final MemorySegment item) { + return IntStream.range(0, (int) item.byteSize()).map(offset -> item.get(JAVA_BYTE, offset)).sum(); + } + + @Override + protected int longChecksum(final MemorySegment item) { + return byteChecksum(item); + } + + @Override + protected byte[] copyOut(final MemorySegment item) { + // Get a cached byte array of the correct size + byte[] array = byteArrayOfSize((int) item.byteSize()); + + // Perform bulk copy from native memory to Java heap array + MemorySegment.copy(item, ValueLayout.JAVA_BYTE, 0, array, 0, (int) item.byteSize()); + + return array; + } + + @Override + protected long copyIn(final MemorySegment item, final byte fillByte) { + // Highly optimized bulk fill (native memset equivalent) + item.fill(fillByte); + + return fillByte; + } +}