test/test_benchmark.py: 87 changes (69 additions, 18 deletions)
@@ -8,6 +8,7 @@
import os
import re
import shutil
import subprocess
import sys
import time
import unittest
@@ -141,31 +142,57 @@ def add_stat(name, size, gzip_size):
total_size = 0
total_gzip_size = 0

for file in self.get_output_files():
size = os.path.getsize(file)
gzip_size = len(zlib.compress(read_binary(file)))
files = self.get_output_files()
if files:
for file in files:
size = os.path.getsize(file)
gzip_size = len(zlib.compress(read_binary(file)))
if self.record_stats:
add_stat(os.path.basename(file).removeprefix('size_'), size, gzip_size)
total_size += size
total_gzip_size += gzip_size

if self.record_stats:
add_stat(os.path.basename(file).removeprefix('size_'), size, gzip_size)
total_size += size
total_gzip_size += gzip_size
add_stat('total', total_size, total_gzip_size)

if self.record_stats:
add_stat('total', total_size, total_gzip_size)
print(' size: %8s, compressed: %8s' % (total_size, total_gzip_size), end=' ')

print(' size: %8s, compressed: %8s' % (total_size, total_gzip_size), end=' ')
if self.get_size_text():
print(' (' + self.get_size_text() + ')', end=' ')
print()
if self.get_size_text():
print(' (' + self.get_size_text() + ')', end=' ')
print()

return recorded_stats

def get_size_text(self):
return ''


class ToolchainBenchmarker(Benchmarker):
  """ToolchainBenchmarker performs the compile step during `run`, i.e. it measures the perf of
  the compiler rather than of the generated code.

  Some simple tests will just work with these benchmarkers, but more complex ones will not,
  because the arguments to `build` are all ignored.
  """

  def __init__(self, name, command):
    super().__init__(name)
    self.command = command

  def run(self, args):
    return run_process(self.command + args, stdout=PIPE, stderr=subprocess.STDOUT, check=False).stdout

  def get_output_files(self):
    return []

  def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
    # no-op: nothing is built ahead of time; the compile happens in run()
    pass
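
A minimal usage sketch, for illustration only (names are the ones used in this file; this mirrors how the harness below drives a benchmarker):

  b = ToolchainBenchmarker('clang', [CLANG_CC])
  b.bench(['--version'])  # each timed rep invokes the compiler itself via run()
  b.display(None)         # prints timings; size stats are empty since get_output_files() is []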


class NativeBenchmarker(Benchmarker):
def __init__(self, name, cc, cxx, cflags=None):
self.name = name
super().__init__(name)
self.cc = cc
self.cxx = cxx
self.cflags = cflags or [OPTIMIZATIONS]
Expand Down Expand Up @@ -196,7 +223,7 @@ def build(self, parent, filename, shared_args, emcc_args, native_args, native_ex
self.filename = final

def run(self, args):
return run_process([self.filename] + args, stdout=PIPE, stderr=PIPE, check=False).stdout
return run_process([self.filename] + args, stdout=PIPE, stderr=subprocess.STDOUT, check=False).stdout

def get_output_files(self):
return [self.filename]
@@ -207,7 +234,7 @@ def get_size_text(self):

class EmscriptenBenchmarker(Benchmarker):
def __init__(self, name, engine, cflags=None, env=None):
self.name = name
super().__init__(name)
self.engine = engine
self.cflags = cflags or []
self.env = os.environ.copy()
@@ -253,7 +280,7 @@ def build(self, parent, filename, shared_args, emcc_args, native_args, native_ex
self.filename = final

def run(self, args):
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=PIPE)
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=subprocess.STDOUT)

def get_output_files(self):
ret = [self.filename]
@@ -288,7 +315,7 @@ def __init__(self, name):

class CheerpBenchmarker(Benchmarker):
def __init__(self, name, engine, cflags=None):
self.name = name
super().__init__(name)
self.engine = engine
self.cflags = cflags or [OPTIMIZATIONS]

@@ -339,7 +366,7 @@ def build(self, parent, filename, shared_args, emcc_args, native_args, native_ex
utils.delete_dir(dir_)

def run(self, args):
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=PIPE)
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=subprocess.STDOUT)

def get_output_files(self):
return [self.filename, utils.replace_suffix(self.filename, '.wasm')]
@@ -368,6 +395,8 @@ def get_output_files(self):
# TODO: ensure no baseline compiler is used, see v8
'sm': EmscriptenBenchmarker('sm', config.SPIDERMONKEY_ENGINE),
'cherp-sm': CheerpBenchmarker('cheerp-sm-wasm', config.SPIDERMONKEY_ENGINE),
'clang-build': ToolchainBenchmarker('clang', [CLANG_CC]),
'emcc-build': ToolchainBenchmarker('emcc', [EMCC]),
}

for name in EMTEST_BENCHMARKERS.split(','):
@@ -526,6 +555,28 @@ def test_primes(self, check):
    '''
    self.do_benchmark('primes' if check else 'primes-nocheck', src, 'lastprime:' if check else '', shared_args=['-DCHECK'] if check else [])

  def do_toolchain_benchmark(self, args):
    # TODO: Perhaps this can be merged with the regular `do_benchmark` somehow.
    benchmarkers = [
      named_benchmarkers['clang-build'],
      named_benchmarkers['emcc-build'],
    ]

Member:
Can we not add these to the normal toplevel list of benchmarkers? That is, we have a "primes" benchmark, and we could have a "hello world" benchmark. And there could be a Benchmarker that just compiles in emcc or clang and doesn't run. That could run on all the benchmarks in principle.

Then, picking the right Benchmarkers using the env var, and the right benchmarks using benchmark.test_*, you can get the results you want iiuc?
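For example, assuming the usual `test/runner` entry point (the exact runner invocation is an assumption here), that selection might look like: `EMTEST_BENCHMARKERS=clang-build,emcc-build test/runner benchmark.test_primes`.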

Collaborator Author:
A lot of the existing benchmarks would not work with these benchmarkers, I fear, since they ignore everything in the build phase and instead run the compiler in the run phase (which doesn't have any of the build arguments such as filename, shared_args, emcc_args, native_args, native_exec, lib_builder).

For very simple cases it does work to just do the compiling during run. So I added these new benchmarkers to the named list so folks can try them.

Member:
Yeah, a lot of other benchmarks might not work - but that's fine I think as users can pick which they want?

Now that we have lines 398-399, do we need 559-562? That is, can't we run these benchmarks the normal way? I am just trying to avoid adding a second "harness" that iterates over benchmarks, that handles using the first benchmarker as the baseline (568-571), etc.

Collaborator Author:
I tried to re-use do_benchmark for these, but it's different enough that I think it warrants keeping this simple, separate do_toolchain_benchmark.

For example, with the toolchain benchmark there is no "src" or "name" to pass.

Maybe we can find a way to merge them later, but this seems OK for now.

Member:
Hmm, I'd hope we can make src, name optional somehow, but I guess this new function is not a huge amount of code, so I won't object strongly.

    baseline = None
    print()
    for b in benchmarkers:
      b.bench(args)
      b.display(baseline)
      if not baseline:
        # Use the first benchmarker as the baseline. Other benchmarkers can then
        # report relative performance compared to this.
        baseline = b

  def test_compile_noop(self):
    self.do_toolchain_benchmark(['--version'])

  def test_compile_hello(self):
    self.do_toolchain_benchmark(['-c', test_file('hello_world.c')])

  def test_memops(self):
    src = '''
      #include <stdio.h>