From 2e2825eb54359a49c36e206690e5be0fb0e0c8ee Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Tue, 15 Oct 2024 19:58:22 +0200 Subject: [PATCH 1/6] Add MsgPack serializer support --- README.md | 5 ++- composer.json | 3 +- src/LargeArrayBuffer.php | 14 ++++-- test/LargeArrayBufferTest.php | 81 +++++++++++++++-------------------- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 149985e..cd82cf8 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ There are pretty much no dependencies with some exceptions: - If you want to use the `toJSONFile()` method, you need to install `ext-json` (PHP's PECL JSON extension) as well. - If you want to use the igbinary serializer, `ext-igbinary` is required. See [php-ext-igbinary](https://github.com/igbinary/igbinary). +- If you want to use the msgpack serializer, `ext-msgpack` is required. See [php-ext-msgpack](https://github.com/msgpack/msgpack-php). - If you want to use LZ4 compression, `ext-lz4` is required. See [php-ext-lz4](https://github.com/kjdev/php-ext-lz4). ## Usage @@ -55,9 +56,9 @@ The constructor of `LargeArrayBuffer` provides some options: 1. You can set the threshold when to move the data to disk. When pushing data to the buffer, it is stored in memory until it gets too large. E.g.: `new LargeArrayBuffer(512);` to set a 512 MiB threshold. -1. You can choose either the PHP serializer or the [igbinary](https://github.com/igbinary/igbinary) serializer (PHP serializer is default). +1. You can choose either the PHP serializer, the [igbinary](https://github.com/igbinary/igbinary) serializer or the [msgpack](https://github.com/msgpack/msgpack-php) serializer (PHP serializer is default). E.g.: `new LargeArrayBuffer(serializer: LargeArrayBuffer::COMPRESSION_IGBINARY);` -1. You can enable GZIP or LZ4 compression for the serialized items. Although this is recommended only if your items are pretty big like > 1 KiB each. 
E.g.: `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`. Note, that LZ4 compression requires [ext-lz4](https://github.com/kjdev/php-ext-lz4) to be installed. +1. You can enable GZIP or LZ4 compression for the serialized items. However, this is recommended only if your items are pretty big, like > 1 KiB each. E.g.: `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`. Note that LZ4 compression requires [ext-lz4](https://github.com/kjdev/php-ext-lz4) to be loaded. ### Read from the buffer diff --git a/composer.json b/composer.json index 82784c3..604adc1 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,8 @@ "suggest": { "ext-json": "Requirement of toJSONFile() method", "ext-lz4": "To enable support of LZ4 compression", - "ext-igbinary": "To enable support for igbinary serializer" + "ext-igbinary": "To enable support for igbinary serializer", + "ext-msgpack": "To enable support for msgpack serializer" }, "require": { "php": ">=8.0" diff --git a/src/LargeArrayBuffer.php b/src/LargeArrayBuffer.php index 0f66b24..2b96761 100644 --- a/src/LargeArrayBuffer.php +++ b/src/LargeArrayBuffer.php @@ -12,6 +12,7 @@ class LargeArrayBuffer implements ArrayBufferInterface { public const SERIALIZER_PHP = 1; public const SERIALIZER_IGBINARY = 2; + public const SERIALIZER_MSGPACK = 3; public const COMPRESSION_NONE = 0; public const COMPRESSION_GZIP = 1; @@ -57,13 +58,16 @@ class LargeArrayBuffer implements ArrayBufferInterface { */ public function __construct(int $maxMemoryMiB = 1024, int $serializer = self::SERIALIZER_PHP, int $compression = self::COMPRESSION_NONE) { $this->serializer = $serializer; - if($this->serializer === self::SERIALIZER_IGBINARY && !function_exists('igbinary_serialize')){ - throw new \InvalidArgumentException('igbinary serializer was requested, but ext-igbinary is not installed'); + if($this->serializer === self::SERIALIZER_IGBINARY && !extension_loaded('igbinary')){ + throw new 
\InvalidArgumentException('igbinary serializer was requested, but ext-igbinary is not loaded'); + } + if($this->serializer === self::SERIALIZER_MSGPACK && !extension_loaded('msgpack')){ + throw new \InvalidArgumentException('msgpack serializer was requested, but ext-msgpack is not loaded'); } $this->compression = $compression; - if($this->compression === self::COMPRESSION_LZ4 && !function_exists('lz4_compress')){ - throw new \InvalidArgumentException('LZ4 compression was requested, but ext-lz4 is not installed'); + if($this->compression === self::COMPRESSION_LZ4 && !extension_loaded('lz4')){ + throw new \InvalidArgumentException('LZ4 compression was requested, but ext-lz4 is not loaded'); } $stream = fopen('php://temp/maxmemory:'.($maxMemoryMiB * 1024 * 1024), 'r+'); @@ -80,6 +84,7 @@ public function __construct(int $maxMemoryMiB = 1024, int $serializer = self::SE public function push(mixed $item): void { $serialized = match($this->serializer){ self::SERIALIZER_IGBINARY => igbinary_serialize($item), + self::SERIALIZER_MSGPACK => msgpack_serialize($item), default => serialize($item) }; /** @var string|false $compressed */ @@ -145,6 +150,7 @@ public function current(): mixed { /** @psalm-var E $res */ $res = match($this->serializer){ self::SERIALIZER_IGBINARY => igbinary_unserialize($this->current), + self::SERIALIZER_MSGPACK => msgpack_unserialize($this->current), default => unserialize($this->current) }; return $res; diff --git a/test/LargeArrayBufferTest.php b/test/LargeArrayBufferTest.php index f461641..5cddc73 100644 --- a/test/LargeArrayBufferTest.php +++ b/test/LargeArrayBufferTest.php @@ -5,6 +5,7 @@ use LargeArrayBuffer\LargeArrayBuffer; use PHPUnit\Framework\TestCase; +use PHPUnit\Framework\Attributes\DataProvider; /** * @author Andreas Wahlen @@ -31,52 +32,44 @@ public function testEmpty(): void { $this->assertEquals(0, $runs); } - public static function provideObject(): array { - $o = self::getObject(); - return [ - [$o, LargeArrayBuffer::SERIALIZER_PHP, 
LargeArrayBuffer::COMPRESSION_NONE], - [$o, LargeArrayBuffer::SERIALIZER_PHP, LargeArrayBuffer::COMPRESSION_GZIP], + public static function provideConfig(): \Generator { + $serializers = [ + 'PHP' => LargeArrayBuffer::SERIALIZER_PHP ]; + if(extension_loaded('igbinary')){ + $serializers['IGBinary'] = LargeArrayBuffer::SERIALIZER_IGBINARY; + } + if(extension_loaded('msgpack')){ + $serializers['MsgPack'] = LargeArrayBuffer::SERIALIZER_MSGPACK; + } + $compressors = [ + 'none' => LargeArrayBuffer::COMPRESSION_NONE, + 'GZIP' => LargeArrayBuffer::COMPRESSION_GZIP + ]; + if(extension_loaded('lz4')){ + $compressors['LZ4'] = LargeArrayBuffer::COMPRESSION_LZ4; + } + foreach($serializers as $s => $serializer){ + foreach($compressors as $c => $compressor){ + yield $s.'-'.$c => [$serializer, $compressor]; + } + } } - /** - * @dataProvider provideObject - */ - public function testReadWrite(object $o, int $serializer, int $compression): void { - $buf = new LargeArrayBuffer(serializer: $serializer, compression: $compression); - $buf->push($o); - $buf->rewind(); - $buf->next(); - $this->assertEquals($o, $buf->current()); - } - - /** - * @requires extension igbinary - */ - public function testReadWriteIgbinary(): void { - $o = self::getObject(); - $buf = new LargeArrayBuffer(serializer: LargeArrayBuffer::SERIALIZER_IGBINARY); - $buf->push($o); - $buf->rewind(); - $buf->next(); - $this->assertEquals($o, $buf->current()); - } - - /** - * @requires extension lz4 - */ - public function testReadWriteLZ4(): void { + #[DataProvider('provideConfig')] + public function testReadWrite(int $serializer, int $compression): void { $o = self::getObject(); - $buf = new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_LZ4); + $buf = new LargeArrayBuffer(serializer: $serializer, compression: $compression); $buf->push($o); $buf->rewind(); $buf->next(); $this->assertEquals($o, $buf->current()); } - public function testLoop(): void { - $count = 15; - $buf = new LargeArrayBuffer(); + 
#[DataProvider('provideConfig')] + public function testLoop(int $serializer, int $compression): void { + $count = 1500; + $buf = new LargeArrayBuffer(serializer: $serializer, compression: $compression); $objs = []; for($i=0;$i<$count;$i++){ $o = new \stdClass(); @@ -85,22 +78,18 @@ public function testLoop(): void { $buf->push($o); } $this->assertCount($count, $buf); - $runs = 0; + $expIdx = 0; foreach($buf as $idx => $item){ - $runs++; - $this->assertGreaterThanOrEqual(0, $idx); - $this->assertLessThan($count, $idx); + $this->assertEquals($expIdx, $idx); + $this->assertEquals($item->idx, $idx); $this->assertEquals($objs[$idx], $item); + $expIdx++; } - $this->assertEquals($count, $runs); + $this->assertEquals($count, $expIdx); } public function testToJSON(): void { - $o = new \stdClass(); - $o->foo = 'hello world!'.PHP_EOL; - $o->bar = new \DateTimeImmutable(); - $o->a = ['test', 123]; - $o->str = 'hello world!\\n'; + $o = self::getObject(); $buf = new LargeArrayBuffer(); $buf->push($o); From d2d14b0a741032b28ae209274dd91ab0621740a4 Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Tue, 15 Oct 2024 20:13:09 +0200 Subject: [PATCH 2/6] Fixes for psalm actions and benchmark --- bench/benchmark.php | 86 ++++++++++++++++++++++++++++++++--- psalm.baseline.xml | 2 + test/LargeArrayBufferTest.php | 10 +++- 3 files changed, 90 insertions(+), 8 deletions(-) diff --git a/bench/benchmark.php b/bench/benchmark.php index 0c10b08..2cca94e 100644 --- a/bench/benchmark.php +++ b/bench/benchmark.php @@ -97,7 +97,7 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1, unset($buf); // buffer with LZ4 - if(function_exists('lz4_compress')){ + if(extension_loaded('lz4')){ $start = microtime(true); $memBefore = memory_get_usage(true); $buf = new LargeArrayBuffer(128, compression: LargeArrayBuffer::COMPRESSION_LZ4); @@ -118,7 +118,7 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1, unset($buf); } - 
if(function_exists('igbinary_serialize')){ + if(extension_loaded('igbinary')){ // normal buffer with igbinary $start = microtime(true); $memBefore = memory_get_usage(true); @@ -160,7 +160,7 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1, unset($buf); // buffer with LZ4 and igbinary - if(function_exists('lz4_compress')){ + if(extension_loaded('lz4')){ $start = microtime(true); $memBefore = memory_get_usage(true); $buf = new LargeArrayBuffer(128, serializer: LargeArrayBuffer::SERIALIZER_IGBINARY, compression: LargeArrayBuffer::COMPRESSION_LZ4); @@ -182,6 +182,70 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1, } } + if(extension_loaded('msgpack')){ + // normal buffer with msgpack + $start = microtime(true); + $memBefore = memory_get_usage(true); + $buf = new LargeArrayBuffer(128, serializer: LargeArrayBuffer::SERIALIZER_MSGPACK); + $bench->bufferMeasurementsFill($buf); + $metrics['fill_buffer_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + + $start = microtime(true); + $bench->bufferMeasurementsIterate($buf); + $metrics['iterate_buffer_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + unset($buf); + + // buffer with GZIP and msgpack + $start = microtime(true); + $memBefore = memory_get_usage(true); + $buf = new LargeArrayBuffer(128, serializer: LargeArrayBuffer::SERIALIZER_MSGPACK, compression: LargeArrayBuffer::COMPRESSION_GZIP); + $bench->bufferMeasurementsFill($buf); + $metrics['fill_buffer_gz_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + + $start = microtime(true); + $bench->bufferMeasurementsIterate($buf); + $metrics['iterate_buffer_gz_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => 
$buf->getSize() + ]; + unset($buf); + + // buffer with LZ4 and msgpack + if(extension_loaded('lz4')){ + $start = microtime(true); + $memBefore = memory_get_usage(true); + $buf = new LargeArrayBuffer(128, serializer: LargeArrayBuffer::SERIALIZER_MSGPACK, compression: LargeArrayBuffer::COMPRESSION_LZ4); + $bench->bufferMeasurementsFill($buf); + $metrics['fill_buffer_lz4_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + + $start = microtime(true); + $bench->bufferMeasurementsIterate($buf); + $metrics['iterate_buffer_lz4_mp'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + unset($buf); + } + } + unset($bench); } @@ -191,17 +255,27 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1, printResult('Iterate over buffer', $metrics, 'iterate_buffer', 3, true); printResult('Fill buffer (GZIP)', $metrics, 'fill_buffer_gz', 3, true); printResult('Iterate over buffer (GZIP)', $metrics, 'iterate_buffer_gz', 2, true); -if(function_exists('lz4_compress')){ +if(extension_loaded('lz4')){ printResult('Fill buffer (LZ4)', $metrics, 'fill_buffer_lz4', 3, true); printResult('Iterate over buffer (LZ4)', $metrics, 'iterate_buffer_lz4', 2, true); } -if(function_exists('igbinary_serialize')){ +if(extension_loaded('igbinary')){ printResult('Fill buffer (igbinary)', $metrics, 'fill_buffer_ig', 2, true); printResult('Iterate over buffer (igbinary)', $metrics, 'iterate_buffer_ig', 1, true); printResult('Fill buffer (GZIP, igbinary)', $metrics, 'fill_buffer_gz_ig', 2, true); printResult('Iterate over buffer (GZIP, igbinary)', $metrics, 'iterate_buffer_gz_ig', 1, true); - if(function_exists('lz4_compress')){ + if(extension_loaded('lz4')){ printResult('Fill buffer (LZ4, igbinary)', $metrics, 'fill_buffer_lz4_ig', 2, true); printResult('Iterate over buffer (LZ4, igbinary)', $metrics, 'iterate_buffer_lz4_ig', 1, 
true); } } +if(extension_loaded('msgpack')){ + printResult('Fill buffer (msgpack)', $metrics, 'fill_buffer_mp', 2, true); + printResult('Iterate over buffer (msgpack)', $metrics, 'iterate_buffer_mp', 1, true); + printResult('Fill buffer (GZIP, msgpack)', $metrics, 'fill_buffer_gz_mp', 2, true); + printResult('Iterate over buffer (GZIP, msgpack)', $metrics, 'iterate_buffer_gz_mp', 1, true); + if(extension_loaded('lz4')){ + printResult('Fill buffer (LZ4, msgpack)', $metrics, 'fill_buffer_lz4_mp', 2, true); + printResult('Iterate over buffer (LZ4, msgpack)', $metrics, 'iterate_buffer_lz4_mp', 1, true); + } +} diff --git a/psalm.baseline.xml b/psalm.baseline.xml index 0c1b90f..4ee51ad 100644 --- a/psalm.baseline.xml +++ b/psalm.baseline.xml @@ -4,6 +4,8 @@ lz4_compress($serialized) lz4_uncompress($compressed) + msgpack_serialize($item) + msgpack_unserialize($this->current) diff --git a/test/LargeArrayBufferTest.php b/test/LargeArrayBufferTest.php index 5cddc73..7e492f6 100644 --- a/test/LargeArrayBufferTest.php +++ b/test/LargeArrayBufferTest.php @@ -56,7 +56,10 @@ public static function provideConfig(): \Generator { } } - #[DataProvider('provideConfig')] + /** + * @dataProvider provideConfig + */ + //#[DataProvider('provideConfig')] public function testReadWrite(int $serializer, int $compression): void { $o = self::getObject(); $buf = new LargeArrayBuffer(serializer: $serializer, compression: $compression); @@ -66,7 +69,10 @@ public function testReadWrite(int $serializer, int $compression): void { $this->assertEquals($o, $buf->current()); } - #[DataProvider('provideConfig')] + /** + * @dataProvider provideConfig + */ + //#[DataProvider('provideConfig')] public function testLoop(int $serializer, int $compression): void { $count = 1500; $buf = new LargeArrayBuffer(serializer: $serializer, compression: $compression); From 240094f201e5b7c3197fa86fbc073faa400efdd4 Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Tue, 15 Oct 2024 20:15:09 +0200 Subject: [PATCH 3/6] Bump 
actions/cache to v4 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cda48d3..612f1d0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Cache Composer dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /tmp/composer-cache key: ${{ runner.os }}-${{ hashFiles('**/composer.lock') }} @@ -57,7 +57,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Cache Composer dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /tmp/composer-cache key: ${{ runner.os }}-${{ hashFiles('**/composer.lock') }} From 5125022176378b244ebb1da4607395ad0b45ee02 Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Tue, 15 Oct 2024 20:15:55 +0200 Subject: [PATCH 4/6] Update version number --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 604adc1..f758869 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,6 @@ { "name": "nerou/large-array-buffer", - "version": "1.0.0", + "version": "1.1.0", "type": "library", "license": "MIT", "authors": [ From 73fb98af8fc0f2151d1e440a631a7e48b9c7b849 Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Mon, 10 Mar 2025 16:43:03 +0100 Subject: [PATCH 5/6] Attempt to fix Psalm action --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 612f1d0..f295a10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,7 @@ jobs: uses: docker://ghcr.io/psalm/psalm-github-actions with: args: --shepherd + php_version: "8.0" test: runs-on: ubuntu-latest From d7533ab2ba4d4866a856037c289a9242b0c8dc98 Mon Sep 17 00:00:00 2001 From: Andreas Wahlen Date: Mon, 10 Mar 2025 17:17:17 +0100 Subject: [PATCH 6/6] Fix missing error handling --- .github/workflows/ci.yml | 3 
+-- .phive/phars.xml | 4 ++-- psalm.xml | 3 ++- src/LargeArrayBuffer.php | 14 ++++++++++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f295a10..15155aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,10 +34,9 @@ jobs: uses: actions/checkout@v4 - name: Psalm - uses: docker://ghcr.io/psalm/psalm-github-actions + uses: docker://ghcr.io/psalm/psalm-github-actions:6.4.1 with: args: --shepherd - php_version: "8.0" test: runs-on: ubuntu-latest diff --git a/.phive/phars.xml b/.phive/phars.xml index 8e42d60..79d9787 100644 --- a/.phive/phars.xml +++ b/.phive/phars.xml @@ -1,6 +1,6 @@ - - + + diff --git a/psalm.xml b/psalm.xml index 10aac6a..d3b2f47 100644 --- a/psalm.xml +++ b/psalm.xml @@ -7,7 +7,8 @@ errorBaseline="psalm.baseline.xml" errorLevel="2" findUnusedBaselineEntry="false" - findUnusedCode="false"> + findUnusedCode="false" + phpVersion="8.0"> diff --git a/src/LargeArrayBuffer.php b/src/LargeArrayBuffer.php index 1303145..5f4a7f5 100644 --- a/src/LargeArrayBuffer.php +++ b/src/LargeArrayBuffer.php @@ -79,7 +79,7 @@ public function __construct(int $maxMemoryMiB = 1024, int $serializer = self::SE /** * @psalm-param E $item - * @throws \RuntimeException if unable to write to php://temp + * @throws \RuntimeException if unable to write to php://temp, the serialization failed or the compression failed */ public function push(mixed $item): void { $serialized = match($this->serializer){ @@ -87,6 +87,9 @@ public function push(mixed $item): void { self::SERIALIZER_MSGPACK => msgpack_serialize($item), default => serialize($item) }; + if($serialized === false){ + throw new \RuntimeException('failed to serialize data'); + } /** @var string|false $compressed */ $compressed = match($this->compression){ self::COMPRESSION_GZIP => gzdeflate($serialized), @@ -219,7 +222,14 @@ public function toJSONFile($dest, int $flags = JSON_THROW_ON_ERROR, int $depth = if(($flags & 
JSON_PRETTY_PRINT) > 0){ fwrite($stream, PHP_EOL.' '); } - fwrite($stream, json_encode($item, $flags, $depth)); + $json = json_encode($item, $flags, $depth); + if($json === false){ + if(is_string($dest)){ + fclose($stream); + } + throw new \RuntimeException('failed to serialize data'); + } + fwrite($stream, $json); fflush($stream); $first = false; }