From 384b6a97613f15d66030d0040a466ec547d4d32b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muhammet=20=C5=9Eafak?= Date: Mon, 25 May 2026 08:43:50 +0300 Subject: [PATCH 1/4] Add support for Composer dist tarballs and implement CI workflow --- .gitattributes | 12 +- .github/workflows/ci.yml | 87 +++++ .gitignore | 5 +- README.md | 258 ++++++------- composer.json | 51 ++- docs/README.md | 33 ++ docs/context-css.md | 59 +++ docs/context-html-attribute.md | 65 ++++ docs/context-html.md | 48 +++ docs/context-javascript.md | 64 +++ docs/context-url.md | 74 ++++ docs/encodings.md | 70 ++++ docs/exceptions.md | 66 ++++ docs/getting-started.md | 77 ++++ docs/security-notes.md | 83 ++++ phpunit.xml.dist | 13 + src/Esc.php | 128 ++++-- src/Escaper.php | 364 +++++++++++++----- src/Exception/EncodingConversionException.php | 13 + .../EncodingNotSupportedException.php | 13 + src/Exception/EscaperException.php | 17 + src/Exception/InvalidContextException.php | 13 + src/Exception/InvalidUtf8Exception.php | 13 + tests/EscTest.php | 152 ++++++++ tests/EscaperCssTest.php | 66 ++++ tests/EscaperEncodingTest.php | 76 ++++ tests/EscaperHtmlAttrTest.php | 99 +++++ tests/EscaperHtmlTest.php | 65 ++++ tests/EscaperJsTest.php | 63 +++ tests/EscaperUrlTest.php | 45 +++ tests/ExceptionHierarchyTest.php | 45 +++ 31 files changed, 1945 insertions(+), 292 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 docs/README.md create mode 100644 docs/context-css.md create mode 100644 docs/context-html-attribute.md create mode 100644 docs/context-html.md create mode 100644 docs/context-javascript.md create mode 100644 docs/context-url.md create mode 100644 docs/encodings.md create mode 100644 docs/exceptions.md create mode 100644 docs/getting-started.md create mode 100644 docs/security-notes.md create mode 100644 phpunit.xml.dist create mode 100644 src/Exception/EncodingConversionException.php create mode 100644 src/Exception/EncodingNotSupportedException.php create mode 100644 
src/Exception/EscaperException.php create mode 100644 src/Exception/InvalidContextException.php create mode 100644 src/Exception/InvalidUtf8Exception.php create mode 100644 tests/EscTest.php create mode 100644 tests/EscaperCssTest.php create mode 100644 tests/EscaperEncodingTest.php create mode 100644 tests/EscaperHtmlAttrTest.php create mode 100644 tests/EscaperHtmlTest.php create mode 100644 tests/EscaperJsTest.php create mode 100644 tests/EscaperUrlTest.php create mode 100644 tests/ExceptionHierarchyTest.php diff --git a/.gitattributes b/.gitattributes index 6e5411e..44fa4dc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,9 @@ -.gitignore export-ignore -.gitattributes export-ignore -/Examples export-ignore \ No newline at end of file +# Exclude from Composer dist tarballs. +/.github export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.phpunit.cache export-ignore +/Examples export-ignore +/docs export-ignore +/tests export-ignore +/phpunit.xml.dist export-ignore diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..66354ae --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,87 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + validate: + name: composer validate + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + coverage: none + tools: composer:v2 + - run: composer validate --strict + + tests: + name: PHPUnit (PHP ${{ matrix.php }}) + runs-on: ubuntu-latest + needs: validate + strategy: + fail-fast: false + matrix: + php: ["7.4", "8.0", "8.1", "8.2", "8.3", "8.4"] + steps: + - uses: actions/checkout@v4 + + - name: Set up PHP ${{ matrix.php }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + + - name: Validate composer.json + run: composer validate 
--no-check-publish + + - name: Get composer cache directory + id: composer-cache + run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" + + - name: Cache composer dependencies + uses: actions/cache@v4 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: composer-${{ matrix.php }}-${{ hashFiles('**/composer.json') }} + restore-keys: composer-${{ matrix.php }}- + + - name: Install dependencies + run: composer update --prefer-dist --no-progress --no-interaction + + - name: Run PHPUnit + run: vendor/bin/phpunit + + coverage: + name: Coverage + runs-on: ubuntu-latest + needs: tests + steps: + - uses: actions/checkout@v4 + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: pcov + tools: composer:v2 + + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + + - name: Run PHPUnit with coverage + run: vendor/bin/phpunit --coverage-clover=coverage.xml + + - name: Upload coverage artifact + uses: actions/upload-artifact@v4 + with: + name: coverage-clover + path: coverage.xml + retention-days: 14 diff --git a/.gitignore b/.gitignore index 0abe7ad..359db29 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ /.vscode/ /.vs/ /vendor/ -/composer.lock \ No newline at end of file +/composer.lock +/build/ +/.phpunit.cache/ +/.phpunit.result.cache diff --git a/README.md b/README.md index 5c8e332..062595a 100644 --- a/README.md +++ b/README.md @@ -1,180 +1,146 @@ -# InitPHP Escaper +# initphp/escaper -Securely and safely escape HTML, HTML attributes, JavaScript, CSS, and URLs. +Context-aware output escaper for PHP. Safely render untrusted user input inside +HTML, HTML attributes, JavaScript, CSS and URLs. 
-[![Latest Stable Version](http://poser.pugx.org/initphp/escaper/v)](https://packagist.org/packages/initphp/escaper) [![Total Downloads](http://poser.pugx.org/initphp/escaper/downloads)](https://packagist.org/packages/initphp/escaper) [![Latest Unstable Version](http://poser.pugx.org/initphp/escaper/v/unstable)](https://packagist.org/packages/initphp/escaper) [![License](http://poser.pugx.org/initphp/escaper/license)](https://packagist.org/packages/initphp/escaper) [![PHP Version Require](http://poser.pugx.org/initphp/escaper/require/php)](https://packagist.org/packages/initphp/escaper) +[![Latest Stable Version](https://poser.pugx.org/initphp/escaper/v)](https://packagist.org/packages/initphp/escaper) +[![PHP Version Require](https://poser.pugx.org/initphp/escaper/require/php)](https://packagist.org/packages/initphp/escaper) +[![CI](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml/badge.svg)](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml) +[![License](https://poser.pugx.org/initphp/escaper/license)](https://packagist.org/packages/initphp/escaper) +[![Total Downloads](https://poser.pugx.org/initphp/escaper/downloads)](https://packagist.org/packages/initphp/escaper) -## Requirements +`htmlspecialchars()` is not enough on its own. Each output context — an HTML +body, an attribute, a JavaScript string literal, a CSS value, a URL parameter +— needs its own escaping rules, and using the wrong one can leave you exposed +to XSS even when you *think* you have escaped your data. -- PHP 7.4 or higher -- PHP _CType_ Extension -- PHP _MB_String_ or _Iconv_ Extension +`initphp/escaper` implements the rules from the +[OWASP XSS Prevention Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html) +for the five most common contexts, behind a small, dependency-free API. 
## Installation -```php +```bash composer require initphp/escaper ``` -## Usage +### Requirements -`\InitPHP\Escaper\Esc::esc()` : +- PHP 7.4 or newer +- `ext-ctype` +- `ext-mbstring` (required); `ext-iconv` is used when present and preferred + over mbstring -```php -public static function esc(string[]|string $data, string $context = 'html', ?string $encoding = null): array|string; -``` +## Quick start + +```php +use InitPHP\Escaper\Esc; + +echo Esc::esc(''); +// <script>alert(1)</script> + +echo Esc::esc('faketitle onmouseover=alert(1);', 'attr'); +// faketitle onmouseover=alert(1); -- `$data` : The content to be cleared. -- `$context` : The method to be used for cleaning. If the value is not one of the following; Throws `Exception`. - - `html` - - `js` - - `css` - - `url` - - `attr` -- `$encoding` : If the character set to be used is not specified or `NULL`; `UTF-8` is used by default. - -`html` Escaper Example : -```php -alert("initphp")'; -?> - - - - Encodings set correctly! - - - - - - +echo Esc::esc('"; alert(1); var x="', 'js'); +// \x22\x3B\x20alert\x281\x29\x3B\x20var\x20x\x3D\x22 + +echo Esc::esc('', 'css'); +// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E + +echo Esc::esc('" onmouseover="alert(1)', 'url'); +// %22%20onmouseover%3D%22alert%281%29 ``` -`attr` Escaper Example : +`Esc::esc()` also accepts arrays and recurses into them, so escaping a whole +request payload at the view boundary is a one-liner: ```php - - - - - Quoteless Attribute - - - -
- - ?> - > - Hello World - -
- - +$safe = Esc::esc($_GET, 'html'); ``` -`Js` Escaper Example : +## API -```php - - - - - Escaped Entities - - - - -

Hello World

- - +```php +public static function esc( + array|string $data, + string $context = 'html', + ?string $encoding = null +): array|string; ``` -`css` Escaper Example : +| Argument | Description | +| ----------- | --------------------------------------------------------------------------- | +| `$data` | A string, or an array (which is escaped recursively). | +| `$context` | `html`, `attr`, `js`, `css`, `url`, or `raw` (returns input unchanged). | +| `$encoding` | Output encoding. `null` resolves to UTF-8. See [Encodings](docs/encodings.md). | + +Throws `InitPHP\Escaper\Exception\InvalidContextException` for unknown contexts. + +### `Escaper` + +For lower-level use, instantiate `Escaper` directly. Each instance is bound to +one encoding and exposes one method per context: ```php -'); -} -INPUT; -?> - - - - Escaped CSS - - - - -

User controlled CSS needs to be properly escaped!

- - +use InitPHP\Escaper\Escaper; + +$escaper = new Escaper(); // utf-8 +$escaper = new Escaper('windows-1252'); + +$escaper->escHtml($string); +$escaper->escHtmlAttr($string); +$escaper->escJs($string); +$escaper->escCss($string); +$escaper->escUrl($string); ``` -`url` Escaper Example : +## Documentation -```php - - - - - Unescaped URL - - - - -Click - - +The [`docs/`](docs/) directory contains a per-context walkthrough with +examples, do-and-don't guidance and security notes: + +- [Getting started](docs/getting-started.md) +- [HTML body context](docs/context-html.md) +- [HTML attribute context](docs/context-html-attribute.md) +- [JavaScript context](docs/context-javascript.md) +- [CSS context](docs/context-css.md) +- [URL context](docs/context-url.md) +- [Encodings](docs/encodings.md) +- [Exceptions](docs/exceptions.md) +- [Security notes](docs/security-notes.md) + +## A word of warning + +> Output escaping prevents XSS but it is not a substitute for input validation, +> authentication, or authorisation. It is also context-sensitive: the +> JavaScript escaper assumes the caller wraps the result in quotes, the HTML +> attribute escaper assumes the value is used as a single attribute value, and +> so on. Read the per-context docs before mixing contexts. + +## Contributing + +Contributions are welcome. Please read the +[org-wide CONTRIBUTING guide](https://github.com/InitPHP/.github/blob/main/CONTRIBUTING.md) +for the workflow, coding standards and test expectations. + +A typical loop is: + +```bash +git clone https://github.com/InitPHP/Escaper.git +cd Escaper +composer install +composer test ``` -## Credits +## Security -- [Muhammet ŞAFAK](https://www.muhammetsafak.com.tr) <> +If you discover a security issue, please follow the disclosure process +documented in [SECURITY.md](https://github.com/InitPHP/.github/blob/main/SECURITY.md) +rather than opening a public issue. 
## License -Copyright © 2022 [MIT License](./LICENSE) +Released under the [MIT License](./LICENSE). © InitPHP. diff --git a/composer.json b/composer.json index ad53f95..ef46f80 100644 --- a/composer.json +++ b/composer.json @@ -1,13 +1,20 @@ { "name": "initphp/escaper", - "description": "InitPHP Escaper Class", + "description": "Context-aware output escaper (HTML, attribute, JavaScript, CSS, URL) for safely rendering untrusted user input.", "type": "library", "license": "MIT", - "autoload": { - "psr-4": { - "InitPHP\\Escaper\\": "src/" - } - }, + "keywords": [ + "escaper", + "escape", + "xss", + "security", + "html", + "javascript", + "css", + "url", + "output-encoding", + "owasp" + ], "authors": [ { "name": "Muhammet ŞAFAK", @@ -16,9 +23,35 @@ "homepage": "https://www.muhammetsafak.com.tr" } ], - "minimum-stability": "stable", + "support": { + "issues": "https://github.com/InitPHP/Escaper/issues", + "source": "https://github.com/InitPHP/Escaper", + "docs": "https://github.com/InitPHP/Escaper/tree/main/docs" + }, "require": { "php": ">=7.4", - "ext-ctype": "*" - } + "ext-ctype": "*", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^9.6 || ^10.5 || ^11.5" + }, + "suggest": { + "ext-iconv": "Preferred for encoding conversion; iconv is tried before mbstring." + }, + "autoload": { + "psr-4": { + "InitPHP\\Escaper\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "InitPHP\\Escaper\\Tests\\": "tests/" + } + }, + "scripts": { + "test": "phpunit", + "test-coverage": "phpunit --coverage-html build/coverage" + }, + "minimum-stability": "stable" } diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..3167a53 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,33 @@ +# initphp/escaper — Documentation + +This directory is the developer reference for `initphp/escaper`. The +top-level [README](../README.md) is intentionally short; everything in +depth lives here. + +## Index + +1. 
[Getting started](getting-started.md) — install, first call, the `Esc` + facade vs. instantiating `Escaper`. +2. **Per-context guides** — one file per output context, with the rules + the escaper applies, the threats it defeats, and runnable examples: + - [HTML body context](context-html.md) (`escHtml`) + - [HTML attribute context](context-html-attribute.md) (`escHtmlAttr`) + - [JavaScript context](context-javascript.md) (`escJs`) + - [CSS context](context-css.md) (`escCss`) + - [URL context](context-url.md) (`escUrl`) +3. [Encodings](encodings.md) — non-UTF-8 input/output, the supported list + and how conversion is performed. +4. [Exceptions](exceptions.md) — the exception tree and when each one + is thrown. +5. [Security notes](security-notes.md) — caveats, common misuses, and + pointers to authoritative sources. + +## Conventions used in these docs + +- Code samples assume the autoloader has already been required. +- Output shown in `// comments` is the literal string the escaper + returns. Each sample was generated by running the escaper itself, not + hand-written. +- "Untrusted" means any data that has touched the network, the + filesystem, a database, or anything else outside your PHP process — + in other words, "almost everything". diff --git a/docs/context-css.md b/docs/context-css.md new file mode 100644 index 0000000..ba47c91 --- /dev/null +++ b/docs/context-css.md @@ -0,0 +1,59 @@ +# CSS context (`escCss`) + +> Use when the value lands inside a CSS property value: +> `color: HERE;`, `background-image: url(HERE);`, ``. + +## What it does + +`escCss` whitelists `[A-Za-z0-9]`. Every other character is rewritten as +the CSS escape sequence `\HEX `, with the **mandatory trailing space** +that terminates the escape. + +The trailing space looks redundant when followed by another character, +but CSS uses it as a delimiter — without it, the parser would eat +hex-digit-looking characters that follow the escape. Always emit it. 
+ +## Example — preventing a `` breakout + +```php +use InitPHP\Escaper\Esc; + +$untrusted = ''; + +echo Esc::esc($untrusted, 'css'); +// \3C \2F style\3E \3C script\3E alert\28 1\29 \3C \2F script\3E +``` + +The `<`, `>`, `/`, `(`, `)`, and spaces all turn into CSS escapes, so +the attacker cannot close the `'; + $output = $this->escaper->escCss($input); + + self::assertSame( + '\\3C \\2F style\\3E \\3C script\\3E alert\\28 1\\29 \\3C \\2F script\\3E ', + $output + ); + } + + public function testBmpMultibyteCharacterBecomesHexEscape(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('\\15F ', $this->escaper->escCss('ş')); + } + + public function testSupplementaryPlaneCharacterBecomesHexEscape(): void + { + // U+1F680 → 1F680 in hex. + self::assertSame('\\1F680 ', $this->escaper->escCss('🚀')); + } +} diff --git a/tests/EscaperEncodingTest.php b/tests/EscaperEncodingTest.php new file mode 100644 index 0000000..60ae31e --- /dev/null +++ b/tests/EscaperEncodingTest.php @@ -0,0 +1,76 @@ +getEncoding()); + } + + public function testNullEncodingResolvesToUtf8(): void + { + self::assertSame('utf-8', (new Escaper(null))->getEncoding()); + } + + public function testEmptyStringEncodingResolvesToUtf8(): void + { + self::assertSame('utf-8', (new Escaper(''))->getEncoding()); + } + + public function testEncodingLookupIsCaseInsensitive(): void + { + self::assertSame('utf-8', (new Escaper('UTF-8'))->getEncoding()); + self::assertSame('windows-1252', (new Escaper('Windows-1252'))->getEncoding()); + self::assertSame('iso-8859-1', (new Escaper('ISO-8859-1'))->getEncoding()); + } + + public function testUnsupportedEncodingThrows(): void + { + $this->expectException(EncodingNotSupportedException::class); + $this->expectExceptionMessage('Encoding "utf-16" is not supported.'); + + new Escaper('utf-16'); + } + + public function testEncodingExceptionIsAnEscaperException(): void + { + try { + new Escaper('not-a-real-encoding'); + self::fail('Expected 
EncodingNotSupportedException'); + } catch (EscaperException $e) { + self::assertInstanceOf(EncodingNotSupportedException::class, $e); + } + } + + public function testNonUtf8InputIsConvertedThenEscaped(): void + { + $escaper = new Escaper('iso-8859-1'); + + // ISO-8859-1 byte 0xE9 == "é". When fed in as a single byte the + // escaper must first decode it to UTF-8, then re-encode the output + // back to ISO-8859-1. + $output = $escaper->escHtml("\xE9"); + + // htmlspecialchars receives 'é' in UTF-8 and leaves it alone, but + // returns it encoded back to ISO-8859-1 → 0xE9. + self::assertSame("\xE9", $output); + } + + public function testInvalidUtf8InAttributeContextThrows(): void + { + $this->expectException(InvalidUtf8Exception::class); + + // 0xC3 0x28 is a broken 2-byte sequence. + (new Escaper())->escHtmlAttr("\xC3\x28"); + } +} diff --git a/tests/EscaperHtmlAttrTest.php b/tests/EscaperHtmlAttrTest.php new file mode 100644 index 0000000..831d2f2 --- /dev/null +++ b/tests/EscaperHtmlAttrTest.php @@ -0,0 +1,99 @@ +escaper = new Escaper(); + } + + public function testEmptyStringShortCircuits(): void + { + self::assertSame('', $this->escaper->escHtmlAttr('')); + } + + public function testDigitsOnlyShortCircuits(): void + { + self::assertSame('12345', $this->escaper->escHtmlAttr('12345')); + } + + public function testWhitelistCharactersPassThrough(): void + { + self::assertSame( + 'abc,XYZ.-_0123', + $this->escaper->escHtmlAttr('abc,XYZ.-_0123') + ); + } + + public function testQuotelessAttributeInjectionVector(): void + { + $input = 'faketitle onmouseover=alert(/InitPHP!/);'; + + self::assertSame( + 'faketitle onmouseover=alert(/InitPHP!/);', + $this->escaper->escHtmlAttr($input) + ); + } + + public function testNamedEntitiesPreferredOverNumericForms(): void + { + self::assertSame('"', $this->escaper->escHtmlAttr('"')); + self::assertSame('&', $this->escaper->escHtmlAttr('&')); + self::assertSame('<', $this->escaper->escHtmlAttr('<')); + self::assertSame('>', 
$this->escaper->escHtmlAttr('>')); + } + + public function testControlCharactersBecomeReplacementCharacter(): void + { + // 0x00, 0x01, 0x1B all fall into the C0 range and must not survive. + self::assertSame('�', $this->escaper->escHtmlAttr("\x00")); + self::assertSame('�', $this->escaper->escHtmlAttr("\x01")); + self::assertSame('�', $this->escaper->escHtmlAttr("\x1B")); + } + + public function testTabLineFeedAndCarriageReturnAreEscapedNotReplaced(): void + { + // Tab/LF/CR are explicitly exempted from the replacement rule. + self::assertSame(' ', $this->escaper->escHtmlAttr("\t")); + self::assertSame(' ', $this->escaper->escHtmlAttr("\n")); + self::assertSame(' ', $this->escaper->escHtmlAttr("\r")); + } + + public function testC1ControlsBecomeReplacementCharacter(): void + { + // U+007F DEL (single-byte UTF-8). + self::assertSame('�', $this->escaper->escHtmlAttr("\x7F")); + // U+0080 PADDING CHARACTER (multibyte UTF-8: 0xC2 0x80). + self::assertSame('�', $this->escaper->escHtmlAttr("\xC2\x80")); + // U+009F APPLICATION PROGRAM COMMAND (multibyte UTF-8: 0xC2 0x9F). + self::assertSame('�', $this->escaper->escHtmlAttr("\xC2\x9F")); + } + + public function testU00A0IsEscapedNotReplaced(): void + { + // U+00A0 NO-BREAK SPACE sits just outside the C1 range and must be + // escaped as a normal character, not replaced. + self::assertSame(' ', $this->escaper->escHtmlAttr("\xC2\xA0")); + } + + public function testBmpMultibyteCharacterUsesFourDigitHex(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('ş', $this->escaper->escHtmlAttr('ş')); + } + + public function testSupplementaryPlaneCharacterEmitsFullHex(): void + { + // U+1F680 ROCKET — beyond the BMP. 
+ self::assertSame('🚀', $this->escaper->escHtmlAttr('🚀')); + } +} diff --git a/tests/EscaperHtmlTest.php b/tests/EscaperHtmlTest.php new file mode 100644 index 0000000..966260d --- /dev/null +++ b/tests/EscaperHtmlTest.php @@ -0,0 +1,65 @@ +escaper = new Escaper(); + } + + public function testEscapesAngleBracketsAndQuotes(): void + { + $input = ''; + $output = $this->escaper->escHtml($input); + + self::assertSame( + '<script>alert("xss")</script>', + $output + ); + } + + public function testEscapesSingleQuoteWithEntQuotes(): void + { + self::assertSame(''', $this->escaper->escHtml("'")); + } + + public function testEscapesAmpersand(): void + { + self::assertSame('Tom & Jerry', $this->escaper->escHtml('Tom & Jerry')); + } + + public function testEmptyStringReturnsEmptyString(): void + { + self::assertSame('', $this->escaper->escHtml('')); + } + + public function testPlainAsciiPassesThroughUnchanged(): void + { + self::assertSame('Hello, world!', $this->escaper->escHtml('Hello, world!')); + } + + public function testMultibyteCharactersPassThroughInUtf8(): void + { + // htmlspecialchars only touches &, <, >, ", ' — multibyte stays. + self::assertSame('Merhaba dünya — şŞıİğĞ', $this->escaper->escHtml('Merhaba dünya — şŞıİğĞ')); + } + + public function testInvalidByteSequenceIsReplacedNotDropped(): void + { + // ENT_SUBSTITUTE replaces malformed UTF-8 with U+FFFD instead of + // returning an empty string (the unsafe ENT_IGNORE behaviour). 
+ $invalid = "\xC3\x28"; // invalid 2-byte sequence + $output = $this->escaper->escHtml($invalid); + + self::assertNotSame('', $output); + } +} diff --git a/tests/EscaperJsTest.php b/tests/EscaperJsTest.php new file mode 100644 index 0000000..42a9c22 --- /dev/null +++ b/tests/EscaperJsTest.php @@ -0,0 +1,63 @@ +escaper = new Escaper(); + } + + public function testEmptyStringShortCircuits(): void + { + self::assertSame('', $this->escaper->escJs('')); + } + + public function testDigitsOnlyShortCircuits(): void + { + self::assertSame('98765', $this->escaper->escJs('98765')); + } + + public function testWhitelistCharactersPassThrough(): void + { + self::assertSame('abc,XYZ._0', $this->escaper->escJs('abc,XYZ._0')); + } + + public function testEntityBasedInjectionVectorIsEscaped(): void + { + $input = 'bar"; alert("Hello!"); var xss="true'; + + self::assertSame( + 'bar\\x26quot\\x3B\\x3B\\x20alert\\x28\\x26quot\\x3BHello\\x21\\x26quot\\x3B\\x29\\x3B\\x20var\\x20xss\\x3D\\x26quot\\x3Btrue', + $this->escaper->escJs($input) + ); + } + + public function testSingleByteSpecialCharsBecomeHexEscapes(): void + { + self::assertSame('\\x20', $this->escaper->escJs(' ')); + self::assertSame('\\x22', $this->escaper->escJs('"')); + self::assertSame('\\x2F', $this->escaper->escJs('/')); + self::assertSame('\\x3C', $this->escaper->escJs('<')); + } + + public function testBmpMultibyteCharacterBecomesUnicodeEscape(): void + { + // U+015F LATIN SMALL LETTER S WITH CEDILLA (ş) + self::assertSame('\\u015F', $this->escaper->escJs('ş')); + } + + public function testSupplementaryPlaneCharacterBecomesSurrogatePair(): void + { + // U+1F680 → high surrogate D83D + low surrogate DE80 + self::assertSame('\\uD83D\\uDE80', $this->escaper->escJs('🚀')); + } +} diff --git a/tests/EscaperUrlTest.php b/tests/EscaperUrlTest.php new file mode 100644 index 0000000..043fd51 --- /dev/null +++ b/tests/EscaperUrlTest.php @@ -0,0 +1,45 @@ +escaper = new Escaper(); + } + + public function 
testEmptyStringReturnsEmptyString(): void + { + self::assertSame('', $this->escaper->escUrl('')); + } + + public function testRfc3986UnreservedCharactersAreNotEncoded(): void + { + self::assertSame('Hello.world-1_2~3', $this->escaper->escUrl('Hello.world-1_2~3')); + } + + public function testSpaceIsPercentEncodedAsPercent20(): void + { + // rawurlencode (RFC 3986) — not "+" like urlencode. + self::assertSame('foo%20bar', $this->escaper->escUrl('foo bar')); + } + + public function testJavascriptInjectionVectorIsPercentEncoded(): void + { + $input = '" onmouseover="alert(\'hello\')'; + $output = $this->escaper->escUrl($input); + + self::assertSame( + '%22%20onmouseover%3D%22alert%28%27hello%27%29', + $output + ); + } +} diff --git a/tests/ExceptionHierarchyTest.php b/tests/ExceptionHierarchyTest.php new file mode 100644 index 0000000..3dc3fe9 --- /dev/null +++ b/tests/ExceptionHierarchyTest.php @@ -0,0 +1,45 @@ + Date: Mon, 25 May 2026 08:52:39 +0300 Subject: [PATCH 2/4] Update Composer dependencies and add PHP-CS-Fixer configuration file --- .gitattributes | 17 ++++++++------- .github/workflows/ci.yml | 34 +++++++++++++++++++++++++++++ .gitignore | 1 + .php-cs-fixer.dist.php | 32 +++++++++++++++++++++++++++ Examples/Attr.php | 23 -------------------- Examples/Css.php | 28 ------------------------ Examples/Html.php | 20 ----------------- Examples/Js.php | 24 --------------------- Examples/Url.php | 21 ------------------ README.md | 11 +++++++++- composer.json | 22 +++++++++++++++++-- phpstan.neon.dist | 6 ++++++ src/Esc.php | 29 ++++++++++++++----------- src/Escaper.php | 37 ++++++++++++++------------------ tests/EscTest.php | 18 ++++++++-------- tests/EscaperEncodingTest.php | 4 ++-- tests/EscaperHtmlAttrTest.php | 6 +++--- tests/ExceptionHierarchyTest.php | 25 ++++++++++++++++----- 18 files changed, 178 insertions(+), 180 deletions(-) create mode 100644 .php-cs-fixer.dist.php delete mode 100644 Examples/Attr.php delete mode 100644 Examples/Css.php delete mode 
100644 Examples/Html.php delete mode 100644 Examples/Js.php delete mode 100644 Examples/Url.php create mode 100644 phpstan.neon.dist diff --git a/.gitattributes b/.gitattributes index 44fa4dc..7f5d248 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,9 +1,10 @@ # Exclude from Composer dist tarballs. -/.github export-ignore -/.gitattributes export-ignore -/.gitignore export-ignore -/.phpunit.cache export-ignore -/Examples export-ignore -/docs export-ignore -/tests export-ignore -/phpunit.xml.dist export-ignore +/.github export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.php-cs-fixer.dist.php export-ignore +/.phpunit.cache export-ignore +/docs export-ignore +/phpstan.neon.dist export-ignore +/phpunit.xml.dist export-ignore +/tests export-ignore diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66354ae..b823b6a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,40 @@ jobs: tools: composer:v2 - run: composer validate --strict + cs: + name: PHP-CS-Fixer + runs-on: ubuntu-latest + needs: validate + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + - name: Check coding standards + run: composer cs-check + + stan: + name: PHPStan + runs-on: ubuntu-latest + needs: validate + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: "8.2" + extensions: ctype, mbstring, iconv + coverage: none + tools: composer:v2 + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + - name: Run PHPStan + run: composer stan + tests: name: PHPUnit (PHP ${{ matrix.php }}) runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 359db29..8bb648c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 
@@ /build/ /.phpunit.cache/ /.phpunit.result.cache +/.php-cs-fixer.cache diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php new file mode 100644 index 0000000..7deb8da --- /dev/null +++ b/.php-cs-fixer.dist.php @@ -0,0 +1,32 @@ +in([__DIR__ . '/src', __DIR__ . '/tests']) + ->name('*.php'); + +return (new PhpCsFixer\Config()) + ->setRiskyAllowed(true) + ->setRules([ + '@PSR12' => true, + '@PSR12:risky' => true, + '@PHP74Migration' => true, + '@PHP74Migration:risky' => true, + 'array_syntax' => ['syntax' => 'short'], + 'declare_strict_types' => true, + 'native_function_invocation' => [ + 'include' => ['@compiler_optimized'], + 'scope' => 'namespaced', + 'strict' => true, + ], + 'no_unused_imports' => true, + 'ordered_imports' => [ + 'imports_order' => ['class', 'function', 'const'], + 'sort_algorithm' => 'alpha', + ], + 'single_quote' => true, + 'trailing_comma_in_multiline' => ['elements' => ['arrays']], + ]) + ->setFinder($finder) + ->setCacheFile(__DIR__ . '/build/php-cs-fixer.cache'); diff --git a/Examples/Attr.php b/Examples/Attr.php deleted file mode 100644 index 1d5e62d..0000000 --- a/Examples/Attr.php +++ /dev/null @@ -1,23 +0,0 @@ - - - - - Quoteless Attribute - - - -
- - ?> - > - Hello World - -
- - \ No newline at end of file diff --git a/Examples/Css.php b/Examples/Css.php deleted file mode 100644 index bf5443e..0000000 --- a/Examples/Css.php +++ /dev/null @@ -1,28 +0,0 @@ -'); -} -INPUT; -?> - - - - Escaped CSS - - - - -

User controlled CSS needs to be properly escaped!

- - \ No newline at end of file diff --git a/Examples/Html.php b/Examples/Html.php deleted file mode 100644 index be8bad7..0000000 --- a/Examples/Html.php +++ /dev/null @@ -1,20 +0,0 @@ - - - - - Encodings set correctly! - - - -alert("initphp")'; - -// <script>alert("initphp")</script> -echo Esc::esc($input, 'html'); - -?> - \ No newline at end of file diff --git a/Examples/Js.php b/Examples/Js.php deleted file mode 100644 index 6156d03..0000000 --- a/Examples/Js.php +++ /dev/null @@ -1,24 +0,0 @@ - - - - - Escaped Entities - - - - -

Hello World

- - \ No newline at end of file diff --git a/Examples/Url.php b/Examples/Url.php deleted file mode 100644 index e8654a2..0000000 --- a/Examples/Url.php +++ /dev/null @@ -1,21 +0,0 @@ - - - - - Unescaped URL data - - - - -Click here! - - \ No newline at end of file diff --git a/README.md b/README.md index 062595a..92d5c6c 100644 --- a/README.md +++ b/README.md @@ -132,9 +132,18 @@ A typical loop is: git clone https://github.com/InitPHP/Escaper.git cd Escaper composer install -composer test +composer ci # cs-check + phpstan + phpunit ``` +Individual steps are also available: + +| Command | What it does | +| ------------------ | ------------------------------------------- | +| `composer test` | Run PHPUnit | +| `composer stan` | Run PHPStan (max level) | +| `composer cs-check`| Report PHP-CS-Fixer violations, no changes | +| `composer cs-fix` | Apply PHP-CS-Fixer changes | + ## Security If you discover a security issue, please follow the disclosure process diff --git a/composer.json b/composer.json index ef46f80..95398af 100644 --- a/composer.json +++ b/composer.json @@ -34,7 +34,9 @@ "ext-mbstring": "*" }, "require-dev": { - "phpunit/phpunit": "^9.6 || ^10.5 || ^11.5" + "phpunit/phpunit": "^9.6 || ^10.5 || ^11.5", + "phpstan/phpstan": "^1.12 || ^2.1", + "friendsofphp/php-cs-fixer": "^3.65" }, "suggest": { "ext-iconv": "Preferred for encoding conversion; iconv is tried before mbstring." 
@@ -51,7 +53,23 @@ }, "scripts": { "test": "phpunit", - "test-coverage": "phpunit --coverage-html build/coverage" + "test-coverage": "phpunit --coverage-html build/coverage", + "stan": "phpstan analyse --no-progress", + "cs-check": "php-cs-fixer fix --dry-run --diff", + "cs-fix": "php-cs-fixer fix", + "ci": [ + "@cs-check", + "@stan", + "@test" + ] + }, + "scripts-descriptions": { + "test": "Run the PHPUnit test suite.", + "test-coverage": "Run PHPUnit and produce an HTML coverage report under build/coverage.", + "stan": "Run PHPStan at the level configured in phpstan.neon.dist.", + "cs-check": "Report any PHP-CS-Fixer violations without modifying files.", + "cs-fix": "Apply PHP-CS-Fixer fixes in-place.", + "ci": "Run the full CI bundle locally: cs-check, stan, test." }, "minimum-stability": "stable" } diff --git a/phpstan.neon.dist b/phpstan.neon.dist new file mode 100644 index 0000000..5b7f9e5 --- /dev/null +++ b/phpstan.neon.dist @@ -0,0 +1,6 @@ +parameters: + level: max + paths: + - src + - tests + tmpDir: build/phpstan diff --git a/src/Esc.php b/src/Esc.php index 7dc0cb7..199d76d 100644 --- a/src/Esc.php +++ b/src/Esc.php @@ -15,8 +15,6 @@ use InitPHP\Escaper\Exception\InvalidContextException; -use function is_array; -use function is_string; use function strtolower; /** @@ -54,21 +52,26 @@ class Esc * Escape a string — or every string inside an array — for the given * output context. * - * Non-string scalars and objects inside an array are returned unchanged. - * For a top-level non-string, non-array value the input is returned as-is. + * Behaviour by input type: + * - **string** — escaped according to `$context` and returned. + * - **array** — every element is escaped recursively. Keys are not + * touched; non-string, non-array elements are returned unchanged. + * - **anything else** — returned as-is. * - * @param array|string $data The value to escape. - * @param string $context One of: html, attr, js, css, url, raw. - * Lookup is case-insensitive. 
- * @param string|null $encoding Output encoding; null uses UTF-8. + * @param mixed $data The value to escape. + * @param string $context One of `html`, `attr`, `js`, `css`, + * `url`, `raw`. Lookup is case-insensitive. + * The empty string is treated like `raw`. + * @param string|null $encoding Output encoding; null resolves to UTF-8. * - * @return array|string + * @return mixed The escaped value, or the original value unchanged for + * unsupported types and `raw`/empty contexts. * - * @throws InvalidContextException When $context is not a recognised name. + * @throws InvalidContextException When `$context` is not a recognised name. */ public static function esc($data, string $context = 'html', ?string $encoding = null) { - if (is_array($data)) { + if (\is_array($data)) { foreach ($data as &$value) { $value = self::esc($value, $context, $encoding); } @@ -77,7 +80,7 @@ public static function esc($data, string $context = 'html', ?string $encoding = return $data; } - if (!is_string($data)) { + if (!\is_string($data)) { return $data; } @@ -88,7 +91,7 @@ public static function esc($data, string $context = 'html', ?string $encoding = if (!isset(self::CONTEXT_METHODS[$context])) { throw new InvalidContextException( - sprintf('Invalid escape context "%s".', $context) + \sprintf('Invalid escape context "%s".', $context) ); } diff --git a/src/Escaper.php b/src/Escaper.php index 50b3a6f..15af53d 100644 --- a/src/Escaper.php +++ b/src/Escaper.php @@ -19,18 +19,13 @@ use function bin2hex; use function ctype_digit; -use function function_exists; use function hexdec; use function htmlspecialchars; use function iconv; -use function in_array; use function mb_convert_encoding; -use function ord; use function preg_match; use function preg_replace_callback; use function rawurlencode; -use function sprintf; -use function strlen; use function strtolower; use function strtoupper; use function substr; @@ -138,9 +133,9 @@ public function __construct(?string $encoding = null) { if 
($encoding !== null && $encoding !== '') { $encoding = strtolower($encoding); - if (!in_array($encoding, self::SUPPORTED_ENCODINGS, true)) { + if (!\in_array($encoding, self::SUPPORTED_ENCODINGS, true)) { throw new EncodingNotSupportedException( - sprintf('Encoding "%s" is not supported.', $encoding) + \sprintf('Encoding "%s" is not supported.', $encoding) ); } $this->encoding = $encoding; @@ -260,7 +255,7 @@ public function escCss(string $str): string protected function htmlAttrMatcher(array $matches): string { $chr = $matches[0]; - if (strlen($chr) > 1) { + if (\strlen($chr) > 1) { $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8'); } @@ -282,10 +277,10 @@ protected function htmlAttrMatcher(array $matches): string } if ($ord > 255) { - return sprintf('&#x%04X;', $ord); + return \sprintf('&#x%04X;', $ord); } - return sprintf('&#x%02X;', $ord); + return \sprintf('&#x%02X;', $ord); } /** @@ -297,21 +292,21 @@ protected function htmlAttrMatcher(array $matches): string protected function jsMatcher(array $matches): string { $chr = $matches[0]; - if (strlen($chr) === 1) { - return sprintf('\\x%02X', ord($chr)); + if (\strlen($chr) === 1) { + return \sprintf('\\x%02X', \ord($chr)); } $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8'); $hex = strtoupper(bin2hex($chr)); - if (strlen($hex) <= 4) { - return sprintf('\\u%04s', $hex); + if (\strlen($hex) <= 4) { + return \sprintf('\\u%04s', $hex); } $highSurrogate = substr($hex, 0, 4); $lowSurrogate = substr($hex, 4, 4); - return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate); + return \sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate); } /** @@ -323,14 +318,14 @@ protected function jsMatcher(array $matches): string protected function cssMatcher(array $matches): string { $chr = $matches[0]; - if (strlen($chr) === 1) { - $ord = ord($chr); + if (\strlen($chr) === 1) { + $ord = \ord($chr); } else { $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8'); $ord = (int) hexdec(bin2hex($chr)); } - return 
sprintf('\\%X ', $ord); + return \sprintf('\\%X ', $ord); } /** @@ -392,9 +387,9 @@ protected function isUtf8(string $str): bool */ protected function convertEncoding(string $str, string $to, string $from): string { - if (function_exists('iconv')) { + if (\function_exists('iconv')) { $result = @iconv($from, $to, $str); - } elseif (function_exists('mb_convert_encoding')) { + } elseif (\function_exists('mb_convert_encoding')) { $result = @mb_convert_encoding($str, $to, $from); } else { throw new EncodingConversionException( @@ -404,7 +399,7 @@ protected function convertEncoding(string $str, string $to, string $from): strin if ($result === false) { throw new EncodingConversionException( - sprintf('Failed to convert string from "%s" to "%s".', $from, $to) + \sprintf('Failed to convert string from "%s" to "%s".', $from, $to) ); } diff --git a/tests/EscTest.php b/tests/EscTest.php index 656b768..d398d4e 100644 --- a/tests/EscTest.php +++ b/tests/EscTest.php @@ -27,11 +27,11 @@ public function testHtmlContextIsDefault(): void public function testEachContextDispatchesToTheCorrectEscaper(): void { - self::assertSame('<b>', Esc::esc('', 'html')); - self::assertSame('<', Esc::esc('<', 'attr')); - self::assertSame('\\x3C', Esc::esc('<', 'js')); - self::assertSame('\\3C ', Esc::esc('<', 'css')); - self::assertSame('%3C', Esc::esc('<', 'url')); + self::assertSame('<b>', Esc::esc('', 'html')); + self::assertSame('<', Esc::esc('<', 'attr')); + self::assertSame('\\x3C', Esc::esc('<', 'js')); + self::assertSame('\\3C ', Esc::esc('<', 'css')); + self::assertSame('%3C', Esc::esc('<', 'url')); } public function testContextLookupIsCaseInsensitive(): void @@ -76,8 +76,8 @@ public function testArrayIsEscapedRecursively(): void public function testNonStringNonArrayValuesArePassedThrough(): void { - self::assertSame(42, Esc::esc(42)); - self::assertSame(3.14, Esc::esc(3.14)); + self::assertSame(42, Esc::esc(42)); + self::assertSame(3.14, Esc::esc(3.14)); self::assertTrue(Esc::esc(true)); 
self::assertNull(Esc::esc(null)); @@ -138,8 +138,8 @@ public function testDifferentEncodingsAreCachedIndependently(): void $instances = $property->getValue(); self::assertCount(3, $instances); - self::assertArrayHasKey('utf-8', $instances); - self::assertArrayHasKey('iso-8859-1', $instances); + self::assertArrayHasKey('utf-8', $instances); + self::assertArrayHasKey('iso-8859-1', $instances); self::assertArrayHasKey('windows-1252', $instances); } diff --git a/tests/EscaperEncodingTest.php b/tests/EscaperEncodingTest.php index 60ae31e..f39502d 100644 --- a/tests/EscaperEncodingTest.php +++ b/tests/EscaperEncodingTest.php @@ -29,9 +29,9 @@ public function testEmptyStringEncodingResolvesToUtf8(): void public function testEncodingLookupIsCaseInsensitive(): void { - self::assertSame('utf-8', (new Escaper('UTF-8'))->getEncoding()); + self::assertSame('utf-8', (new Escaper('UTF-8'))->getEncoding()); self::assertSame('windows-1252', (new Escaper('Windows-1252'))->getEncoding()); - self::assertSame('iso-8859-1', (new Escaper('ISO-8859-1'))->getEncoding()); + self::assertSame('iso-8859-1', (new Escaper('ISO-8859-1'))->getEncoding()); } public function testUnsupportedEncodingThrows(): void diff --git a/tests/EscaperHtmlAttrTest.php b/tests/EscaperHtmlAttrTest.php index 831d2f2..721af03 100644 --- a/tests/EscaperHtmlAttrTest.php +++ b/tests/EscaperHtmlAttrTest.php @@ -47,9 +47,9 @@ public function testQuotelessAttributeInjectionVector(): void public function testNamedEntitiesPreferredOverNumericForms(): void { self::assertSame('"', $this->escaper->escHtmlAttr('"')); - self::assertSame('&', $this->escaper->escHtmlAttr('&')); - self::assertSame('<', $this->escaper->escHtmlAttr('<')); - self::assertSame('>', $this->escaper->escHtmlAttr('>')); + self::assertSame('&', $this->escaper->escHtmlAttr('&')); + self::assertSame('<', $this->escaper->escHtmlAttr('<')); + self::assertSame('>', $this->escaper->escHtmlAttr('>')); } public function 
testControlCharactersBecomeReplacementCharacter(): void diff --git a/tests/ExceptionHierarchyTest.php b/tests/ExceptionHierarchyTest.php index 3dc3fe9..dac7936 100644 --- a/tests/ExceptionHierarchyTest.php +++ b/tests/ExceptionHierarchyTest.php @@ -10,36 +10,51 @@ use InitPHP\Escaper\Exception\InvalidContextException; use InitPHP\Escaper\Exception\InvalidUtf8Exception; use PHPUnit\Framework\TestCase; +use ReflectionClass; use RuntimeException; /** * Locks the package exception tree in place so a future refactor that * accidentally re-parents an exception fails loudly. + * + * Uses reflection rather than `is_subclass_of()` so the assertions look at + * the *immediate* parent of each exception, not just somewhere in the chain. */ final class ExceptionHierarchyTest extends TestCase { public function testBaseExtendsRuntimeException(): void { - self::assertTrue(is_subclass_of(EscaperException::class, RuntimeException::class)); + self::assertSame(RuntimeException::class, self::parentOf(EscaperException::class)); } public function testEncodingNotSupportedExtendsBase(): void { - self::assertTrue(is_subclass_of(EncodingNotSupportedException::class, EscaperException::class)); + self::assertSame(EscaperException::class, self::parentOf(EncodingNotSupportedException::class)); } public function testEncodingConversionExtendsBase(): void { - self::assertTrue(is_subclass_of(EncodingConversionException::class, EscaperException::class)); + self::assertSame(EscaperException::class, self::parentOf(EncodingConversionException::class)); } public function testInvalidContextExtendsBase(): void { - self::assertTrue(is_subclass_of(InvalidContextException::class, EscaperException::class)); + self::assertSame(EscaperException::class, self::parentOf(InvalidContextException::class)); } public function testInvalidUtf8ExtendsBase(): void { - self::assertTrue(is_subclass_of(InvalidUtf8Exception::class, EscaperException::class)); + self::assertSame(EscaperException::class, 
self::parentOf(InvalidUtf8Exception::class)); + } + + /** + * @param class-string $class + */ + private static function parentOf(string $class): string + { + $parent = (new ReflectionClass($class))->getParentClass(); + self::assertNotFalse($parent, \sprintf('Class "%s" must have a parent.', $class)); + + return $parent->getName(); } } From b239780399d7b1cb187de53de17978685f2d15dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muhammet=20=C5=9Eafak?= Date: Mon, 25 May 2026 09:00:41 +0300 Subject: [PATCH 3/4] Update CI for "*.x" branches, PHP matrix, dependencies, Codecov, CHANGELOG --- .github/workflows/ci.yml | 30 +++++++-- CHANGELOG.md | 85 ++++++++++++++++++++++++ README.md | 1 + UPGRADE-2.0.md | 140 +++++++++++++++++++++++++++++++++++++++ composer.json | 8 ++- 5 files changed, 257 insertions(+), 7 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 UPGRADE-2.0.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b823b6a..fefbebf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [main] + branches: [main, "*.x"] pull_request: - branches: [main] + branches: [main, "*.x"] jobs: validate: @@ -54,13 +54,19 @@ jobs: run: composer stan tests: - name: PHPUnit (PHP ${{ matrix.php }}) + name: PHPUnit (PHP ${{ matrix.php }}, ${{ matrix.deps }}) runs-on: ubuntu-latest needs: validate strategy: fail-fast: false matrix: php: ["7.4", "8.0", "8.1", "8.2", "8.3", "8.4"] + deps: ["highest"] + include: + - php: "7.4" + deps: "lowest" + - php: "8.4" + deps: "lowest" steps: - uses: actions/checkout@v4 @@ -83,12 +89,17 @@ jobs: uses: actions/cache@v4 with: path: ${{ steps.composer-cache.outputs.dir }} - key: composer-${{ matrix.php }}-${{ hashFiles('**/composer.json') }} - restore-keys: composer-${{ matrix.php }}- + key: composer-${{ matrix.php }}-${{ matrix.deps }}-${{ hashFiles('**/composer.json') }} + restore-keys: composer-${{ matrix.php }}-${{ matrix.deps }}- - - name: Install 
dependencies + - name: Install highest dependencies + if: matrix.deps == 'highest' run: composer update --prefer-dist --no-progress --no-interaction + - name: Install lowest dependencies + if: matrix.deps == 'lowest' + run: composer update --prefer-dist --no-progress --no-interaction --prefer-lowest --prefer-stable + - name: Run PHPUnit run: vendor/bin/phpunit @@ -119,3 +130,10 @@ jobs: name: coverage-clover path: coverage.xml retention-days: 14 + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + files: ./coverage.xml + flags: phpunit + fail_ci_if_error: false diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3ad7245 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,85 @@ +# Changelog + +All notable changes to `initphp/escaper` are documented here. + +The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [2.0.0] + +A reliability- and correctness-focused major release. Several bug fixes +in this release are visible from the outside, so the version bump is +necessary even though the public surface is unchanged. See +[`UPGRADE-2.0.md`](./UPGRADE-2.0.md) for a step-by-step migration guide. + +### Added + +- Dedicated exception hierarchy under `InitPHP\Escaper\Exception\`: + `EscaperException` (base, extends `\RuntimeException`), + `EncodingNotSupportedException`, `EncodingConversionException`, + `InvalidContextException`, `InvalidUtf8Exception`. +- Full PHPUnit test suite under `tests/` (62 tests, 100 assertions, + ~91% line coverage). +- GitHub Actions CI: `composer validate`, PHP-CS-Fixer, PHPStan (max), + PHPUnit on PHP 7.4 – 8.4, coverage upload. +- PHPStan configuration at the `max` level with zero reported issues. +- PHP-CS-Fixer configuration based on `@PSR12` and + `@PHP74Migration` rule sets. 
+- Developer documentation under `docs/` covering each escape context, + encoding handling, exceptions and security notes. +- `Esc::reset()` helper to clear the memoised `Escaper` cache (used by + tests; useful when the calling code wants to drop cached instances). +- `composer.json` scripts: `test`, `test-coverage`, `stan`, `cs-check`, + `cs-fix`, `ci`. + +### Changed + +- **`Esc::esc()` recursion** now propagates `$encoding` into recursive + calls. Previously the encoding was dropped on every inner call, + silently defaulting to UTF-8 for nested arrays. +- **`Esc::esc()` instance cache** is now keyed by encoding. The previous + cache compared `$escaper->getEncoding()` (`'utf-8'`) against the raw + `$encoding` argument (often `null`), so the cache rebuilt on every + default call. +- **`Escaper` constructor** raises `EncodingNotSupportedException` + instead of `\Exception`. (Still catchable via `\Exception` / + `\RuntimeException`.) +- **`HTML attribute` matcher** evaluates the C0/C1 control-character + check against the decoded code point, so `U+0080`–`U+009F` are now + correctly replaced with `U+FFFD` when they arrive in multibyte UTF-8 + form. Previously only single-byte controls were caught. +- **`composer.json`** now requires `ext-mbstring`. `ext-iconv` remains + optional and is preferred when present (`suggest` entry added). +- **PHPDoc blocks** rewritten across the package to reflect the actual + code behaviour. + +### Fixed + +- **Silent data loss on encoding-conversion failure.** When `iconv` / + `mb_convert_encoding` returned `false`, `Escaper::convertEncoding()` + previously returned an empty string and let it propagate, masking + real failures. It now raises `EncodingConversionException`. +- **`isUtf8()`** uses explicit `=== 1` comparison against + `preg_match()` instead of relying on PHP's loose type coercion in a + `bool` return. 
+- **Misleading error message** in `convertEncoding()`: the "MB_String + plugin is required" text appeared even when iconv was tried first. + Replaced with "Either ext-iconv or ext-mbstring is required". +- **Unused callable properties** (`$htmlAttrMatcher`, `$jsMatcher`, + `$cssMatcher`) removed. The matchers are now passed inline to + `preg_replace_callback`. + +### Removed + +- **`Examples/`** directory removed. The same scenarios are documented + under [`docs/`](./docs) with verified output for each example. + +## [1.0] + +Initial release. + +[Unreleased]: https://github.com/InitPHP/Escaper/compare/2.0.0...HEAD +[2.0.0]: https://github.com/InitPHP/Escaper/compare/1.0...2.0.0 +[1.0]: https://github.com/InitPHP/Escaper/releases/tag/1.0 diff --git a/README.md b/README.md index 92d5c6c..6133c91 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ HTML, HTML attributes, JavaScript, CSS and URLs. [![Latest Stable Version](https://poser.pugx.org/initphp/escaper/v)](https://packagist.org/packages/initphp/escaper) [![PHP Version Require](https://poser.pugx.org/initphp/escaper/require/php)](https://packagist.org/packages/initphp/escaper) [![CI](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml/badge.svg)](https://github.com/InitPHP/Escaper/actions/workflows/ci.yml) +[![codecov](https://codecov.io/gh/InitPHP/Escaper/branch/main/graph/badge.svg)](https://codecov.io/gh/InitPHP/Escaper) [![License](https://poser.pugx.org/initphp/escaper/license)](https://packagist.org/packages/initphp/escaper) [![Total Downloads](https://poser.pugx.org/initphp/escaper/downloads)](https://packagist.org/packages/initphp/escaper) diff --git a/UPGRADE-2.0.md b/UPGRADE-2.0.md new file mode 100644 index 0000000..891a1ed --- /dev/null +++ b/UPGRADE-2.0.md @@ -0,0 +1,140 @@ +# Upgrading from 1.x to 2.0 + +`initphp/escaper` 2.0 is a correctness release. The public API surface +is unchanged — every 1.x method still exists with the same signature. 
+What changed is **how the escaper signals failure** and **what happens +in a few edge cases that were latent bugs in 1.x**. + +If your 1.x code only calls `Esc::esc()` or `Escaper::escHtml()` etc. +in the happy path, you should be able to upgrade without code changes. +The notes below cover the cases where you may need to act. + +## 1. New `composer require`: `ext-mbstring` + +`composer.json` now declares `ext-mbstring` as a hard requirement +(`ext-iconv` remains optional but is preferred when present). If your +production image does not bundle mbstring you must add it: + +```Dockerfile +RUN docker-php-ext-install mbstring +``` + +Or on a Debian/Ubuntu host: + +```bash +apt-get install -y php-mbstring +``` + +## 2. Replace `catch (\Exception $e)` blocks (recommended) + +1.x threw a plain `\Exception`. 2.x ships a dedicated exception tree. +Your existing `\Exception` (or `\Throwable`) catches still work because +the new exceptions extend `\RuntimeException`, but you can now be +specific: + +```diff + use InitPHP\Escaper\Esc; ++use InitPHP\Escaper\Exception\EscaperException; + + try { + echo Esc::esc($value, 'attr'); +-} catch (\Exception $e) { ++} catch (EscaperException $e) { + // … + } +``` + +The full tree: + +``` +\RuntimeException + └─ InitPHP\Escaper\Exception\EscaperException + ├─ EncodingNotSupportedException // unsupported encoding constructor arg + ├─ EncodingConversionException // iconv/mbstring failure (NEW behaviour, see §3) + ├─ InvalidContextException // unknown context passed to Esc::esc() + └─ InvalidUtf8Exception // input is not / cannot be UTF-8 +``` + +## 3. Encoding-conversion failure now throws (behavioural break) + +In 1.x, if `iconv` / `mb_convert_encoding` returned `false`, the +escaper silently substituted an empty string and returned it. That +silently destroyed data. 2.x raises `EncodingConversionException` +instead. 
+ +If you rely on the old "empty string on failure" behaviour, add an +explicit `try`/`catch`: + +```php +try { + $safe = $escaper->escHtmlAttr($value); +} catch (EncodingConversionException $e) { + $safe = ''; +} +``` + +Most callers will want the exception. If you were silently corrupting +output before, you will now see the error. + +## 4. `Esc::esc()` on arrays now keeps the `$encoding` argument + +This is a bug fix. In 1.x, `Esc::esc(['x' => $v], 'html', +'iso-8859-1')` recursed into the array and called itself **without +the encoding**, so every nested value escaped as UTF-8 regardless of +the third argument. 2.x propagates the encoding correctly. + +If you were depending on the bug (i.e. you passed an encoding but +expected UTF-8 for nested values), drop the encoding argument: + +```diff +-Esc::esc($payload, 'html', 'iso-8859-1'); ++Esc::esc($payload, 'html'); +``` + +## 5. C1 control characters in multibyte UTF-8 + +`escHtmlAttr` always replaced single-byte C0/C1 controls with the +Unicode replacement character (`U+FFFD`). In 1.x the replacement only +fired against the **first byte** of a multibyte sequence, so +`U+0080`–`U+009F` in their proper 2-byte UTF-8 form (`\xC2\x80` … +`\xC2\x9F`) survived as numeric character references (`&#x80;` … +`&#x9F;`) instead of being replaced. + +2.x catches both forms. The output for those exact code points +changed from `&#x80;` etc. to `&#xFFFD;`. Both are XSS-safe; if you +were diffing output byte-for-byte across versions, expect this drift. + +## 6. `Esc::esc()` cache is now actually effective + +Not a BC break, but worth knowing: in 1.x the static cache rebuilt the +`Escaper` on every call when `$encoding === null`. 2.x caches per +encoding. No code change needed — your default-encoding calls just got +faster. + +## 7. Examples directory removed + +The runnable PHP files under `Examples/` are gone. The same scenarios +live under [`docs/`](./docs) with each output verified by running the +escaper itself.
If you scripted against the example file paths, point +your tooling at `docs/` instead. + +## 8. Static analysis & coding-standard tooling (dev only) + +If you have a fork or downstream patches, note that 2.x adds: + +- `phpstan.neon.dist` (level `max`, zero errors) +- `.php-cs-fixer.dist.php` (`@PSR12 + @PHP74Migration`) + +Your local changes should pass `composer ci` before being submitted as +PRs. + +## Summary checklist + +- [ ] `ext-mbstring` available in every environment. +- [ ] `catch (\Exception)` → `catch (EscaperException)` (optional). +- [ ] Handle `EncodingConversionException` if you used to rely on the + silent empty-string fallback. +- [ ] Drop redundant `$encoding` arguments that depended on the + recursion bug. +- [ ] Re-run any byte-for-byte output snapshots that include the + `U+0080`–`U+009F` range. diff --git a/composer.json b/composer.json index 95398af..663399d 100644 --- a/composer.json +++ b/composer.json @@ -71,5 +71,11 @@ "cs-fix": "Apply PHP-CS-Fixer fixes in-place.", "ci": "Run the full CI bundle locally: cs-check, stan, test." 
}, - "minimum-stability": "stable" + "minimum-stability": "stable", + "extra": { + "branch-alias": { + "dev-2.x": "2.0.x-dev", + "dev-main": "2.0.x-dev" + } + } } From d78ed95881d899f43f72b0c93a8ce2b046e141c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Muhammet=20=C5=9Eafak?= Date: Mon, 25 May 2026 09:20:44 +0300 Subject: [PATCH 4/4] Add Windows-1252 encoding round trips in EscaperEncodingTest --- tests/EscaperEncodingTest.php | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/EscaperEncodingTest.php b/tests/EscaperEncodingTest.php index f39502d..9aaf639 100644 --- a/tests/EscaperEncodingTest.php +++ b/tests/EscaperEncodingTest.php @@ -5,6 +5,7 @@ namespace InitPHP\Escaper\Tests; use InitPHP\Escaper\Escaper; +use InitPHP\Escaper\Exception\EncodingConversionException; use InitPHP\Escaper\Exception\EncodingNotSupportedException; use InitPHP\Escaper\Exception\EscaperException; use InitPHP\Escaper\Exception\InvalidUtf8Exception; @@ -73,4 +74,39 @@ public function testInvalidUtf8InAttributeContextThrows(): void // 0xC3 0x28 is a broken 2-byte sequence. (new Escaper())->escHtmlAttr("\xC3\x28"); } + + public function testWindows1252RoundTripThroughAttributeContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é" in windows-1252. It is outside the attribute whitelist + // so the matcher must produce a numeric entity. The output reaches + // the caller after a UTF-8 → windows-1252 conversion back. + self::assertSame('é', $escaper->escHtmlAttr("\xE9")); + } + + public function testWindows1252RoundTripThroughJsContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é". In the JS context the matcher emits é. + self::assertSame('\\u00E9', $escaper->escJs("\xE9")); + } + + public function testWindows1252RoundTripThroughCssContext(): void + { + $escaper = new Escaper('windows-1252'); + + // 0xE9 == "é". In the CSS context the matcher emits "\E9 ". 
+ self::assertSame('\\E9 ', $escaper->escCss("\xE9")); + } + + public function testForwardConversionFailureRaisesException(): void + { + $this->expectException(EncodingConversionException::class); + $this->expectExceptionMessage('Failed to convert string from "windows-1252" to "UTF-8".'); + + // 0x81 is an undefined byte in windows-1252 — iconv returns false. + (new Escaper('windows-1252'))->escHtmlAttr("\x81"); + } }