diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index efbcba7..503b363 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,6 +33,7 @@ jobs:
name: Coding Standard
uses: brick/coding-standard/.github/workflows/coding-standard.yml@v2
with:
+ php-version: 8.4
working-directory: "tools/ecs"
phpunit:
@@ -43,15 +44,9 @@ jobs:
fail-fast: false
matrix:
php-version:
- - "8.1"
- - "8.2"
- - "8.3"
- "8.4"
deps:
- "highest"
- include:
- - php-version: "8.1"
- deps: "lowest"
steps:
- name: Checkout
diff --git a/README.md b/README.md
index 7ba5dd1..565db39 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ composer require brick/structured-data
### Requirements
-This library requires PHP 8.1 or later. It makes use of the following extensions:
+This library requires PHP 8.4 or later. It makes use of the following extensions:
- [dom](https://www.php.net/manual/en/book.dom.php)
- [json](https://www.php.net/manual/en/book.json.php)
@@ -55,13 +55,13 @@ interface Brick\StructuredData\Reader
/**
* Reads the items contained in the given document.
*
- * @param DOMDocument $document The DOM document to read.
- * @param string $url The URL the document was retrieved from. This will be used only to resolve relative
- * URLs in property values. No attempt will be performed to connect to this URL.
+ * @param HTMLDocument $document The DOM document to read.
+ * @param string $url The URL the document was retrieved from. This will be used only to resolve relative
+ * URLs in property values. No attempt will be performed to connect to this URL.
*
* @return Item[] The top-level items.
*/
- public function read(DOMDocument $document, string $url) : array;
+ public function read(HTMLDocument $document, string $url) : array;
}
```
@@ -93,7 +93,7 @@ use Brick\StructuredData\Item;
$microdataReader = new MicrodataReader();
// Wrap into HTMLReader to be able to read HTML strings or files directly,
-// i.e. without manually converting them to DOMDocument instances first
+// i.e. without manually converting them to HTMLDocument instances first
$htmlReader = new HTMLReader($microdataReader);
// Replace this URL with that of a website you know is using Microdata
diff --git a/composer.json b/composer.json
index 0827ad7..769d5fb 100644
--- a/composer.json
+++ b/composer.json
@@ -11,7 +11,7 @@
],
"license": "MIT",
"require": {
- "php": "^8.1",
+ "php": "^8.4",
"ext-dom": "*",
"ext-json": "*",
"ext-libxml": "*",
diff --git a/psalm-baseline.xml b/psalm-baseline.xml
index 17211f7..e63208c 100644
--- a/psalm-baseline.xml
+++ b/psalm-baseline.xml
@@ -14,13 +14,14 @@
+ textContent]]>
-
- $this->readJson($node->textContent, $url)]]>
-
+
+
+
@@ -29,106 +30,100 @@
-
- parentNode;
-
- if ($itemprop->isSameNode($node)) {
- return true;
- }
-
- if ($itemprop->attributes->getNamedItem('itemscope')) {
- return false;
- }
- }
- }]]>
-
-
- $this->nodeToItem($node, $xpath, $url)]]>
- parentNode;
-
- if ($itemprop->isSameNode($node)) {
- return true;
- }
-
- if ($itemprop->attributes->getNamedItem('itemscope')) {
- return false;
- }
- }
- }]]>
-
-
+
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
- textContent)]]>
-
-
- attributes->getNamedItem('itemprop')->textContent]]>
-
-
+ textContent]]>
+
+
+
+
+
+
+
+
+
-
-
-
-
- parentNode;
-
- if ($itemprop->isSameNode($node)) {
- return true;
- }
-
- if ($itemprop->attributes->getNamedItem('typeof')) {
- return false;
- }
- }
-
- // Unreachable, but makes static analysis happy
- return false;
- }]]>
-
+
+
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ attributes]]>
+ attributes->getNamedItem('itemprop')->textContent]]>
+ parentNode]]>
+ textContent]]>
+
+
+ textContent]]>
+ textContent]]>
+ textContent]]>
+
- $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null)]]>
- parentNode;
-
- if ($itemprop->isSameNode($node)) {
- return true;
- }
-
- if ($itemprop->attributes->getNamedItem('typeof')) {
- return false;
- }
- }
-
- // Unreachable, but makes static analysis happy
- return false;
- }]]>
+ textContent)]]>
-
+
+ attributes]]>
+ attributes]]>
+ attributes]]>
+
+
+
+
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
textContent]]>
- textContent)]]>
-
-
- attributes->getNamedItem('property')->textContent]]>
- textContent]]>
-
-
+ textContent]]>
+
+
+
+
+
+
-
+
+
+ textContent]]>
+ textContent]]>
+ textContent]]>
+ attributes]]>
+ parentNode]]>
+ attributes->getNamedItem('property')->textContent]]>
+ textContent]]>
+ textContent]]>
+ textContent]]>
+
+
+ textContent]]>
+
+
+ textContent)]]>
+
+
+ attributes]]>
+ attributes]]>
+ attributes]]>
+ attributes]]>
+
diff --git a/src/DOMBuilder.php b/src/DOMBuilder.php
index 8ca4210..4c53fe8 100644
--- a/src/DOMBuilder.php
+++ b/src/DOMBuilder.php
@@ -4,32 +4,26 @@
namespace Brick\StructuredData;
-use DOMDocument;
+use Dom\HTMLDocument;
+use const Dom\HTML_NO_DEFAULT_NS;
use const LIBXML_NOERROR;
-use const LIBXML_NOWARNING;
final class DOMBuilder
{
/**
- * Builds a DOMDocument from an HTML string.
+ * Builds an HTMLDocument from an HTML string.
*/
- public static function fromHTML(string $html): DOMDocument
+ public static function fromHTML(string $html): HTMLDocument
{
- $document = new DOMDocument();
- $document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
-
- return $document;
+ return HTMLDocument::createFromString($html, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}
/**
- * Builds a DOMDocument from an HTML file.
+ * Builds an HTMLDocument from an HTML file.
*/
- public static function fromHTMLFile(string $file): DOMDocument
+ public static function fromHTMLFile(string $file): HTMLDocument
{
- $document = new DOMDocument();
- $document->loadHTMLFile($file, LIBXML_NOWARNING | LIBXML_NOERROR);
-
- return $document;
+ return HTMLDocument::createFromFile($file, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}
}
diff --git a/src/Reader.php b/src/Reader.php
index c37c713..e6fa929 100644
--- a/src/Reader.php
+++ b/src/Reader.php
@@ -4,7 +4,7 @@
namespace Brick\StructuredData;
-use DOMDocument;
+use Dom\HTMLDocument;
/**
* Common interface for readers of each format: Microdata, RDFa Lite, JSON-LD.
@@ -14,11 +14,11 @@ interface Reader
/**
* Reads the items contained in the given document.
*
- * @param DOMDocument $document The DOM document to read.
- * @param string $url The URL the document was retrieved from. This will be used only to resolve relative
- * URLs in property values. The implementation must not attempt to connect to this URL.
+ * @param HTMLDocument $document The HTMLDocument to read.
+ * @param string $url The URL the document was retrieved from. This will be used only to resolve relative
+ * URLs in property values. The implementation must not attempt to connect to this URL.
*
* @return Item[] The top-level items.
*/
- public function read(DOMDocument $document, string $url): array;
+ public function read(HTMLDocument $document, string $url): array;
}
diff --git a/src/Reader/JsonLdReader.php b/src/Reader/JsonLdReader.php
index 3f5672b..6c460ba 100644
--- a/src/Reader/JsonLdReader.php
+++ b/src/Reader/JsonLdReader.php
@@ -6,9 +6,9 @@
use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;
-use DOMDocument;
-use DOMNode;
-use DOMXPath;
+use Dom\HTMLDocument;
+use Dom\Node;
+use Dom\XPath;
use Override;
use Sabre\Uri\InvalidUriException;
use stdClass;
@@ -66,9 +66,9 @@ public function __construct(array $iriProperties = [])
}
#[Override]
- public function read(DOMDocument $document, string $url): array
+ public function read(HTMLDocument $document, string $url): array
{
- $xpath = new DOMXPath($document);
+ $xpath = new XPath($document);
$nodes = $xpath->query('//script[@type="application/ld+json"]');
$nodes = iterator_to_array($nodes);
@@ -78,7 +78,7 @@ public function read(DOMDocument $document, string $url): array
}
$items = array_map(
- fn (DOMNode $node) => $this->readJson($node->textContent, $url),
+ fn (Node $node) => $this->readJson($node->textContent, $url),
$nodes,
);
diff --git a/src/Reader/MicrodataReader.php b/src/Reader/MicrodataReader.php
index 8878c65..8db43d4 100644
--- a/src/Reader/MicrodataReader.php
+++ b/src/Reader/MicrodataReader.php
@@ -6,9 +6,9 @@
use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;
-use DOMDocument;
-use DOMNode;
-use DOMXPath;
+use Dom\HTMLDocument;
+use Dom\Node;
+use Dom\XPath;
use Override;
use Sabre\Uri\InvalidUriException;
@@ -36,9 +36,9 @@
final class MicrodataReader implements Reader
{
#[Override]
- public function read(DOMDocument $document, string $url): array
+ public function read(HTMLDocument $document, string $url): array
{
- $xpath = new DOMXPath($document);
+ $xpath = new XPath($document);
/**
* An item is a top-level Microdata item if its element does not have an itemprop attribute.
@@ -49,19 +49,19 @@ public function read(DOMDocument $document, string $url): array
$nodes = iterator_to_array($nodes);
return array_map(
- fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url),
+ fn (Node $node) => $this->nodeToItem($node, $xpath, $url),
$nodes,
);
}
/**
- * Extracts information from a DOMNode into an Item.
+ * Extracts information from a Node into an Item.
*
- * @param DOMNode $node A DOMNode representing an element with the itemscope attribute.
- * @param DOMXPath $xpath A DOMXPath object created from the node's document element.
- * @param string $url The URL the document was retrieved from, for relative URL resolution.
+ * @param Node $node A Node representing an element with the itemscope attribute.
+ * @param XPath $xpath An XPath object created from the node's document element.
+ * @param string $url The URL the document was retrieved from, for relative URL resolution.
*/
- private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
+ private function nodeToItem(Node $node, XPath $xpath, string $url): Item
{
$itemid = $node->attributes->getNamedItem('itemid');
@@ -106,7 +106,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
// See: https://stackoverflow.com/q/26365495/759866
- $itemprops = array_filter($itemprops, function (DOMNode $itemprop) use ($node, $xpath) {
+ $itemprops = array_filter($itemprops, function (Node $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;
@@ -122,7 +122,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
$vocabularyIdentifier = $this->getVocabularyIdentifier($types);
- /** @var DOMNode[] $itemprops */
+ /** @var Node[] $itemprops */
foreach ($itemprops as $itemprop) {
/**
* An element introducing a property can introduce multiple properties at once, to avoid duplication when
@@ -159,11 +159,11 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
/**
* @see https://www.w3.org/TR/microdata/#values
*
- * @param DOMNode $node A DOMNode representing an element with the itemprop attribute.
- * @param DOMXPath $xpath A DOMXPath object created from the node's document element.
- * @param string $url The URL the document was retrieved from, for relative URL resolution.
+ * @param Node $node A Node representing an element with the itemprop attribute.
+ * @param XPath $xpath An XPath object created from the node's document element.
+ * @param string $url The URL the document was retrieved from, for relative URL resolution.
*/
- private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url): Item|string
+ private function getPropertyValue(Node $node, XPath $xpath, string $url): Item|string
{
/**
* If the element also has an itemscope attribute: the value is the item created by the element.
diff --git a/src/Reader/RdfaLiteReader.php b/src/Reader/RdfaLiteReader.php
index 70037e7..7db2f7a 100644
--- a/src/Reader/RdfaLiteReader.php
+++ b/src/Reader/RdfaLiteReader.php
@@ -6,9 +6,9 @@
use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;
-use DOMDocument;
-use DOMNode;
-use DOMXPath;
+use Dom\HTMLDocument;
+use Dom\Node;
+use Dom\XPath;
use Override;
use Sabre\Uri\InvalidUriException;
@@ -93,9 +93,9 @@ final class RdfaLiteReader implements Reader
];
#[Override]
- public function read(DOMDocument $document, string $url): array
+ public function read(HTMLDocument $document, string $url): array
{
- $xpath = new DOMXPath($document);
+ $xpath = new XPath($document);
/**
* Top-level item has a typeof attribute and no property attribute.
@@ -104,22 +104,22 @@ public function read(DOMDocument $document, string $url): array
$nodes = iterator_to_array($nodes);
return array_map(
- fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null),
+ fn (Node $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null),
$nodes,
);
}
/**
- * Extracts information from a DOMNode into an Item.
+ * Extracts information from a Node into an Item.
*
- * @param DOMNode $node A DOMNode representing an element with the typeof attribute.
- * @param DOMXPath $xpath A DOMXPath object created from the node's document element.
+ * @param Node $node A Node representing an element with the typeof attribute.
+ * @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
* This is the content of the vocab attribute of the closest item ancestor.
*/
- private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item
+ private function nodeToItem(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item
{
$vocabulary = $this->updateVocabulary($node, $vocabulary);
@@ -165,7 +165,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $
// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
// See: https://stackoverflow.com/q/26365495/759866
- $properties = array_filter($properties, function (DOMNode $itemprop) use ($node, $xpath) {
+ $properties = array_filter($properties, function (Node $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;
@@ -182,7 +182,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $
return false;
});
- /** @var DOMNode[] $properties */
+ /** @var Node[] $properties */
foreach ($properties as $property) {
$names = $property->attributes->getNamedItem('property')->textContent;
@@ -261,12 +261,12 @@ private function isValidAbsoluteURL(string $url): bool
/**
* Replaces the current vocabulary with the one from the vocab attribute of the current node, if set.
*
- * @param DOMNode $node The DOMNode that may contain a vocab attribute.
+ * @param Node $node The Node that may contain a vocab attribute.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
*
* @return string|null The updated vocabulary URL, if any.
*/
- private function updateVocabulary(DOMNode $node, ?string $vocabulary): ?string
+ private function updateVocabulary(Node $node, ?string $vocabulary): ?string
{
$vocab = $node->attributes->getNamedItem('vocab');
@@ -310,13 +310,13 @@ private function checkVocabularyUrl(string $url): ?string
/**
* @see https://www.w3.org/TR/microdata/#values
*
- * @param DOMNode $node A DOMNode representing an element with the property attribute.
- * @param DOMXPath $xpath A DOMXPath object created from the node's document element.
+ * @param Node $node A Node representing an element with the property attribute.
+ * @param XPath $xpath An XPath object created from the node's document element.
* @param string $url The URL the document was retrieved from, for relative URL resolution.
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
*/
- private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string
+ private function getPropertyValue(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string
{
// If the element also has an typeof attribute, create an item from the element
$attr = $node->attributes->getNamedItem('typeof');
diff --git a/src/Reader/ReaderChain.php b/src/Reader/ReaderChain.php
index e1c606c..00e1415 100644
--- a/src/Reader/ReaderChain.php
+++ b/src/Reader/ReaderChain.php
@@ -5,7 +5,7 @@
namespace Brick\StructuredData\Reader;
use Brick\StructuredData\Reader;
-use DOMDocument;
+use Dom\HTMLDocument;
use Override;
use function array_merge;
@@ -29,7 +29,7 @@ public function __construct(Reader ...$readers)
}
#[Override]
- public function read(DOMDocument $document, string $url): array
+ public function read(HTMLDocument $document, string $url): array
{
if (! $this->readers) {
return [];