Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ php:
- 5.4
- 5.6
- 7.0
dist: precise

3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ foreach($xml->path('root/animal') as $animal) {
// $animal is of type SimpleXMLElement
// only the current iterated $animal is in memory, so huge xml files can be read, without much memory consumption
echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
}
}

```
34 changes: 34 additions & 0 deletions examples/filter-example1.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?php

namespace SimpleXmlReader;

use SimpleXmlReader\PathIterator;

require(__DIR__ . '/../src/SimpleXmlReader/autoload.php');

/*
 * Example 1: filtering matched elements by an attribute value.
 *
 * The callback passed to path() receives the reader and the list of crumbs
 * (element names from the root down to the current element) and answers with
 * one of the PathIterator::ELEMENT_* / SIBLINGS_* codes.
 */

// Sample document: three animals, each carrying a "type" attribute.
$xml = SimpleXmlReader::openFromString('
<root>
<animal type="cat">
<hastail>yes</hastail>
</animal>
<animal type="dog">
<hastail>yes</hastail>
</animal>
<animal type="kakariki">
<hastail>no</hastail>
</animal>
</root>
');

// Rejects every root/animal element whose type is neither "dog" nor "kakariki".
$keepDogsAndKakarikis = function ($xr, $crumbs) {
    $isAnimal = implode("/", $crumbs) == "root/animal";

    if ($isAnimal && ! in_array($xr->getAttribute('type'), ['dog', 'kakariki'])) {
        return PathIterator::ELEMENT_IS_INVALID;
    }

    return PathIterator::ELEMENT_IS_VALID;
};

foreach ($xml->path('root/animal', SimpleXmlReader::RETURN_SIMPLE_XML, $keepDogsAndKakarikis) as $animal) {
    echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
}
40 changes: 40 additions & 0 deletions examples/filter-example2.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

namespace SimpleXmlReader;

use SimpleXmlReader\PathIterator;

require(__DIR__ . '/../src/SimpleXmlReader/autoload.php');

/*
 * Example 2: filtering by the text of a sibling element.
 *
 * Each <group> starts with a <type> element. When that text is not "pet",
 * the callback answers SIBLINGS_ARE_INVALID so that the remaining elements
 * of the same group are filtered out as well.
 */

// Sample document: one "pet" group and one "wild" group.
$xml = SimpleXmlReader::openFromString('
<root>
<group>
<type>pet</type>
<animal type="cat">
<hastail>yes</hastail>
</animal>
<animal type="dog">
<hastail>yes</hastail>
</animal>
</group>
<group>
<type>wild</type>
<animal type="kakariki">
<hastail>no</hastail>
</animal>
</group>
</root>
');

// Inspects root/group/type and invalidates the group's siblings unless it reads "pet".
$petGroupsOnly = function ($xr, $crumbs) {
    $isGroupType = implode("/", $crumbs) == "root/group/type";

    if ($isGroupType && $xr->readString() != 'pet') {
        return PathIterator::SIBLINGS_ARE_INVALID;
    }

    return PathIterator::ELEMENT_IS_VALID;
};

foreach ($xml->path('root/group/animal', SimpleXmlReader::RETURN_SIMPLE_XML, $petGroupsOnly) as $animal) {
    echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
}
76 changes: 76 additions & 0 deletions examples/filter-example3.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<?php

namespace SimpleXmlReader;

use SimpleXmlReader\PathIterator;

require(__DIR__ . '/../src/SimpleXmlReader/autoload.php');

/*
 * Example 3: filtering on several levels of the tree at once.
 *
 * The search path contains a wildcard step ("*"), so the callback is used to
 * check attributes on the root, on each continent and on each creature.
 */

$xml = SimpleXmlReader::openFromString('
<root id="123">
<continent name="Africa">
<birds>
<creature predator="no">
<name>parrot</name>
<memo>parrot is ...</memo>
</creature>
<creature predator="yes">
<name>falcon</name>
<memo>falcon is ...</memo>
</creature>
</birds>
<animals>
<creature predator="no">
<name>gazelle</name>
<memo>gazelle is ...</memo>
</creature>
<creature predator="yes">
<name>lion</name>
<memo>lion is ...</memo>
</creature>
</animals>
</continent>
<continent name="Eurasia">
<birds>
<creature predator="no">
<name>straus</name>
<memo>straus is ...</memo>
</creature>
<creature predator="yes">
<name>eagle</name>
<memo>eagle is ...</memo>
</creature>
</birds>
<animals>
<creature predator="no">
<name>panda</name>
<memo>panda is ...</memo>
</creature>
<creature predator="yes">
<name>tiger</name>
<memo>tiger is ...</memo>
</creature>
</animals>
</continent>
</root>
');

foreach ($xml->path('root/continent/*/creature', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) {
    $path = implode("/", $crumbs);
    if ($path == "root") {
        // Only a document whose root carries id="123" is processed.
        if ($xr->getAttribute('id') != '123') {
            return PathIterator::ELEMENT_IS_INVALID;
        }
    } elseif ($path == "root/continent") {
        // Only the "Eurasia" continent is kept.
        if ($xr->getAttribute('name') != 'Eurasia') {
            return PathIterator::ELEMENT_IS_INVALID;
        }
    } elseif (preg_match('#^root/continent/[^/]+/creature$#', $path)) {
        // The pattern is anchored on both ends so that only the creature
        // element itself is tested, never a deeper path that merely
        // contains ".../creature" (e.g. ".../creature/name").
        if ($xr->getAttribute('predator') != 'yes') {
            return PathIterator::ELEMENT_IS_INVALID;
        }
    }
    return PathIterator::ELEMENT_IS_VALID;
}) as $animal) {
    echo "A {$animal->name} is predator! {$animal->memo}\n";
}
64 changes: 59 additions & 5 deletions src/SimpleXmlReader/PathIterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,22 @@ class PathIterator implements Iterator
const DESCENDANTS_COULD_MATCH = 'DESCENDANTS_COULD_MATCH';
const DESCENDANTS_CANT_MATCH = 'DESCENDANTS_CANT_MATCH';

/*
* The list of return codes for filtering callback function
*/
/*
* Valid elem, no filtering.
*/
const ELEMENT_IS_VALID = 1; // elem
/*
* Invalid elem: the element and its descendants have to be filtered out.
*/
const ELEMENT_IS_INVALID = 2;
/*
* The same as `ELEMENT_IS_INVALID`. Additionally, the following sibling elems (and their descendants) have to be filtered out too.
*/
const SIBLINGS_ARE_INVALID = 3;

protected $reader;
protected $searchPath;
protected $searchCrumbs;
Expand All @@ -21,7 +37,12 @@ class PathIterator implements Iterator
protected $isValid;
protected $returnType;

public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType)
/*
* Filtering callback function
*/
protected $callback;

public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType, $callback = null)
{
$this->reader = $reader;
$this->searchPath = $path;
Expand All @@ -31,6 +52,7 @@ public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnTy
$this->rewindCount = 0;
$this->isValid = false;
$this->returnType = $returnType;
$this->callback = $callback;
}

public function current()
Expand Down Expand Up @@ -110,6 +132,15 @@ protected function pathIsMatching()
return self::DESCENDANTS_COULD_MATCH;
}

/**
 * Moves the reader forward until it is positioned on an element node.
 *
 * If the reader already sits on an element node nothing is read.
 *
 * @param XMLReader $r reader to advance; its tryRead() method is used for stepping
 * @return bool true once the current node is an element, false as soon as tryRead() fails
 */
protected function searchForOpenTag(XMLReader $r)
{
    for (;;) {
        if ($r->nodeType == XMLReader::ELEMENT) {
            return true;
        }

        if (! $r->tryRead()) {
            return false;
        }
    }
}

public function tryGotoNextIterationElement()
{
$r = $this->reader;
Expand All @@ -124,21 +155,44 @@ public function tryGotoNextIterationElement()

while (true) {
// search for open tag
while ($r->nodeType != XMLReader::ELEMENT) {
if (! $r->tryRead()) { return false; }
}
if (! $this->searchForOpenTag($r)) { return false; }

// fill crumbs
array_splice($this->crumbs, $r->depth, count($this->crumbs), array($r->name));

switch ($this->pathIsMatching()) {
$matching = $this->pathIsMatching();

$uf = self::ELEMENT_IS_VALID;
if ($this->callback && is_callable($this->callback)
&& ($uf = call_user_func_array($this->callback, array($r, $this->crumbs))) !== self::ELEMENT_IS_VALID) {

// extra check for sanity of a value returned by the user filter
if ($uf !== self::SIBLINGS_ARE_INVALID && $uf !== self::ELEMENT_IS_INVALID ) {
$uf = self::ELEMENT_IS_INVALID;
}

$df = $r->depth;

if ($uf === self::SIBLINGS_ARE_INVALID) { $df--; }
$matching = self::DESCENDANTS_CANT_MATCH;
}

switch ($matching) {

case self::DESCENDANTS_COULD_MATCH:
if (! $r->tryRead()) { return false; }
continue 2;

case self::DESCENDANTS_CANT_MATCH:

if (! $r->tryNext()) { return false; }
if ($uf !== self::ELEMENT_IS_VALID) {
if (! $this->searchForOpenTag($r)) { return false; }
while ($r->depth > $df) {
if (! $r->tryNext()) { return false; }
if (! $this->searchForOpenTag($r)) { return false; }
}
}
continue 2;

case self::IS_MATCH:
Expand Down
4 changes: 2 additions & 2 deletions src/SimpleXmlReader/SimpleXmlReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ public static function openFromString($source, $encoding = 'UTF-8', $options = 0
return $simpleXmlReader;
}

public function path($path, $returnType = self::RETURN_SIMPLE_XML)
public function path($path, $returnType = self::RETURN_SIMPLE_XML, $callback = null)
{
return new PathIterator($this->xmlReader, $path, $returnType);
return new PathIterator($this->xmlReader, $path, $returnType, $callback);
}
}
84 changes: 84 additions & 0 deletions tests/SimpleXmlReader/PathIteratorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,88 @@ public function testInvalidXml3()
iterator_to_array($iterator);
}


/** @test */
public function testPathCbAttr1OuterXml()
{
    // Filter: a zoo is rejected (together with its animals) unless its city is "Banghok".
    $byCity = function ($xr, $crumbs) {
        if (implode("/", $crumbs) == "root/zoo" && $xr->getAttribute('city') != "Banghok") {
            return PathIterator::ELEMENT_IS_INVALID;
        }
        return PathIterator::ELEMENT_IS_VALID;
    };

    $reader = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
    $matches = $reader->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, $byCity);
    $joined = implode('', iterator_to_array($matches));

    // Whitespace is stripped so the comparison does not depend on fixture formatting.
    $this->assertEquals('<animal>kakariki</animal>', preg_replace('/\s/', '', (string) $joined));
}

/** @test */
public function testPathCbAttr2OuterXml()
{
    // Filter: a zoo is rejected unless its "contenent" attribute (fixture spelling) is "Europe".
    $byContinent = function ($xr, $crumbs) {
        if (implode("/", $crumbs) == "root/zoo" && $xr->getAttribute('contenent') != "Europe") {
            return PathIterator::ELEMENT_IS_INVALID;
        }
        return PathIterator::ELEMENT_IS_VALID;
    };

    $reader = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
    $matches = $reader->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, $byContinent);
    $joined = implode('', iterator_to_array($matches));

    // Whitespace is stripped so the comparison does not depend on fixture formatting.
    $this->assertEquals('<animal>cat</animal><animal>bear</animal>', preg_replace('/\s/', '', (string) $joined));
}

/** @test */
public function testPathCbElemOuterXml()
{
    // Filter: when a zoo's <work> text is not "yes", the siblings of <work> are invalidated too.
    $byWorkFlag = function ($xr, $crumbs) {
        if (implode("/", $crumbs) == "root/zoo/work" && $xr->readString() != "yes") {
            return PathIterator::SIBLINGS_ARE_INVALID;
        }
        return PathIterator::ELEMENT_IS_VALID;
    };

    $reader = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
    $matches = $reader->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, $byWorkFlag);
    $joined = implode('', iterator_to_array($matches));

    // Whitespace is stripped so the comparison does not depend on fixture formatting.
    $this->assertEquals('<animal>kakariki</animal><animal>bear</animal>', preg_replace('/\s/', '', (string) $joined));
}

/** @test */
public function testPathCbElemAttrOuterXml()
{
    // Filter combining both rules: the zoo must be in "Europe" and its <work> text must be "yes".
    $byContinentAndWork = function ($xr, $crumbs) {
        $location = implode("/", $crumbs);

        if ($location == "root/zoo" && $xr->getAttribute('contenent') != "Europe") {
            return PathIterator::ELEMENT_IS_INVALID;
        }
        if ($location == "root/zoo/work" && $xr->readString() != "yes") {
            return PathIterator::SIBLINGS_ARE_INVALID;
        }

        return PathIterator::ELEMENT_IS_VALID;
    };

    $reader = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
    $matches = $reader->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, $byContinentAndWork);
    $joined = implode('', iterator_to_array($matches));

    // Whitespace is stripped so the comparison does not depend on fixture formatting.
    $this->assertEquals('<animal>bear</animal>', preg_replace('/\s/', '', (string) $joined));
}

/** @test */
public function testPathCb2ElemOuterXml()
{
    // Filter: when a group's <work> text is not "yes", the siblings of <work> are invalidated too.
    $byGroupWorkFlag = function ($xr, $crumbs) {
        if (implode("/", $crumbs) == "root/group/work" && $xr->readString() != "yes") {
            return PathIterator::SIBLINGS_ARE_INVALID;
        }
        return PathIterator::ELEMENT_IS_VALID;
    };

    $reader = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb2.xml');
    $matches = $reader->path('root/group/zoos/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, $byGroupWorkFlag);
    $joined = implode('', iterator_to_array($matches));

    // Whitespace is stripped so the comparison does not depend on fixture formatting.
    $this->assertEquals('<animal>kakariki</animal><animal>bear</animal>', preg_replace('/\s/', '', (string) $joined));
}
}
15 changes: 15 additions & 0 deletions tests/SimpleXmlReader/testdata/cb.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<zoo city="Moscow" contenent="Europe">
<work>no</work>
<animal>cat</animal>
</zoo>
<zoo city="Banghok" contenent="Asia">
<work>yes</work>
<animal>kakariki</animal>
</zoo>
<zoo city="Praha" contenent="Europe">
<work>yes</work>
<animal>bear</animal>
</zoo>
</root>
Loading