diff --git a/.travis.yml b/.travis.yml index 388e468..a85d616 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,3 +6,5 @@ php: - 5.4 - 5.6 - 7.0 +dist: precise + diff --git a/README.md b/README.md index 12e8235..971f483 100644 --- a/README.md +++ b/README.md @@ -31,5 +31,6 @@ foreach($xml->path('root/animal') as $animal) { // $animal is of type SimpleXMLElelent // only the current iterated $animal is in memory, so huge xml files can be read, without much memory consumption echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; -} +} + ``` diff --git a/examples/filter-example1.php b/examples/filter-example1.php new file mode 100644 index 0000000..9ef9214 --- /dev/null +++ b/examples/filter-example1.php @@ -0,0 +1,34 @@ + + + yes + + + yes + + + no + + +'); + +foreach ($xml->path('root/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/animal") { + if (! in_array($xr->getAttribute('type'), ['dog', 'kakariki'])) { + return PathIterator::ELEMENT_IS_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; +}) as $animal) { + echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; +} diff --git a/examples/filter-example2.php b/examples/filter-example2.php new file mode 100644 index 0000000..e08ebf5 --- /dev/null +++ b/examples/filter-example2.php @@ -0,0 +1,40 @@ + + + pet + + yes + + + yes + + + + wild + + no + + + +'); + +foreach ($xml->path('root/group/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/group/type") { + if ($xr->readString() != 'pet') { + return PathIterator::SIBLINGS_ARE_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; +}) as $animal) { + echo "A {$animal->attributes()->type} has a tail? 
{$animal->hastail}!\n"; +} diff --git a/examples/filter-example3.php b/examples/filter-example3.php new file mode 100644 index 0000000..9b5c242 --- /dev/null +++ b/examples/filter-example3.php @@ -0,0 +1,76 @@ + + + + + parrot + parrot is ... + + + falcon + falcon is ... + + + + + gazelle + gazelle is ... + + + lion + lion is ... + + + + + + + straus + straus is ... + + + eagle + eagle is ... + + + + + panda + panda is ... + + + tiger + tiger is ... + + + + +'); + +foreach ($xml->path('root/continent/*/creature', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root") { + if ($xr->getAttribute('id') != '123') { + return PathIterator::ELEMENT_IS_INVALID; + } + } elseif ($path == "root/continent") { + if ($xr->getAttribute('name') != 'Eurasia') { + return PathIterator::ELEMENT_IS_INVALID; + } + } elseif (preg_match(chr(1) . 'root/continent/[^/]+/creature' . chr(1), $path)) { + if ($xr->getAttribute('predator') != 'yes') { + return PathIterator::ELEMENT_IS_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; +}) as $animal) { + echo "A {$animal->name} is predator! {$animal->memo}\n"; +} diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php index 9dd833e..c06f470 100644 --- a/src/SimpleXmlReader/PathIterator.php +++ b/src/SimpleXmlReader/PathIterator.php @@ -12,6 +12,22 @@ class PathIterator implements Iterator const DESCENDANTS_COULD_MATCH = 'DESCENDANTS_COULD_MATCH'; const DESCENDANTS_CANT_MATCH = 'DESCENDANTS_CANT_MATCH'; + /* + * The list of return codes for filtering callback function + */ + /* + * Valid elem, no filtering. + */ + const ELEMENT_IS_VALID = 1; // elem + /* + * Invalid elem: it and its descendants have to be filtered out. + */ + const ELEMENT_IS_INVALID = 2; + /* + * The same as `ELEMENT_IS_INVALID`. Additionally, the sibling elems after it (and their descendants) have to be filtered out too. 
+ */ + const SIBLINGS_ARE_INVALID = 3; + protected $reader; protected $searchPath; protected $searchCrumbs; @@ -21,7 +37,12 @@ class PathIterator implements Iterator protected $isValid; protected $returnType; - public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType) + /* + * Filtering callback function + */ + protected $callback; + + public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType, $callback = null) { $this->reader = $reader; $this->searchPath = $path; @@ -31,6 +52,7 @@ public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnTy $this->rewindCount = 0; $this->isValid = false; $this->returnType = $returnType; + $this->callback = $callback; } public function current() @@ -110,6 +132,15 @@ protected function pathIsMatching() return self::DESCENDANTS_COULD_MATCH; } + protected function searchForOpenTag(XMLReader $r) + { + // search for open tag + while ($r->nodeType != XMLReader::ELEMENT) { + if (! $r->tryRead()) { return false; } + } + return true; + } + public function tryGotoNextIterationElement() { $r = $this->reader; @@ -124,21 +155,44 @@ public function tryGotoNextIterationElement() while (true) { // search for open tag - while ($r->nodeType != XMLReader::ELEMENT) { - if (! $r->tryRead()) { return false; } - } + if (! 
$this->searchForOpenTag($r)) { return false; } // fill crumbs array_splice($this->crumbs, $r->depth, count($this->crumbs), array($r->name)); - switch ($this->pathIsMatching()) { + $matching = $this->pathIsMatching(); + + $uf = self::ELEMENT_IS_VALID; + if ($this->callback && is_callable($this->callback) + && ($uf = call_user_func_array($this->callback, array($r, $this->crumbs))) !== self::ELEMENT_IS_VALID) { + + // extra check for sanity of a value returned by the user filter + if ($uf !== self::SIBLINGS_ARE_INVALID && $uf !== self::ELEMENT_IS_INVALID ) { + $uf = self::ELEMENT_IS_INVALID; + } + + $df = $r->depth; + + if ($uf === self::SIBLINGS_ARE_INVALID) { $df--; } + $matching = self::DESCENDANTS_CANT_MATCH; + } + + switch ($matching) { case self::DESCENDANTS_COULD_MATCH: if (! $r->tryRead()) { return false; } continue 2; case self::DESCENDANTS_CANT_MATCH: + if (! $r->tryNext()) { return false; } + if ($uf !== self::ELEMENT_IS_VALID) { + if (! $this->searchForOpenTag($r)) { return false; } + while ($r->depth > $df) { + if (! $r->tryNext()) { return false; } + if (! 
$this->searchForOpenTag($r)) { return false; } + } + } continue 2; case self::IS_MATCH: diff --git a/src/SimpleXmlReader/SimpleXmlReader.php b/src/SimpleXmlReader/SimpleXmlReader.php index c3d31a0..53952ed 100644 --- a/src/SimpleXmlReader/SimpleXmlReader.php +++ b/src/SimpleXmlReader/SimpleXmlReader.php @@ -44,8 +44,8 @@ public static function openFromString($source, $encoding = 'UTF-8', $options = 0 return $simpleXmlReader; } - public function path($path, $returnType = self::RETURN_SIMPLE_XML) + public function path($path, $returnType = self::RETURN_SIMPLE_XML, $callback = null) { - return new PathIterator($this->xmlReader, $path, $returnType); + return new PathIterator($this->xmlReader, $path, $returnType, $callback); } } diff --git a/tests/SimpleXmlReader/PathIteratorTest.php b/tests/SimpleXmlReader/PathIteratorTest.php index 3012e8b..143398f 100644 --- a/tests/SimpleXmlReader/PathIteratorTest.php +++ b/tests/SimpleXmlReader/PathIteratorTest.php @@ -107,4 +107,88 @@ public function testInvalidXml3() iterator_to_array($iterator); } + + /** @test */ + public function testPathCbAttr1OuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('city') != "Banghok") { + return PathIterator::ELEMENT_IS_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('kakariki', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbAttr2OuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . 
'/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('contenent') != "Europe") { + return PathIterator::ELEMENT_IS_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('catbear', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbElemOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo/work") { + if ($xr->readString() != "yes") { + return PathIterator::SIBLINGS_ARE_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbElemAttrOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('contenent') != "Europe") { + return PathIterator::ELEMENT_IS_INVALID; + } + } elseif ($path == "root/zoo/work") { + if ($xr->readString() != "yes") { + return PathIterator::SIBLINGS_ARE_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('bear', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCb2ElemOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . 
'/testdata/cb2.xml'); + $res = implode('', iterator_to_array($xml->path('root/group/zoos/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/group/work") { + if ($xr->readString() != "yes") { + return PathIterator::SIBLINGS_ARE_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res)); + } } diff --git a/tests/SimpleXmlReader/testdata/cb.xml b/tests/SimpleXmlReader/testdata/cb.xml new file mode 100644 index 0000000..f434d38 --- /dev/null +++ b/tests/SimpleXmlReader/testdata/cb.xml @@ -0,0 +1,15 @@ + + + + no + cat + + + yes + kakariki + + + yes + bear + + diff --git a/tests/SimpleXmlReader/testdata/cb2.xml b/tests/SimpleXmlReader/testdata/cb2.xml new file mode 100644 index 0000000..48c50be --- /dev/null +++ b/tests/SimpleXmlReader/testdata/cb2.xml @@ -0,0 +1,22 @@ + + + + no + + + cat + + + + + yes + + + kakariki + + + bear + + + +