From c20aa3e649df647e82ec42b34193be4e66a0773c Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Mon, 6 Mar 2017 12:45:24 +0700 Subject: [PATCH 1/8] filter facility is added --- examples/filter-example1.php | 32 ++++++++++ examples/filter-example2.php | 38 ++++++++++++ src/SimpleXmlReader/PathIterator.php | 26 +++++++- src/SimpleXmlReader/SimpleXmlReader.php | 4 +- tests/SimpleXmlReader/PathIteratorTest.php | 69 ++++++++++++++++++++++ tests/SimpleXmlReader/testdata/cb.xml | 15 +++++ 6 files changed, 180 insertions(+), 4 deletions(-) create mode 100644 examples/filter-example1.php create mode 100644 examples/filter-example2.php create mode 100644 tests/SimpleXmlReader/testdata/cb.xml diff --git a/examples/filter-example1.php b/examples/filter-example1.php new file mode 100644 index 0000000..48898aa --- /dev/null +++ b/examples/filter-example1.php @@ -0,0 +1,32 @@ + + + yes + + + yes + + + no + + +'); + +foreach ($xml->path('root/animal', SimpleXMLReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/animal") { + if (! in_array($xr->getAttribute('type'), ['dog', 'cat'])) { + return false; + } + } + return true; +}) as $animal) { + echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; +} diff --git a/examples/filter-example2.php b/examples/filter-example2.php new file mode 100644 index 0000000..f511950 --- /dev/null +++ b/examples/filter-example2.php @@ -0,0 +1,38 @@ + + + pet + + yes + + + yes + + + + wild + + no + + + +'); + +foreach ($xml->path('root/group/animal', SimpleXMLReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/group/type") { + if ($xr->readString() != 'pet') { + return false; + } + } + return true; +}) as $animal) { + echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; +} diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php index 9dd833e..d1bdf18 100644 --- a/src/SimpleXmlReader/PathIterator.php +++ b/src/SimpleXmlReader/PathIterator.php @@ -21,7 +21,12 @@ class PathIterator implements Iterator protected $isValid; protected $returnType; - public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType) + /* + * Filtering callback function + */ + protected $callback; + + public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType, $callback = null) { $this->reader = $reader; $this->searchPath = $path; @@ -31,6 +36,7 @@ public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnTy $this->rewindCount = 0; $this->isValid = false; $this->returnType = $returnType; + $this->callback = $callback; } public function current() @@ -131,7 +137,18 @@ public function tryGotoNextIterationElement() // fill crumbs array_splice($this->crumbs, $r->depth, count($this->crumbs), array($r->name)); - switch ($this->pathIsMatching()) { + $matching = $this->pathIsMatching(); + + $uf = false; + if ($this->callback && is_callable($this->callback) + && !call_user_func_array($this->callback, [$r, $this->crumbs])) { + $uf = true; + $sd = count($this->searchCrumbs) - 1; + if ($sd == $r->depth) { $sd--; } + $matching = self::DESCENDANTS_CANT_MATCH; + } + + switch ($matching) { case self::DESCENDANTS_COULD_MATCH: if (! $r->tryRead()) { return false; } @@ -139,6 +156,11 @@ public function tryGotoNextIterationElement() case self::DESCENDANTS_CANT_MATCH: if (! $r->tryNext()) { return false; } + if ($uf) { + while ($r->depth > $sd) { + if (! $r->tryNext()) { return false; } + } + } continue 2; case self::IS_MATCH: diff --git a/src/SimpleXmlReader/SimpleXmlReader.php b/src/SimpleXmlReader/SimpleXmlReader.php index c3d31a0..53952ed 100644 --- a/src/SimpleXmlReader/SimpleXmlReader.php +++ b/src/SimpleXmlReader/SimpleXmlReader.php @@ -44,8 +44,8 @@ public static function openFromString($source, $encoding = 'UTF-8', $options = 0 return $simpleXmlReader; } - public function path($path, $returnType = self::RETURN_SIMPLE_XML) + public function path($path, $returnType = self::RETURN_SIMPLE_XML, $callback = null) { - return new PathIterator($this->xmlReader, $path, $returnType); + return new PathIterator($this->xmlReader, $path, $returnType, $callback); } } diff --git a/tests/SimpleXmlReader/PathIteratorTest.php b/tests/SimpleXmlReader/PathIteratorTest.php index 3012e8b..8d3d60a 100644 --- a/tests/SimpleXmlReader/PathIteratorTest.php +++ b/tests/SimpleXmlReader/PathIteratorTest.php @@ -107,4 +107,73 @@ public function testInvalidXml3() iterator_to_array($iterator); } + /** @test */ + public function testPathCbAttr1OuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('city') != "Banghok") { + return false; + } + } + return true; + }))); + $this->assertEquals('kakariki', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbAttr2OuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('contenent') != "Europe") { + return false; + } + } + return true; + }))); + $this->assertEquals('catbear', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbElemOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo/work") { + if ($xr->readString() != "yes") { + return false; + } + } + return true; + }))); + //echo $res; + $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res)); + } + + /** @test */ + public function testPathCbElemAttrOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml'); + $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/zoo") { + if ($xr->getAttribute('contenent') != "Europe") { + return false; + } + } elseif ($path == "root/zoo/work") { + if ($xr->readString() != "yes") { + return false; + } + } + return true; + }))); + //echo $res; + $this->assertEquals('bear', preg_replace('/\s/', '', (string) $res)); + } } diff --git a/tests/SimpleXmlReader/testdata/cb.xml b/tests/SimpleXmlReader/testdata/cb.xml new file mode 100644 index 0000000..f434d38 --- /dev/null +++ b/tests/SimpleXmlReader/testdata/cb.xml @@ -0,0 +1,15 @@ + + + + no + cat + + + yes + kakariki + + + yes + bear + + From 36efb05afc298968ed5c0091fff14927e160f427 Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Mon, 6 Mar 2017 13:31:22 +0700 Subject: [PATCH 2/8] refs to class has renamed(PSR-4 compliant) --- examples/filter-example1.php | 2 +- examples/filter-example2.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/filter-example1.php b/examples/filter-example1.php index 48898aa..0f71da7 100644 --- a/examples/filter-example1.php +++ b/examples/filter-example1.php @@ -19,7 +19,7 @@ '); -foreach ($xml->path('root/animal', SimpleXMLReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { +foreach ($xml->path('root/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { $path = implode("/", $crumbs); if ($path == "root/animal") { if (! in_array($xr->getAttribute('type'), ['dog', 'cat'])) { diff --git a/examples/filter-example2.php b/examples/filter-example2.php index f511950..73877af 100644 --- a/examples/filter-example2.php +++ b/examples/filter-example2.php @@ -25,7 +25,7 @@ '); -foreach ($xml->path('root/group/animal', SimpleXMLReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { +foreach ($xml->path('root/group/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { $path = implode("/", $crumbs); if ($path == "root/group/type") { if ($xr->readString() != 'pet') { From acaeaa2a77a11bbc6654c1dfd14b039b98dc8474 Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Mon, 6 Mar 2017 18:08:34 +0700 Subject: [PATCH 3/8] added: filter's condition based on sibling elem(s) --- examples/filter-example1.php | 8 ++-- examples/filter-example2.php | 6 ++- src/SimpleXmlReader/PathIterator.php | 45 +++++++++++++++++----- tests/SimpleXmlReader/PathIteratorTest.php | 37 ++++++++++++------ tests/SimpleXmlReader/testdata/cb2.xml | 22 +++++++++++ 5 files changed, 92 insertions(+), 26 deletions(-) create mode 100644 tests/SimpleXmlReader/testdata/cb2.xml diff --git a/examples/filter-example1.php b/examples/filter-example1.php index 0f71da7..9ef9214 100644 --- a/examples/filter-example1.php +++ b/examples/filter-example1.php @@ -2,6 +2,8 @@ namespace SimpleXmlReader; +use SimpleXmlReader\PathIterator; + require(__DIR__ . '/../src/SimpleXmlReader/autoload.php'); @@ -22,11 +24,11 @@ foreach ($xml->path('root/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { $path = implode("/", $crumbs); if ($path == "root/animal") { - if (! in_array($xr->getAttribute('type'), ['dog', 'cat'])) { - return false; + if (! in_array($xr->getAttribute('type'), ['dog', 'kakariki'])) { + return PathIterator::ELEMENT_IS_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }) as $animal) { echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; } diff --git a/examples/filter-example2.php b/examples/filter-example2.php index 73877af..e08ebf5 100644 --- a/examples/filter-example2.php +++ b/examples/filter-example2.php @@ -2,6 +2,8 @@ namespace SimpleXmlReader; +use SimpleXmlReader\PathIterator; + require(__DIR__ . '/../src/SimpleXmlReader/autoload.php'); @@ -29,10 +31,10 @@ $path = implode("/", $crumbs); if ($path == "root/group/type") { if ($xr->readString() != 'pet') { - return false; + return PathIterator::SIBLINGS_ARE_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }) as $animal) { echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; } diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php index d1bdf18..529cdbb 100644 --- a/src/SimpleXmlReader/PathIterator.php +++ b/src/SimpleXmlReader/PathIterator.php @@ -12,6 +12,22 @@ class PathIterator implements Iterator const DESCENDANTS_COULD_MATCH = 'DESCENDANTS_COULD_MATCH'; const DESCENDANTS_CANT_MATCH = 'DESCENDANTS_CANT_MATCH'; + /* + * The list of return codes for filtering callback function + */ + /* + * Valid elem, no filtering. + */ + const ELEMENT_IS_VALID = 1; // elem + /* + * Invalid elem and its descendants, so have to be filtered out. + */ + const ELEMENT_IS_INVALID = 2; + /* + * The same as `ELEMENT_IS_INVALID`. Additionaly after it sibling elems(and its descendants) have to be filtered out too. + */ + const SIBLINGS_ARE_INVALID = 3; + protected $reader; protected $searchPath; protected $searchCrumbs; @@ -116,6 +132,15 @@ protected function pathIsMatching() return self::DESCENDANTS_COULD_MATCH; } + protected function searchForOpenTag(XMLReader $r) + { + // search for open tag + while ($r->nodeType != XMLReader::ELEMENT) { + if (! $r->tryRead()) { return false; } + } + return true; + } + public function tryGotoNextIterationElement() { $r = $this->reader; @@ -130,21 +155,18 @@ public function tryGotoNextIterationElement() while (true) { // search for open tag - while ($r->nodeType != XMLReader::ELEMENT) { - if (! $r->tryRead()) { return false; } - } + if (! $this->searchForOpenTag($r)) { return false; } // fill crumbs array_splice($this->crumbs, $r->depth, count($this->crumbs), array($r->name)); $matching = $this->pathIsMatching(); - $uf = false; + $uf = self::ELEMENT_IS_VALID; if ($this->callback && is_callable($this->callback) - && !call_user_func_array($this->callback, [$r, $this->crumbs])) { - $uf = true; - $sd = count($this->searchCrumbs) - 1; - if ($sd == $r->depth) { $sd--; } + && ($uf = call_user_func_array($this->callback, [$r, $this->crumbs])) !== self::ELEMENT_IS_VALID) { + $df = $r->depth; + if ($uf === self::SIBLINGS_ARE_INVALID) { $df--; } $matching = self::DESCENDANTS_CANT_MATCH; } @@ -155,10 +177,13 @@ public function tryGotoNextIterationElement() continue 2; case self::DESCENDANTS_CANT_MATCH: + if (! $r->tryNext()) { return false; } - if ($uf) { - while ($r->depth > $sd) { + if ($uf !== self::ELEMENT_IS_VALID) { + if (! $this->searchForOpenTag($r)) { return false; } + while ($r->depth > $df) { if (! $r->tryNext()) { return false; } + if (! $this->searchForOpenTag($r)) { return false; } } } continue 2; diff --git a/tests/SimpleXmlReader/PathIteratorTest.php b/tests/SimpleXmlReader/PathIteratorTest.php index 8d3d60a..143398f 100644 --- a/tests/SimpleXmlReader/PathIteratorTest.php +++ b/tests/SimpleXmlReader/PathIteratorTest.php @@ -107,6 +107,7 @@ public function testInvalidXml3() iterator_to_array($iterator); } + /** @test */ public function testPathCbAttr1OuterXml() { @@ -115,10 +116,10 @@ public function testPathCbAttr1OuterXml() $path = implode("/", $crumbs); if ($path == "root/zoo") { if ($xr->getAttribute('city') != "Banghok") { - return false; + return PathIterator::ELEMENT_IS_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }))); $this->assertEquals('kakariki', preg_replace('/\s/', '', (string) $res)); } @@ -131,10 +132,10 @@ public function testPathCbAttr2OuterXml() $path = implode("/", $crumbs); if ($path == "root/zoo") { if ($xr->getAttribute('contenent') != "Europe") { - return false; + return PathIterator::ELEMENT_IS_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }))); $this->assertEquals('catbear', preg_replace('/\s/', '', (string) $res)); } @@ -147,12 +148,11 @@ public function testPathCbElemOuterXml() $path = implode("/", $crumbs); if ($path == "root/zoo/work") { if ($xr->readString() != "yes") { - return false; + return PathIterator::SIBLINGS_ARE_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }))); - //echo $res; $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res)); } @@ -164,16 +164,31 @@ public function testPathCbElemAttrOuterXml() $path = implode("/", $crumbs); if ($path == "root/zoo") { if ($xr->getAttribute('contenent') != "Europe") { - return false; + return PathIterator::ELEMENT_IS_INVALID; } } elseif ($path == "root/zoo/work") { if ($xr->readString() != "yes") { - return false; + return PathIterator::SIBLINGS_ARE_INVALID; } } - return true; + return PathIterator::ELEMENT_IS_VALID; }))); - //echo $res; $this->assertEquals('bear', preg_replace('/\s/', '', (string) $res)); } + + /** @test */ + public function testPathCb2ElemOuterXml() + { + $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb2.xml'); + $res = implode('', iterator_to_array($xml->path('root/group/zoos/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root/group/work") { + if ($xr->readString() != "yes") { + return PathIterator::SIBLINGS_ARE_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; + }))); + $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res)); + } } diff --git a/tests/SimpleXmlReader/testdata/cb2.xml b/tests/SimpleXmlReader/testdata/cb2.xml new file mode 100644 index 0000000..48c50be --- /dev/null +++ b/tests/SimpleXmlReader/testdata/cb2.xml @@ -0,0 +1,22 @@ + + + + no + + + cat + + + + + yes + + + kakariki + + + bear + + + + From 9429bc95b026230d45abe20e4a112097c870d69d Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Thu, 9 Mar 2017 10:54:41 +0700 Subject: [PATCH 4/8] extra check for sanity --- src/SimpleXmlReader/PathIterator.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php index 529cdbb..aab10d1 100644 --- a/src/SimpleXmlReader/PathIterator.php +++ b/src/SimpleXmlReader/PathIterator.php @@ -165,7 +165,14 @@ public function tryGotoNextIterationElement() $uf = self::ELEMENT_IS_VALID; if ($this->callback && is_callable($this->callback) && ($uf = call_user_func_array($this->callback, [$r, $this->crumbs])) !== self::ELEMENT_IS_VALID) { + + // extra check for sanity of a value returned by the user filter + if ($uf !== self::SIBLINGS_ARE_INVALID && $uf !== self::ELEMENT_IS_INVALID ) { + $uf = self::ELEMENT_IS_INVALID; + } + $df = $r->depth; + if ($uf === self::SIBLINGS_ARE_INVALID) { $df--; } $matching = self::DESCENDANTS_CANT_MATCH; } From f5483350bbfccf70ba7ff51d9cd7db7c3577e4f3 Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Thu, 10 Aug 2017 00:15:48 +0700 Subject: [PATCH 5/8] fix syntax error for php5.3 --- src/SimpleXmlReader/PathIterator.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php index aab10d1..c06f470 100644 --- a/src/SimpleXmlReader/PathIterator.php +++ b/src/SimpleXmlReader/PathIterator.php @@ -164,7 +164,7 @@ public function tryGotoNextIterationElement() $uf = self::ELEMENT_IS_VALID; if ($this->callback && is_callable($this->callback) - && ($uf = call_user_func_array($this->callback, [$r, $this->crumbs])) !== self::ELEMENT_IS_VALID) { + && ($uf = call_user_func_array($this->callback, array($r, $this->crumbs))) !== self::ELEMENT_IS_VALID) { // extra check for sanity of a value returned by the user filter if ($uf !== self::SIBLINGS_ARE_INVALID && $uf !== self::ELEMENT_IS_INVALID ) { From a5ff9de3ab347f9f6ddc4053bb59423b75b0d7e1 Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Thu, 10 Aug 2017 00:30:48 +0700 Subject: [PATCH 6/8] change distro to precise --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 388e468..a85d616 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,3 +6,5 @@ php: - 5.4 - 5.6 - 7.0 +dist: precise + From 924bd9d5be356ae0c4b3333644b205da172ca7ae Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Fri, 11 Aug 2017 13:25:46 +0700 Subject: [PATCH 7/8] a bit more complex filter example --- examples/filter-example3.php | 76 ++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 examples/filter-example3.php diff --git a/examples/filter-example3.php b/examples/filter-example3.php new file mode 100644 index 0000000..9b5c242 --- /dev/null +++ b/examples/filter-example3.php @@ -0,0 +1,76 @@ + + + + + parrot + parrot is ... + + + falcon + falcon is ... + + + + + gazelle + gazelle is ... + + + lion + lion is ... + + + + + + + straus + straus is ... + + + eagle + eagle is ... + + + + + panda + panda is ... + + + tiger + tiger is ... + + + + +'); + +foreach ($xml->path('root/continent/*/creature', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) { + $path = implode("/", $crumbs); + if ($path == "root") { + if ($xr->getAttribute('id') != '123') { + return PathIterator::ELEMENT_IS_INVALID; + } + } elseif ($path == "root/continent") { + if ($xr->getAttribute('name') != 'Eurasia') { + return PathIterator::ELEMENT_IS_INVALID; + } + } elseif (preg_match(chr(1) . 'root/continent/[^/]+/creature' . chr(1), $path)) { + if ($xr->getAttribute('predator') != 'yes') { + return PathIterator::ELEMENT_IS_INVALID; + } + } + return PathIterator::ELEMENT_IS_VALID; +}) as $animal) { + echo "A {$animal->name} is predator! {$animal->memo}\n"; +} From 3934ebd6b5ed5ea9f761aa8090d99c5187c936d8 Mon Sep 17 00:00:00 2001 From: Eduard Chernikov Date: Thu, 14 Mar 2019 16:03:55 +0700 Subject: [PATCH 8/8] typo --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 12e8235..971f483 100644 --- a/README.md +++ b/README.md @@ -31,5 +31,6 @@ foreach($xml->path('root/animal') as $animal) { // $animal is of type SimpleXMLElelent // only the current iterated $animal is in memory, so huge xml files can be read, without much memory consumption echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n"; -} +} + ```