diff --git a/.travis.yml b/.travis.yml
index 388e468..a85d616 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,3 +6,5 @@ php:
- 5.4
- 5.6
- 7.0
+dist: precise
+
diff --git a/README.md b/README.md
index 12e8235..971f483 100644
--- a/README.md
+++ b/README.md
@@ -31,5 +31,6 @@ foreach($xml->path('root/animal') as $animal) {
// $animal is of type SimpleXMLElelent
// only the current iterated $animal is in memory, so huge xml files can be read, without much memory consumption
echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
-}
+}
+
```
diff --git a/examples/filter-example1.php b/examples/filter-example1.php
new file mode 100644
index 0000000..9ef9214
--- /dev/null
+++ b/examples/filter-example1.php
@@ -0,0 +1,34 @@
+
+
+ yes
+
+
+ yes
+
+
+ no
+
+
+');
+
+foreach ($xml->path('root/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/animal") {
+ if (! in_array($xr->getAttribute('type'), ['dog', 'kakariki'])) {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+}) as $animal) {
+ echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
+}
diff --git a/examples/filter-example2.php b/examples/filter-example2.php
new file mode 100644
index 0000000..e08ebf5
--- /dev/null
+++ b/examples/filter-example2.php
@@ -0,0 +1,40 @@
+
+
+ pet
+
+ yes
+
+
+ yes
+
+
+
+ wild
+
+ no
+
+
+
+');
+
+foreach ($xml->path('root/group/animal', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/group/type") {
+ if ($xr->readString() != 'pet') {
+ return PathIterator::SIBLINGS_ARE_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+}) as $animal) {
+ echo "A {$animal->attributes()->type} has a tail? {$animal->hastail}!\n";
+}
diff --git a/examples/filter-example3.php b/examples/filter-example3.php
new file mode 100644
index 0000000..9b5c242
--- /dev/null
+++ b/examples/filter-example3.php
@@ -0,0 +1,76 @@
+
+
+
+
+ parrot
+ parrot is ...
+
+
+ falcon
+ falcon is ...
+
+
+
+
+ gazelle
+ gazelle is ...
+
+
+ lion
+ lion is ...
+
+
+
+
+
+
+ straus
+ straus is ...
+
+
+ eagle
+ eagle is ...
+
+
+
+
+ panda
+ panda is ...
+
+
+ tiger
+ tiger is ...
+
+
+
+
+');
+
+foreach ($xml->path('root/continent/*/creature', SimpleXmlReader::RETURN_SIMPLE_XML, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root") {
+ if ($xr->getAttribute('id') != '123') {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ } elseif ($path == "root/continent") {
+ if ($xr->getAttribute('name') != 'Eurasia') {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ } elseif (preg_match(chr(1) . 'root/continent/[^/]+/creature' . chr(1), $path)) {
+ if ($xr->getAttribute('predator') != 'yes') {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+}) as $animal) {
+ echo "A {$animal->name} is predator! {$animal->memo}\n";
+}
diff --git a/src/SimpleXmlReader/PathIterator.php b/src/SimpleXmlReader/PathIterator.php
index 9dd833e..c06f470 100644
--- a/src/SimpleXmlReader/PathIterator.php
+++ b/src/SimpleXmlReader/PathIterator.php
@@ -12,6 +12,22 @@ class PathIterator implements Iterator
const DESCENDANTS_COULD_MATCH = 'DESCENDANTS_COULD_MATCH';
const DESCENDANTS_CANT_MATCH = 'DESCENDANTS_CANT_MATCH';
+ /*
+ * The list of return codes for filtering callback function
+ */
+ /*
+ * Valid elem, no filtering.
+ */
+ const ELEMENT_IS_VALID = 1; // elem
+ /*
+ * The element is invalid, so it and its descendants are filtered out.
+ */
+ const ELEMENT_IS_INVALID = 2;
+ /*
+ * Same as `ELEMENT_IS_INVALID`; additionally, the sibling elements that follow it (and their descendants) are filtered out too.
+ */
+ const SIBLINGS_ARE_INVALID = 3;
+
protected $reader;
protected $searchPath;
protected $searchCrumbs;
@@ -21,7 +37,12 @@ class PathIterator implements Iterator
protected $isValid;
protected $returnType;
- public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType)
+ /*
+ * Filtering callback function
+ */
+ protected $callback;
+
+ public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnType, $callback = null)
{
$this->reader = $reader;
$this->searchPath = $path;
@@ -31,6 +52,7 @@ public function __construct(ExceptionThrowingXMLReader $reader, $path, $returnTy
$this->rewindCount = 0;
$this->isValid = false;
$this->returnType = $returnType;
+ $this->callback = $callback;
}
public function current()
@@ -110,6 +132,15 @@ protected function pathIsMatching()
return self::DESCENDANTS_COULD_MATCH;
}
+    /**
+     * Advance the reader until it is positioned on an opening element tag.
+     *
+     * A reader that is already positioned on an element node is left untouched.
+     *
+     * The parameter is typed as ExceptionThrowingXMLReader (the same type the
+     * constructor requires for the reader) because the loop relies on
+     * tryRead(), which the built-in XMLReader class does not provide.
+     *
+     * @param ExceptionThrowingXMLReader $r reader to advance
+     *
+     * @return bool true when the reader is on an element node, false as soon
+     *              as tryRead() returns a falsy value
+     */
+    protected function searchForOpenTag(ExceptionThrowingXMLReader $r)
+    {
+        while ($r->nodeType != XMLReader::ELEMENT) {
+            if (! $r->tryRead()) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
public function tryGotoNextIterationElement()
{
$r = $this->reader;
@@ -124,21 +155,44 @@ public function tryGotoNextIterationElement()
while (true) {
// search for open tag
- while ($r->nodeType != XMLReader::ELEMENT) {
- if (! $r->tryRead()) { return false; }
- }
+ if (! $this->searchForOpenTag($r)) { return false; }
// fill crumbs
array_splice($this->crumbs, $r->depth, count($this->crumbs), array($r->name));
- switch ($this->pathIsMatching()) {
+ $matching = $this->pathIsMatching();
+
+ $uf = self::ELEMENT_IS_VALID;
+ if ($this->callback && is_callable($this->callback)
+ && ($uf = call_user_func_array($this->callback, array($r, $this->crumbs))) !== self::ELEMENT_IS_VALID) {
+
+ // extra check for sanity of a value returned by the user filter
+ if ($uf !== self::SIBLINGS_ARE_INVALID && $uf !== self::ELEMENT_IS_INVALID ) {
+ $uf = self::ELEMENT_IS_INVALID;
+ }
+
+ $df = $r->depth;
+
+ if ($uf === self::SIBLINGS_ARE_INVALID) { $df--; }
+ $matching = self::DESCENDANTS_CANT_MATCH;
+ }
+
+ switch ($matching) {
case self::DESCENDANTS_COULD_MATCH:
if (! $r->tryRead()) { return false; }
continue 2;
case self::DESCENDANTS_CANT_MATCH:
+
if (! $r->tryNext()) { return false; }
+ if ($uf !== self::ELEMENT_IS_VALID) {
+ if (! $this->searchForOpenTag($r)) { return false; }
+ while ($r->depth > $df) {
+ if (! $r->tryNext()) { return false; }
+ if (! $this->searchForOpenTag($r)) { return false; }
+ }
+ }
continue 2;
case self::IS_MATCH:
diff --git a/src/SimpleXmlReader/SimpleXmlReader.php b/src/SimpleXmlReader/SimpleXmlReader.php
index c3d31a0..53952ed 100644
--- a/src/SimpleXmlReader/SimpleXmlReader.php
+++ b/src/SimpleXmlReader/SimpleXmlReader.php
@@ -44,8 +44,8 @@ public static function openFromString($source, $encoding = 'UTF-8', $options = 0
return $simpleXmlReader;
}
- public function path($path, $returnType = self::RETURN_SIMPLE_XML)
+ public function path($path, $returnType = self::RETURN_SIMPLE_XML, $callback = null)
{
- return new PathIterator($this->xmlReader, $path, $returnType);
+ return new PathIterator($this->xmlReader, $path, $returnType, $callback);
}
}
diff --git a/tests/SimpleXmlReader/PathIteratorTest.php b/tests/SimpleXmlReader/PathIteratorTest.php
index 3012e8b..143398f 100644
--- a/tests/SimpleXmlReader/PathIteratorTest.php
+++ b/tests/SimpleXmlReader/PathIteratorTest.php
@@ -107,4 +107,88 @@ public function testInvalidXml3()
iterator_to_array($iterator);
}
+
+ /**
+ * Filtering on an attribute of an ancestor element.
+ *
+ * Iterates `root/zoo/animal` in testdata/cb.xml as outer-XML strings. The
+ * callback returns ELEMENT_IS_INVALID for every `root/zoo` element whose
+ * `city` attribute is not equal to "Banghok" and ELEMENT_IS_VALID for
+ * everything else. The yielded strings are concatenated, stripped of all
+ * whitespace and compared with the expected value.
+ *
+ * @test
+ */
+ public function testPathCbAttr1OuterXml()
+ {
+ $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
+ $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/zoo") {
+ if ($xr->getAttribute('city') != "Banghok") {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+ })));
+ $this->assertEquals('kakariki', preg_replace('/\s/', '', (string) $res));
+ }
+
+ /**
+ * Filtering on an ancestor attribute that may keep more than one element.
+ *
+ * Iterates `root/zoo/animal` in testdata/cb.xml as outer-XML strings. The
+ * callback returns ELEMENT_IS_INVALID for every `root/zoo` element whose
+ * `contenent` attribute is not equal to "Europe" and ELEMENT_IS_VALID for
+ * everything else. The yielded strings are concatenated, stripped of all
+ * whitespace and compared with the expected value.
+ *
+ * @test
+ */
+ public function testPathCbAttr2OuterXml()
+ {
+ $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
+ $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/zoo") {
+ if ($xr->getAttribute('contenent') != "Europe") {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+ })));
+ $this->assertEquals('catbear', preg_replace('/\s/', '', (string) $res));
+ }
+
+ /**
+ * Filtering on the text content of a sibling element.
+ *
+ * Iterates `root/zoo/animal` in testdata/cb.xml as outer-XML strings. The
+ * callback returns SIBLINGS_ARE_INVALID for every `root/zoo/work` element
+ * whose string content is not equal to "yes" and ELEMENT_IS_VALID for
+ * everything else. The yielded strings are concatenated, stripped of all
+ * whitespace and compared with the expected value.
+ *
+ * @test
+ */
+ public function testPathCbElemOuterXml()
+ {
+ $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
+ $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/zoo/work") {
+ if ($xr->readString() != "yes") {
+ return PathIterator::SIBLINGS_ARE_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+ })));
+ $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res));
+ }
+
+ /**
+ * Combining an attribute filter with a sibling-content filter.
+ *
+ * Iterates `root/zoo/animal` in testdata/cb.xml as outer-XML strings. The
+ * callback returns ELEMENT_IS_INVALID for a `root/zoo` element whose
+ * `contenent` attribute is not equal to "Europe", SIBLINGS_ARE_INVALID for a
+ * `root/zoo/work` element whose string content is not equal to "yes", and
+ * ELEMENT_IS_VALID for everything else. The yielded strings are
+ * concatenated, stripped of all whitespace and compared with the expected
+ * value.
+ *
+ * @test
+ */
+ public function testPathCbElemAttrOuterXml()
+ {
+ $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb.xml');
+ $res = implode('', iterator_to_array($xml->path('root/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/zoo") {
+ if ($xr->getAttribute('contenent') != "Europe") {
+ return PathIterator::ELEMENT_IS_INVALID;
+ }
+ } elseif ($path == "root/zoo/work") {
+ if ($xr->readString() != "yes") {
+ return PathIterator::SIBLINGS_ARE_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+ })));
+ $this->assertEquals('bear', preg_replace('/\s/', '', (string) $res));
+ }
+
+ /**
+ * Sibling-content filter applied several levels above the matched path.
+ *
+ * Iterates `root/group/zoos/zoo/animal` in testdata/cb2.xml as outer-XML
+ * strings. The callback returns SIBLINGS_ARE_INVALID for every
+ * `root/group/work` element whose string content is not equal to "yes" and
+ * ELEMENT_IS_VALID for everything else. The yielded strings are
+ * concatenated, stripped of all whitespace and compared with the expected
+ * value.
+ *
+ * @test
+ */
+ public function testPathCb2ElemOuterXml()
+ {
+ $xml = SimpleXmlReader::openXML(__DIR__ . '/testdata/cb2.xml');
+ $res = implode('', iterator_to_array($xml->path('root/group/zoos/zoo/animal', SimpleXmlReader::RETURN_OUTER_XML_STRING, function ($xr, $crumbs) {
+ $path = implode("/", $crumbs);
+ if ($path == "root/group/work") {
+ if ($xr->readString() != "yes") {
+ return PathIterator::SIBLINGS_ARE_INVALID;
+ }
+ }
+ return PathIterator::ELEMENT_IS_VALID;
+ })));
+ $this->assertEquals('kakarikibear', preg_replace('/\s/', '', (string) $res));
+ }
}
diff --git a/tests/SimpleXmlReader/testdata/cb.xml b/tests/SimpleXmlReader/testdata/cb.xml
new file mode 100644
index 0000000..f434d38
--- /dev/null
+++ b/tests/SimpleXmlReader/testdata/cb.xml
@@ -0,0 +1,15 @@
+
+
+
+ no
+ cat
+
+
+ yes
+ kakariki
+
+
+ yes
+ bear
+
+
diff --git a/tests/SimpleXmlReader/testdata/cb2.xml b/tests/SimpleXmlReader/testdata/cb2.xml
new file mode 100644
index 0000000..48c50be
--- /dev/null
+++ b/tests/SimpleXmlReader/testdata/cb2.xml
@@ -0,0 +1,22 @@
+
+
+
+ no
+
+
+ cat
+
+
+
+
+ yes
+
+
+ kakariki
+
+
+ bear
+
+
+
+