@@ -517,7 +517,11 @@ public class HTMLScanner implements XMLDocumentSource, XMLLocator, HTMLComponent
517517 * each use to ensure correctness.
518518 * <p>
519519 * Thread safety: Safe because scanner instances are single-threaded.
520- */ final boolean [] fSingleBoolean = {false };
520+ */
521+ final boolean [] fSingleBoolean = {false };
522+
523+ /** Reusable parser for numeric character references (&#x...; and &#...;) */
524+ private final HTMLUnicodeEntitiesParser fUnicodeEntitiesParser = new HTMLUnicodeEntitiesParser ();
521525
522526 final HTMLConfiguration htmlConfiguration_ ;
523527
@@ -1343,6 +1347,7 @@ else if (NAMES_LOWERCASE == mode && !Character.isLowerCase(c)) {
13431347 break ;
13441348 }
13451349 }
1350+
13461351 final int length = fCurrentEntity .offset_ - offset ;
13471352 final String name = length > 0 ? new String (fCurrentEntity .buffer_ , offset , length ) : null ;
13481353 if (DEBUG_BUFFER ) {
@@ -1418,7 +1423,6 @@ else if (NAMES_LOWERCASE == fNamesElems && !Character.isLowerCase(c)) {
14181423 if (DEBUG_BUFFER ) {
14191424 fCurrentEntity .debugBufferIfNeeded (")scanName: " , " -> \" " + name + '"' );
14201425 }
1421-
14221426 return name ;
14231427 }
14241428
@@ -1439,17 +1443,17 @@ protected int scanEntityRef(final XMLString str, final XMLString plainValue, fin
14391443 str .append ((char ) nextChar );
14401444
14411445 if ('#' == nextChar ) {
1442- final HTMLUnicodeEntitiesParser parser = new HTMLUnicodeEntitiesParser ();
1446+ fUnicodeEntitiesParser . reset ();
14431447
14441448 do {
14451449 nextChar = fCurrentEntity .readPreservingBufferContent ();
14461450 if (nextChar != -1 ) {
14471451 str .append ((char ) nextChar );
14481452 }
14491453 }
1450- while (nextChar != -1 && parser .parseNumeric (nextChar ));
1454+ while (nextChar != -1 && fUnicodeEntitiesParser .parseNumeric (nextChar ));
14511455
1452- final String match = parser .getMatch ();
1456+ final String match = fUnicodeEntitiesParser .getMatch ();
14531457 if (match == null ) {
14541458 fCurrentEntity .rewind (str .length () - 1 );
14551459 if (plainValue != null ) {
@@ -1458,7 +1462,7 @@ protected int scanEntityRef(final XMLString str, final XMLString plainValue, fin
14581462 str .clearAndAppend ('&' );
14591463 }
14601464 else {
1461- fCurrentEntity .rewind (parser .getRewindCount ());
1465+ fCurrentEntity .rewind (fUnicodeEntitiesParser .getRewindCount ());
14621466 if (plainValue != null ) {
14631467 plainValue .append (str );
14641468 }
@@ -2312,16 +2316,16 @@ else if (c == '/') {
23122316 else if (!fAllowSelfclosingTags_
23132317 && !fAllowSelfclosingIframe_
23142318 && "iframe" .equals (enameLC )) {
2315- scanUntilEndTag ("iframe" );
2319+ scanUntilEndTag ("/ iframe" );
23162320 }
23172321 else if (!fParseNoScriptContent_ && "noscript" .equals (enameLC )) {
2318- scanUntilEndTag ("noscript" );
2322+ scanUntilEndTag ("/ noscript" );
23192323 }
23202324 else if ("noframes" .equals (enameLC )) {
2321- scanUntilEndTag ("noframes" );
2325+ scanUntilEndTag ("/ noframes" );
23222326 }
23232327 else if ("noembed" .equals (enameLC )) {
2324- scanUntilEndTag ("noembed" );
2328+ scanUntilEndTag ("/ noembed" );
23252329 }
23262330 // title inside svg
23272331 else if ("title" .equals (enameLC )
@@ -2400,15 +2404,14 @@ private void eof() {
24002404 * plain text when feature {@link HTMLScanner#PARSE_NOSCRIPT_CONTENT} is set to
24012405 * false.
24022406 *
2403- * @param tagName the tag for which content is scanned (one of "noscript",
2404- * "noframes", "iframe")
2407+ * @param tagNameWithLeadingSlash the end tag name, including its leading slash, up to which content is scanned (one of "/ noscript",
2408+ * "/ noframes", "/noembed", "/ iframe")
24052409 * @throws IOException on error
24062410 */
2407- private void scanUntilEndTag (final String tagName ) throws IOException {
2411+ private void scanUntilEndTag (final String tagNameWithLeadingSlash ) throws IOException {
24082412 fScanUntilEndTag .clear ();
24092413
2410- final String end = "/" + tagName ;
2411- final int lengthToScan = tagName .length () + 2 ;
2414+ final int lengthToScan = tagNameWithLeadingSlash .length () + 1 ;
24122415
24132416 while (true ) {
24142417 final int c = fCurrentEntity .read ();
@@ -2418,7 +2421,8 @@ private void scanUntilEndTag(final String tagName) throws IOException {
24182421 if (c == '<' ) {
24192422 final String next = fCurrentEntity .nextContent (lengthToScan ) + " " ;
24202423 if (next .length () >= lengthToScan
2421- && end .equalsIgnoreCase (next .substring (0 , end .length ()))
2424+ && tagNameWithLeadingSlash .equalsIgnoreCase (
2425+ next .substring (0 , tagNameWithLeadingSlash .length ()))
24222426 && ('>' == next .charAt (lengthToScan - 1 )
24232427 || Character .isWhitespace (next .charAt (lengthToScan - 1 )))) {
24242428 fCurrentEntity .rewind ();
0 commit comments