/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.charfilter;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.util.TestUtil;

public class HTMLStripCharFilterTest
extends BaseTokenStreamTestCase {
    /**
     * Creates the analyzer used by the analysis-level tests in this class.
     *
     * <p>Tokenization is done by a {@link MockTokenizer} configured with
     * {@code MockTokenizer.WHITESPACE} and {@code false}; every reader handed to the
     * analyzer is first wrapped in an {@link HTMLStripCharFilter}.
     *
     * @return a new analyzer; the caller is responsible for closing it
     */
    private static Analyzer newTestAnalyzer() {
        return new Analyzer() {

            protected Reader initReader(String fieldName, Reader reader) {
                // All input goes through the filter under test before tokenization.
                return new HTMLStripCharFilter(reader);
            }

            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                // The same tokenizer instance acts as both source and result stream.
                Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                TokenStream result = source;
                return new Analyzer.TokenStreamComponents(source, result);
            }
        };
    }

    /**
     * Basic smoke test: tags and a comment are removed, character entities are decoded,
     * and a bare {@code &} is passed through unchanged.
     */
    public void test() throws Exception {
        final String input = "<div class=\"foo\">this is some text</div>"
            + " here is a <a href=\"#bar\">link</a>"
            + " and another <a href=\"http://lucene.apache.org/\">link</a>."
            + " This is an entity: &amp; plus a &lt;."
            + "  Here is an &. <!-- is a comment -->";
        final String expected = "\nthis is some text\n"
            + " here is a link and another link."
            + " This is an entity: & plus a <."
            + "  Here is an &. ";
        assertHTMLStripsTo(input, expected, null);
    }

    /**
     * Strips the {@code htmlStripReaderTest.html} classpath resource and checks a few
     * properties of the result: no {@code &lt;} entity text remains, neither "forrest" nor
     * "Forrest" remains, and the trimmed output starts with "Welcome to Solr" and ends
     * with "Foundation.".
     *
     * @throws Exception if the resource cannot be read
     */
    public void testHTML() throws Exception {
        InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html");
        // Fail with a readable message instead of a NullPointerException when the resource is missing.
        assertNotNull("Test resource 'htmlStripReaderTest.html' was not found", stream);
        StringBuilder builder = new StringBuilder();
        // try-with-resources makes sure the filter is closed even when reading fails.
        try (HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            int ch;
            while ((ch = reader.read()) != -1) {
                builder.append((char) ch);
            }
        }
        String str = builder.toString();
        String trimmed = str.trim();
        assertTrue("Entity not properly escaped", str.indexOf("&lt;") == -1);
        assertTrue("Forrest should have been stripped out", str.indexOf("forrest") == -1 && str.indexOf("Forrest") == -1);
        assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.startsWith("Welcome to Solr"));
        // The message previously said "start with" although the check is endsWith.
        assertTrue("File should end with 'Foundation.' after trimming", trimmed.endsWith("Foundation."));
    }

    /**
     * Strips the {@code MS-Word 14 generated.htm} classpath resource and expects the
     * trimmed output to be exactly "This is a test".
     *
     * @throws Exception if the resource cannot be read
     */
    public void testMSWord14GeneratedHTML() throws Exception {
        InputStream stream = getClass().getResourceAsStream("MS-Word 14 generated.htm");
        // Fail with a readable message instead of a NullPointerException when the resource is missing.
        assertNotNull("Test resource 'MS-Word 14 generated.htm' was not found", stream);
        String gold = "This is a test";
        StringBuilder builder = new StringBuilder();
        // try-with-resources makes sure the filter is closed even when reading fails.
        try (HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            int ch;
            while ((ch = reader.read()) != -1) {
                builder.append((char) ch);
            }
        }
        String actual = builder.toString().trim();
        assertEquals("'" + actual + "' is not equal to '" + gold + "'", (Object) gold, (Object) actual);
    }

    /** The named entity {@code &Gamma;} decodes to U+0393, with "reserved" as the only escaped tag. */
    public void testGamma() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("reserved"));
        String input = "&Gamma;";
        String expected = "\u0393";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /** Named, decimal and hexadecimal character references are all decoded. */
    public void testEntities() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("reserved"));
        String input = "&nbsp; &lt;foo&gt; &Uuml;bermensch &#61; &Gamma; bar &#x393;";
        String expected = "  <foo> \u00dcbermensch = \u0393 bar \u0393";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /** A further mix of named and decimal character references, including one above U+00FF. */
    public void testMoreEntities() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("reserved"));
        String input = "&nbsp; &lt;junk/&gt; &nbsp; &#33; &#64; and &#8217;";
        String expected = "  <junk/>   ! @ and \u2019";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /**
     * With "reserved" registered as an escaped tag, the three occurrences of that tag name
     * must appear in the output at fixed offsets, while the {@code <other/>} tag is removed.
     */
    public void testReserved() throws Exception {
        String test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
        Set<String> escapedTags = new HashSet<String>();
        escapedTags.add("reserved");
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new StringReader(test), escapedTags);

        // Drain the filter one character at a time.
        StringBuilder out = new StringBuilder();
        for (int c = filter.read(); c != -1; c = filter.read()) {
            out.append((char) c);
        }
        String result = out.toString();

        int openTag = result.indexOf("reserved");
        assertTrue("Escaped tag not preserved: " + openTag, openTag == 9);

        int closeTag = result.indexOf("reserved", 15);
        assertTrue("Escaped tag not preserved: " + closeTag, closeTag == 38);

        int selfClosingTag = result.indexOf("reserved", 41);
        assertTrue("Escaped tag not preserved: " + selfClosingTag, selfClosingTag == 54);

        assertTrue("Other tag should be removed", result.indexOf("other") == -1);
    }

    /**
     * Runs a table of malformed markup snippets through the filter.
     *
     * <p>{@code testGold} is a flat array of pairs: each even index holds an input and the
     * following odd index holds the output expected for it. Every pair is checked with
     * {@code assertHTMLStripsTo} and no escaped tags.
     */
    public void testMalformedHTML() throws Exception {
        String[] testGold = new String[]{"a <a hr<ef=aa<a>> </close</a>", "a <a hr<ef=aa> </close", "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\" class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>", "Submit a Site", "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 class=expanded rel=http://www.csmonitor.com/>Christian Science", "Christian Science", "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\" />", "\n", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageNavAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\"  type=\"application/opensearchdescription+xml\"  href=\"http://21sta.com/blog/inc/opensearch.php\" />", "\n", "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a href='/modern-furniture'   ' id='21txt' class='offtab'   onMouseout=\"this.className='offtab';  return true;\" onMouseover=\"this.className='ontab';  return true;\">", "", "<a href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "", "The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a> practice software</a>", "The <a href=medical\">http://www.advancedmd.com>medical practice software", "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008 Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week 29...", "Levi.com/BMX 2008 Clip of the Week 29...", "<a 
href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add to Favorites", "Add to Favorites", "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At", "At", "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com </a>", "E-mail: XXXXXX@example.com ", "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" href=\"http://www.america.gov/persian\" alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>", "\nA'13?\n", "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\" Ausby</a></li>", "\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\">", "\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.com/1895039493-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", "#", "<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci Moto e Accessori Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\" /></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are you sure you want  add this video to your profile? 
If it exists some video in your profile will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">", "", "<a href=#Services & Support>", "", "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\" name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value ; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value ; searchKeyword = searchKeyword.replace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing to search. Search keyword should contain atleast 3 chars.'); return false; } var al='&al=' +  document.getElementById('advancedlink').style.display ;  document.location.href='http://apologyindex.com/search.aspx?q=' + document.getElementById('q').value + incom + dt + al;\" />", "", "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\"  hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){ window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title); }else if(window.opera&&window.print) { return true; }\">", "", "<area shape=\"rect\" coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\" title=\"bonza>#", "#", "<a href=  >", "", "<ahref=http:..", "<ahref=http:..", "<ahref=http:..>", "\n", "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A", "\nA", "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">", "", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" Rackmounts\">", "", "<a 
href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" name=\"Lamborghini  href=\"http://lamborghini.coolbegin.com\">Lamborghini /a>", "Lamborghini /a>", "<A href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for championship game' TARGET=_blank>", "", "<a href=/myspace !style='color:#993333'>", "", "<meta name=3DProgId content=3DExcel.Sheet>", "\n", "<link id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">", "\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a class=\"comment-link\" href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>", "", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" videos\">Bishop\"</a>", "Bishop\"", "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA Eircom 2 &amp; 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined start", "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\" title=\"Mana\">", "", "<a  href=\"http://blog.edu-cyberpg.com/ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\" eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">", "<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search here\">", "<input type=\"text\" value=\"\n", "<input type=\"text\" value=\"search here>\">", "\">", "<input type=\"text\" 
value=\"&lt;search here&gt;\" onFocus=\"this.value='<search here>'\">", "", "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\" rel=\"shortcut icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr height=0 style='display:none'>\n<td width=64 style='width:48pt'></td>\n</tr>\n<![endif]>", "\n\n\n\n\n\n\n\n"};
        // Step by two: testGold[i] is the input, testGold[i + 1] the expected output.
        for (int i = 0; i < testGold.length; i += 2) {
            HTMLStripCharFilterTest.assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
        }
    }

    /**
     * Exercises inputs that are longer than the size reported by
     * {@code HTMLStripCharFilter.getInitialBufferSize()}: plain text following a bogus
     * {@code <?>}, a long comment, a long processing instruction and a long tag.
     */
    public void testBufferOverflow() throws Exception {
        final int bufferSize = HTMLStripCharFilter.getInitialBufferSize();

        // Text that is expected to come out unchanged, even though the input is pre-filtered once.
        StringBuilder passthrough = new StringBuilder(bufferSize + 50);
        passthrough.append("ah<?> ??????");
        appendChars(passthrough, bufferSize + 500);
        String passthroughText = passthrough.toString();
        Reader prefiltered = new HTMLStripCharFilter(new BufferedReader(new StringReader(passthroughText)));
        assertHTMLStripsTo(prefiltered, passthroughText, null);

        // A comment several buffers long is dropped; the trailing text survives.
        StringBuilder longComment = new StringBuilder();
        longComment.append("<!--");
        appendChars(longComment, 3 * bufferSize + 500);
        longComment.append("-->foo");
        assertHTMLStripsTo(longComment.toString(), "foo", null);

        // A processing instruction longer than the buffer is dropped entirely.
        StringBuilder longInstruction = new StringBuilder();
        longInstruction.append("<?");
        appendChars(longInstruction, bufferSize + 500);
        longInstruction.append("?>");
        assertHTMLStripsTo(longInstruction.toString(), "", null);

        // A tag longer than the buffer is dropped entirely.
        StringBuilder longTag = new StringBuilder();
        longTag.append("<b ");
        appendChars(longTag, bufferSize + 500);
        longTag.append("/>");
        assertHTMLStripsTo(longTag.toString(), "", null);
    }

    /**
     * Appends {@code numChars / 2} copies of the two-character sequence "a " to the builder.
     *
     * @param testBuilder the builder to append to
     * @param numChars    requested number of characters; an odd value is rounded down by the
     *                    integer division, and values below 2 append nothing
     */
    private void appendChars(StringBuilder testBuilder, int numChars) {
        for (int pairsLeft = numChars / 2; pairsLeft > 0; --pairsLeft) {
            testBuilder.append("a ");
        }
    }

    /**
     * Comment handling: a comment opened with three dashes, a {@code <! ... >} declaration,
     * and an unterminated comment of random length are all removed.
     */
    public void testComment() throws Exception {
        assertHTMLStripsTo("<!--- three dashes, still a valid comment ---> ", " ", null);
        assertHTMLStripsTo("<! -- blah > ", " ", null);

        // A comment that is never closed: everything from the opener on is expected to vanish.
        StringBuilder unterminated = new StringBuilder("<!--");
        int fillerLength = TestUtil.nextInt(random(), 0, 1000);
        appendChars(unterminated, fillerLength);
        assertHTMLStripsTo(unterminated.toString(), "", null);
    }

    /**
     * Checks offset correction for every 'X' emitted by the filter: the corrected offset of
     * the n-th 'X' in the output must equal the index of the n-th 'X' in the input.
     *
     * @param in raw input to filter
     */
    public void doTestOffsets(String in) throws Exception {
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
        int inputIndex = -1;   // index in the raw input of the most recently matched 'X'
        int outputIndex = 0;   // index of the current character in the filtered output
        for (int c = filter.read(); c != -1; ++outputIndex, c = filter.read()) {
            int corrected = filter.correctOffset(outputIndex);
            if (c != 'X') {
                continue;
            }
            inputIndex = in.indexOf('X', inputIndex + 1);
            assertEquals((long) inputIndex, (long) corrected);
        }
    }

    /** Runs {@code doTestOffsets} over inputs mixing tags, valid entities and broken entities. */
    public void testOffsets() throws Exception {
        String[] inputs = {
            "hello <p> X<p> how <p>X are you",
            "X &amp; X &#40; X &lt; &gt; X",
            "X < &zz >X &# < X > < &l > &g < X",
        };
        for (String input : inputs) {
            doTestOffsets(input);
        }
    }

    /**
     * Asserts that, for every character the filter emits, the corrected offset of its output
     * position does not exceed the length of the raw input.
     *
     * @param in raw input to filter
     */
    static void assertLegalOffsets(String in) throws Exception {
        final int docLength = in.length();
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new BufferedReader(new StringReader(in)));
        for (int outputIndex = 0; filter.read() != -1; ++outputIndex) {
            int corrected = filter.correctOffset(outputIndex);
            String message = "invalid offset correction: " + outputIndex + "->" + corrected + " for doc of length: " + docLength;
            assertTrue(message, corrected <= docLength);
        }
    }

    /** Offset corrections must stay within the input for plain text and for a broken hex entity. */
    public void testLegalOffsets() throws Exception {
        String[] inputs = {"hello world", "hello &#x world"};
        for (String input : inputs) {
            assertLegalOffsets(input);
        }
    }

    /** Feeds random data through the test analyzer for {@code 1000 * RANDOM_MULTIPLIER} rounds. */
    public void testRandom() throws Exception {
        final int rounds = RANDOM_MULTIPLIER * 1000;
        Analyzer analyzer = newTestAnalyzer();
        checkRandomData(random(), analyzer, rounds);
        analyzer.close();
    }

    /**
     * Like {@code testRandom}, but for {@code 100 * RANDOM_MULTIPLIER} rounds and passing
     * 8192 as the extra size argument of {@code checkRandomData}.
     */
    public void testRandomHugeStrings() throws Exception {
        final int rounds = RANDOM_MULTIPLIER * 100;
        final int sizeArgument = 8192;
        Analyzer analyzer = newTestAnalyzer();
        checkRandomData(random(), analyzer, rounds, sizeArgument);
        analyzer.close();
    }

    /** Consistency check on a fixed input containing a closing {@code </br>} tag. */
    public void testCloseBR() throws Exception {
        Analyzer analyzer = newTestAnalyzer();
        checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), " Secretary)</br> [[M");
        analyzer.close();
    }

    /**
     * Server-side-include style comments nested inside attribute values and inside a
     * {@code <script>} element must not confuse tag stripping.
     */
    public void testServerSideIncludes() throws Exception {
        // SSI directives embedded in the attribute values of an <img> tag.
        String insideAttributes = "one<img src=\"image.png\"\n alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
        assertHTMLStripsTo(insideAttributes, "onetwo", null);

        // SSI directive nested in a comment inside a <script> element.
        String insideScript = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
        assertHTMLStripsTo(insideScript, "one\ntwo", null);
    }

    /**
     * Quoted strings inside a {@code <script>} body that contain comment markers or a
     * closing script tag: the whole element is replaced by a single newline.
     */
    public void testScriptQuotes() throws Exception {
        String commentMarkersInQuotes = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
        assertHTMLStripsTo(commentMarkersInQuotes, "one\ntwo", null);

        String closingTagInQuotes = "hello<script><!-- f('<!--internal--></script>'); --></script>";
        assertHTMLStripsTo(closingTagInQuotes, "hello\n", null);
    }

    /** With "SCRIPT" escaped, the script tags are kept but the script body is still dropped. */
    public void testEscapeScript() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("SCRIPT"));
        String input = "one<script no-value-attr>callSomeMethod();</script>two";
        String expected = "one<script no-value-attr></script>two";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /** A {@code <style>} element, including its commented-out body, is replaced by one newline. */
    public void testStyle() throws Exception {
        String input = "one<style type=\"text/css\">\n"
            + "<!--\n"
            + "@import url('http://www.lasletrasdecanciones.com/css.css');\n"
            + "-->\n"
            + "</style>two";
        assertHTMLStripsTo(input, "one\ntwo", null);
    }

    /** With "STYLE" escaped, the style tags are kept but the style body is still dropped. */
    public void testEscapeStyle() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("STYLE"));
        String input = "one<style type=\"text/css\"> body,font,a { font-family:arial; } </style>two";
        String expected = "one<style type=\"text/css\"></style>two";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /** Each {@code <br>} variant (self-closed, bare, with attributes, closing) becomes a newline. */
    public void testBR() throws Exception {
        // Each row is {input, expected output}.
        String[][] cases = {
            {"one<BR />two<br>three", "one\ntwo\nthree"},
            {"one<BR some stuff here too>two</BR>", "one\ntwo\n"},
        };
        for (String[] pair : cases) {
            assertHTMLStripsTo(pair[0], pair[1], null);
        }
    }

    /** With "BR" escaped, the input is expected to pass through completely unchanged. */
    public void testEscapeBR() throws Exception {
        Set<String> escapedTags = new HashSet<String>(Arrays.asList("BR"));
        String input = "one<BR class='whatever'>two</\nBR\n>";
        String expected = "one<BR class='whatever'>two</\nBR\n>";
        assertHTMLStripsTo(input, expected, escapedTags);
    }

    /** Mixed-case {@code span} and nested {@code sup} tags are removed without inserting any characters. */
    public void testInlineTagsNoSpace() throws Exception {
        String input = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
        String expected = "onetwo2e.three";
        assertHTMLStripsTo(input, expected, null);
    }

    /**
     * CDATA handling, driven by a flat table of (input, expected output) pairs.
     *
     * <p>Two random strings are generated with every {@code >} replaced by a space and a
     * leading {@code --} replaced by {@code __}. The first is used for a closed
     * {@code <! ... -[CDATA[&]]>} construct, which is expected to be removed. The second is used
     * for the unclosed variant, which is expected to pass through unchanged.
     */
    public void testCDATA() throws Exception {
        int maxNumElems = 100;
        String randomHtmlishString1 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--", "__");
        String closedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[&]]>";
        String randomHtmlishString2 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--", "__");
        // Use the second random string here; it was previously generated but never used,
        // and the unclosed case silently reused randomHtmlishString1.
        String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 + "-[CDATA[";
        // Even index: input; following odd index: expected output.
        String[] testGold = new String[]{
            "one<![CDATA[<one><two>three<four></four></two></one>]]>two", "one<one><two>three<four></four></two></one>two",
            "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five", "onetwo<![CDATA[three]]>fourfive",
            "<! [CDATA[&]]>", "",
            "<! [CDATA[&] ] >", "",
            "<! [CDATA[&]]", "<! [CDATA[&]]",
            "<!\u2009[CDATA[&]]>", "",
            "<!\u2009[CDATA[&]\u2009]\u2009>", "",
            "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009",
            closedAngleBangNonCDATA, "",
            "<![CDATA[", "",
            "<![CDATA[<br>", "<br>",
            "<![CDATA[<br>]]", "<br>]]",
            "<![CDATA[<br>]]>", "<br>",
            "<![CDATA[<br>] ] >", "<br>] ] >",
            "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>",
            "<!\u2009[CDATA[", "<!\u2009[CDATA[",
            unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA
        };
        for (int i = 0; i < testGold.length; i += 2) {
            assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
        }
    }

    /** An unterminated {@code <![endif]} (no closing angle bracket) is passed through unchanged. */
    public void testUnclosedAngleBang() throws Exception {
        String unterminated = "<![endif]";
        assertHTMLStripsTo(unterminated, unterminated, null);
    }

    /** Upper-case spellings of QUOT, COPY, GT, LT, REG and AMP are decoded like their lower-case forms. */
    public void testUppercaseCharacterEntityVariants() throws Exception {
        String input = " &QUOT;-&COPY;&GT;>&LT;<&REG;&AMP;";
        String expected = " \"-\u00a9>><<\u00ae&";
        assertHTMLStripsTo(input, expected, null);
    }

    /** A processing instruction that is closed with {@code />} is still removed as a whole. */
    public void testMSWordMalformedProcessingInstruction() throws Exception {
        String input = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
        String expected = "onetwo";
        assertHTMLStripsTo(input, expected, null);
    }

    /**
     * Tags whose names contain non-ASCII characters, including surrogate pairs, are each
     * replaced by a newline while the same characters in text content are preserved.
     */
    public void testSupplementaryCharsInTags() throws Exception {
        String input = "one<\ud866\udf05\u8271\u935f\u41f9\u612f\u701b>two<\u701b\u612f\ud866\udf05>three \u701b\u612f\ud866\udf05</\u701b\u612f\ud866\udf05>four</\ud866\udf05\u8271\u935f\u41f9\u612f\u701b>five<\ud840\udc00\ud840\udc00>six<\ud840\udc00\ud840\udc00/>seven";
        String expected = "one\ntwo\nthree \u701b\u612f\ud866\udf05\nfour\nfive\nsix\nseven";
        assertHTMLStripsTo(input, expected, null);
    }

    /** Consistency check on one random HTML-ish string generated with a limit of 10000 elements. */
    public void testRandomBrokenHTML() throws Exception {
        final int maxElements = 10000;
        String brokenHtml = TestUtil.randomHtmlishString(random(), maxElements);
        Analyzer analyzer = newTestAnalyzer();
        checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), brokenHtml);
        analyzer.close();
    }

    /**
     * Builds a space-separated text of random words and reads it through the filter to the
     * end; the test has no assertions beyond the read completing without an exception.
     *
     * <p>One of three word generators is chosen once per run from a random value in
     * {@code [0, 4]}: 0 selects arbitrary Unicode strings, 1 selects "realistic" Unicode
     * strings, and any other value selects simple strings.
     */
    public void testRandomText() throws Exception {
        final int minNumWords = 10;
        final int maxNumWords = 10000;
        final int minWordLength = 3;
        final int maxWordLength = 20;

        StringBuilder text = new StringBuilder();
        int numWords = TestUtil.nextInt(random(), minNumWords, maxNumWords);
        int generator = TestUtil.nextInt(random(), 0, 4);
        for (int wordNum = 0; wordNum < numWords; ++wordNum) {
            String word;
            if (generator == 0) {
                word = TestUtil.randomUnicodeString(random(), maxWordLength);
            } else if (generator == 1) {
                word = TestUtil.randomRealisticUnicodeString(random(), minWordLength, maxWordLength);
            } else {
                word = TestUtil.randomSimpleString(random());
            }
            text.append(word);
            text.append(' ');
        }

        // Drain the filter; only successful completion matters.
        HTMLStripCharFilter filter = new HTMLStripCharFilter(new StringReader(text.toString()));
        int c;
        do {
            c = filter.read();
        } while (c != -1);
    }

    /**
     * Numeric character references that denote UTF-16 surrogate code units.
     *
     * <p>A high-surrogate reference immediately followed by a low-surrogate reference yields
     * the surrogate pair. An unpaired surrogate reference yields U+FFFD when it is terminated
     * by a semicolon or by the end of input, and is left as literal text when followed by a tag.
     */
    public void testUTF16Surrogates() throws Exception {
        Analyzer analyzer = newTestAnalyzer();

        // Valid high + low pairs, in every hex/decimal combination.
        assertAnalyzesTo(analyzer, " one two &#xD86C;&#XdC01;three", new String[] {"one", "two", "\ud86c\udc01three"});
        assertAnalyzesTo(analyzer, " &#55404;&#XdC01;", new String[] {"\ud86c\udc01"});
        assertAnalyzesTo(analyzer, " &#xD86C;&#56321;", new String[] {"\ud86c\udc01"});
        assertAnalyzesTo(analyzer, " &#55404;&#56321;", new String[] {"\ud86c\udc01"});

        // High surrogate followed by a reference that is not a low surrogate.
        assertAnalyzesTo(analyzer, " &#55404;&#57999;", new String[] {"\ufffd\ue28f"});
        assertAnalyzesTo(analyzer, " &#xD86C;&#57999;", new String[] {"\ufffd\ue28f"});

        // Low surrogate preceded by a reference that is not a high surrogate.
        assertAnalyzesTo(analyzer, " &#55002;&#XdC01;", new String[] {"\ud6da\ufffd"});
        assertAnalyzesTo(analyzer, " &#55002;&#56321;", new String[] {"\ud6da\ufffd"});

        // Lone high surrogate, hex form.
        assertAnalyzesTo(analyzer, " &#Xd921;", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#Xd921", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#Xd921<br>", new String[] {"&#Xd921"});

        // Lone high surrogate, decimal form.
        assertAnalyzesTo(analyzer, " &#55528;", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#55528", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#55528<br>", new String[] {"&#55528"});

        // Lone low surrogate, hex form.
        assertAnalyzesTo(analyzer, " &#xdfdb;", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#xdfdb", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#xdfdb<br>", new String[] {"&#xdfdb"});

        // Lone low surrogate, decimal form.
        assertAnalyzesTo(analyzer, " &#57209;", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#57209", new String[] {"\ufffd"});
        assertAnalyzesTo(analyzer, " &#57209<br>", new String[] {"&#57209"});

        analyzer.close();
    }

    /**
     * Convenience overload: wraps {@code input} in a {@link StringReader} and delegates to
     * the {@link Reader}-based variant.
     *
     * @param input       raw text to filter
     * @param gold        expected filtered output
     * @param escapedTags tag names to pass to the filter, or {@code null} for none
     */
    public static void assertHTMLStripsTo(String input, String gold, Set<String> escapedTags) throws Exception {
        Reader source = new StringReader(input);
        assertHTMLStripsTo(source, gold, escapedTags);
    }

    /**
     * Reads {@code input} through an {@link HTMLStripCharFilter} to the end and asserts that
     * the characters produced equal {@code gold}.
     *
     * <p>If reading throws, the exception is rethrown as-is when the output collected so far
     * already equals {@code gold}; otherwise it is wrapped in a new {@link Exception} whose
     * message also shows the partial output and the expected text.
     *
     * @param input       source of raw text
     * @param gold        expected filtered output
     * @param escapedTags tag names to pass to the filter; {@code null} selects the
     *                    single-argument filter constructor
     */
    public static void assertHTMLStripsTo(Reader input, String gold, Set<String> escapedTags) throws Exception {
        HTMLStripCharFilter filter;
        if (escapedTags == null) {
            filter = new HTMLStripCharFilter(input);
        } else {
            filter = new HTMLStripCharFilter(input, escapedTags);
        }

        StringBuilder stripped = new StringBuilder();
        try {
            for (int c = filter.read(); c != -1; c = filter.read()) {
                stripped.append((char) c);
            }
        }
        catch (Exception e) {
            String partial = stripped.toString();
            if (!gold.equals(partial)) {
                // Add the mismatch to the failure so it is visible next to the cause.
                throw new Exception("('" + partial + "' is not equal to '" + gold + "').  " + e.getMessage(), e);
            }
            throw e;
        }

        String actual = stripped.toString();
        assertEquals("'" + actual + "' is not equal to '" + gold + "'", (Object) gold, (Object) actual);
    }
}

