/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.util;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.TestUtil;

public class TestCharTokenizers
extends BaseTokenStreamTestCase {
    public void testReadSupplementaryChars() throws IOException {
        StringBuilder builder = new StringBuilder();
        int num = 1024 + TestCharTokenizers.random().nextInt(1024);
        num *= RANDOM_MULTIPLIER;
        for (int i = 1; i < num; ++i) {
            builder.append("\ud801\udc1cabc");
            if (i % 10 != 0) continue;
            builder.append(" ");
        }
        builder.insert(1023, "\ud801\udc1c");
        LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TestCharTokenizers.newAttributeFactory());
        tokenizer.setReader((Reader)new StringReader(builder.toString()));
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])builder.toString().toLowerCase(Locale.ROOT).split(" "));
    }

    public void testExtendCharBuffer() throws IOException {
        for (int i = 0; i < 40; ++i) {
            StringBuilder builder = new StringBuilder();
            for (int j = 0; j < 1 + i; ++j) {
                builder.append("a");
            }
            builder.append("\ud801\udc1cabc");
            LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TestCharTokenizers.newAttributeFactory());
            tokenizer.setReader((Reader)new StringReader(builder.toString()));
            TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{builder.toString().toLowerCase(Locale.ROOT)});
        }
    }

    public void testMaxWordLength() throws IOException {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < 255; ++i) {
            builder.append("A");
        }
        LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TestCharTokenizers.newAttributeFactory());
        tokenizer.setReader((Reader)new StringReader(builder.toString() + builder.toString()));
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
    }

    public void testMaxWordLengthWithSupplementary() throws IOException {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < 254; ++i) {
            builder.append("A");
        }
        builder.append("\ud801\udc1c");
        LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TestCharTokenizers.newAttributeFactory());
        tokenizer.setReader((Reader)new StringReader(builder.toString() + builder.toString()));
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
    }

    public void testCrossPlaneNormalization() throws IOException {
        Analyzer analyzer = new Analyzer(){

            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                LetterTokenizer tokenizer = new LetterTokenizer(BaseTokenStreamTestCase.newAttributeFactory()){

                    protected int normalize(int c) {
                        if (c > 65535) {
                            return 948;
                        }
                        return c;
                    }
                };
                return new Analyzer.TokenStreamComponents((Tokenizer)tokenizer, (TokenStream)tokenizer);
            }
        };
        int num = 1000 * RANDOM_MULTIPLIER;
        for (int i = 0; i < num; ++i) {
            String s = TestUtil.randomUnicodeString((Random)TestCharTokenizers.random());
            try (TokenStream ts = analyzer.tokenStream("foo", s);){
                ts.reset();
                OffsetAttribute offsetAtt = (OffsetAttribute)ts.addAttribute(OffsetAttribute.class);
                while (ts.incrementToken()) {
                    String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
                    int cp = 0;
                    for (int j = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
                        cp = highlightedText.codePointAt(j);
                        TestCharTokenizers.assertTrue((String)("non-letter:" + Integer.toHexString(cp)), (boolean)Character.isLetter(cp));
                    }
                }
                ts.end();
                continue;
            }
        }
        TestCharTokenizers.checkRandomData((Random)TestCharTokenizers.random(), (Analyzer)analyzer, (int)num);
        analyzer.close();
    }

    public void testCrossPlaneNormalization2() throws IOException {
        Analyzer analyzer = new Analyzer(){

            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                LetterTokenizer tokenizer = new LetterTokenizer(BaseTokenStreamTestCase.newAttributeFactory()){

                    protected int normalize(int c) {
                        if (c <= 65535) {
                            return 66620;
                        }
                        return c;
                    }
                };
                return new Analyzer.TokenStreamComponents((Tokenizer)tokenizer, (TokenStream)tokenizer);
            }
        };
        int num = 1000 * RANDOM_MULTIPLIER;
        for (int i = 0; i < num; ++i) {
            String s = TestUtil.randomUnicodeString((Random)TestCharTokenizers.random());
            try (TokenStream ts = analyzer.tokenStream("foo", s);){
                ts.reset();
                OffsetAttribute offsetAtt = (OffsetAttribute)ts.addAttribute(OffsetAttribute.class);
                while (ts.incrementToken()) {
                    String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
                    int cp = 0;
                    for (int j = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
                        cp = highlightedText.codePointAt(j);
                        TestCharTokenizers.assertTrue((String)("non-letter:" + Integer.toHexString(cp)), (boolean)Character.isLetter(cp));
                    }
                }
                ts.end();
                continue;
            }
        }
        TestCharTokenizers.checkRandomData((Random)TestCharTokenizers.random(), (Analyzer)analyzer, (int)num);
        analyzer.close();
    }

    public void testDefinitionUsingMethodReference1() throws Exception {
        StringReader reader = new StringReader("Tokenizer Test");
        CharTokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(Character::isWhitespace);
        tokenizer.setReader((Reader)reader);
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"Tokenizer", "Test"});
    }

    public void testDefinitionUsingMethodReference2() throws Exception {
        StringReader reader = new StringReader("Tokenizer(Test)");
        CharTokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toUpperCase);
        tokenizer.setReader((Reader)reader);
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"TOKENIZER", "TEST"});
    }

    public void testDefinitionUsingLambda() throws Exception {
        StringReader reader = new StringReader("Tokenizer\u00a0Test Foo");
        CharTokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(c -> c == 160 || Character.isWhitespace(c), Character::toLowerCase);
        tokenizer.setReader((Reader)reader);
        TestCharTokenizers.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"tokenizer", "test", "foo"});
    }
}

