/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.ngram;

import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.TestUtil;

public class NGramTokenizerTest
extends BaseTokenStreamTestCase {
    private StringReader input;

    public void setUp() throws Exception {
        super.setUp();
        this.input = new StringReader("abcde");
    }

    public void testInvalidInput() throws Exception {
        NGramTokenizerTest.expectThrows(IllegalArgumentException.class, () -> {
            NGramTokenizer tok = new NGramTokenizer(2, 1);
        });
    }

    public void testInvalidInput2() throws Exception {
        NGramTokenizerTest.expectThrows(IllegalArgumentException.class, () -> {
            NGramTokenizer tok = new NGramTokenizer(0, 1);
            tok.setReader((Reader)this.input);
        });
    }

    public void testUnigrams() throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(1, 1);
        tokenizer.setReader((Reader)this.input);
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"a", "b", "c", "d", "e"}, (int[])new int[]{0, 1, 2, 3, 4}, (int[])new int[]{1, 2, 3, 4, 5}, (Integer)5);
    }

    public void testBigrams() throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(2, 2);
        tokenizer.setReader((Reader)this.input);
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"ab", "bc", "cd", "de"}, (int[])new int[]{0, 1, 2, 3}, (int[])new int[]{2, 3, 4, 5}, (Integer)5);
    }

    public void testNgrams() throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(1, 3);
        tokenizer.setReader((Reader)this.input);
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e"}, (int[])new int[]{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4}, (int[])new int[]{1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5}, null, null, null, (Integer)5, (boolean)false);
    }

    public void testOversizedNgrams() throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(6, 7);
        tokenizer.setReader((Reader)this.input);
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[0], (int[])new int[0], (int[])new int[0], (Integer)5);
    }

    public void testReset() throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(1, 1);
        tokenizer.setReader((Reader)this.input);
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"a", "b", "c", "d", "e"}, (int[])new int[]{0, 1, 2, 3, 4}, (int[])new int[]{1, 2, 3, 4, 5}, (Integer)5);
        tokenizer.setReader((Reader)new StringReader("abcde"));
        NGramTokenizerTest.assertTokenStreamContents((TokenStream)tokenizer, (String[])new String[]{"a", "b", "c", "d", "e"}, (int[])new int[]{0, 1, 2, 3, 4}, (int[])new int[]{1, 2, 3, 4, 5}, (Integer)5);
    }

    public void testRandomStrings() throws Exception {
        int numIters = TEST_NIGHTLY ? 10 : 1;
        for (int i = 0; i < numIters; ++i) {
            final int min = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)2, (int)10);
            final int max = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)min, (int)20);
            Analyzer a = new Analyzer(){

                protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                    NGramTokenizer tokenizer = new NGramTokenizer(min, max);
                    return new Analyzer.TokenStreamComponents((Tokenizer)tokenizer, (TokenStream)tokenizer);
                }
            };
            NGramTokenizerTest.checkRandomData((Random)NGramTokenizerTest.random(), (Analyzer)a, (int)(200 * RANDOM_MULTIPLIER), (int)20);
            NGramTokenizerTest.checkRandomData((Random)NGramTokenizerTest.random(), (Analyzer)a, (int)(10 * RANDOM_MULTIPLIER), (int)1027);
            a.close();
        }
    }

    private static void testNGrams(int minGram, int maxGram, int length, String nonTokenChars) throws IOException {
        String s = RandomStrings.randomAsciiOfLength((Random)NGramTokenizerTest.random(), (int)length);
        NGramTokenizerTest.testNGrams(minGram, maxGram, s, nonTokenChars);
    }

    private static void testNGrams(int minGram, int maxGram, String s, String nonTokenChars) throws IOException {
        NGramTokenizerTest.testNGrams(minGram, maxGram, s, nonTokenChars, false);
    }

    static int[] toCodePoints(CharSequence s) {
        int[] codePoints = new int[Character.codePointCount(s, 0, s.length())];
        int i = 0;
        int j = 0;
        while (i < s.length()) {
            codePoints[j] = Character.codePointAt(s, i);
            i += Character.charCount(codePoints[j]);
            ++j;
        }
        return codePoints;
    }

    static boolean isTokenChar(String nonTokenChars, int codePoint) {
        int cp;
        for (int i = 0; i < nonTokenChars.length(); i += Character.charCount(cp)) {
            cp = nonTokenChars.codePointAt(i);
            if (cp != codePoint) continue;
            return false;
        }
        return true;
    }

    static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
        int[] codePoints = NGramTokenizerTest.toCodePoints(s);
        int[] offsets = new int[codePoints.length + 1];
        for (int i = 0; i < codePoints.length; ++i) {
            offsets[i + 1] = offsets[i] + Character.charCount(codePoints[i]);
        }
        NGramTokenizer grams = new NGramTokenizer(minGram, maxGram, edgesOnly){

            protected boolean isTokenChar(int chr) {
                return nonTokenChars.indexOf(chr) < 0;
            }
        };
        grams.setReader((Reader)new StringReader(s));
        CharTermAttribute termAtt = (CharTermAttribute)grams.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)grams.addAttribute(PositionIncrementAttribute.class);
        PositionLengthAttribute posLenAtt = (PositionLengthAttribute)grams.addAttribute(PositionLengthAttribute.class);
        OffsetAttribute offsetAtt = (OffsetAttribute)grams.addAttribute(OffsetAttribute.class);
        grams.reset();
        for (int start = 0; start < codePoints.length; ++start) {
            block2: for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
                if (edgesOnly && start > 0 && NGramTokenizerTest.isTokenChar(nonTokenChars, codePoints[start - 1])) continue;
                for (int j = start; j < end; ++j) {
                    if (!NGramTokenizerTest.isTokenChar(nonTokenChars, codePoints[j])) continue block2;
                }
                NGramTokenizerTest.assertTrue((boolean)grams.incrementToken());
                NGramTokenizerTest.assertArrayEquals((int[])Arrays.copyOfRange(codePoints, start, end), (int[])NGramTokenizerTest.toCodePoints((CharSequence)termAtt));
                NGramTokenizerTest.assertEquals((long)1L, (long)posIncAtt.getPositionIncrement());
                NGramTokenizerTest.assertEquals((long)1L, (long)posLenAtt.getPositionLength());
                NGramTokenizerTest.assertEquals((long)offsets[start], (long)offsetAtt.startOffset());
                NGramTokenizerTest.assertEquals((long)offsets[end], (long)offsetAtt.endOffset());
            }
        }
        NGramTokenizerTest.assertFalse((boolean)grams.incrementToken());
        grams.end();
        NGramTokenizerTest.assertEquals((long)s.length(), (long)offsetAtt.startOffset());
        NGramTokenizerTest.assertEquals((long)s.length(), (long)offsetAtt.endOffset());
    }

    public void testLargeInput() throws IOException {
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1, (int)100);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)100);
        NGramTokenizerTest.testNGrams(minGram, maxGram, TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)3072, (int)4096), "");
    }

    public void testLargeMaxGram() throws IOException {
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1290, (int)1300);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)1300);
        NGramTokenizerTest.testNGrams(minGram, maxGram, TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)3072, (int)4096), "");
    }

    public void testPreTokenization() throws IOException {
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1, (int)100);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)100);
        NGramTokenizerTest.testNGrams(minGram, maxGram, TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)0, (int)4096), "a");
    }

    public void testHeavyPreTokenization() throws IOException {
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1, (int)100);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)100);
        NGramTokenizerTest.testNGrams(minGram, maxGram, TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)0, (int)4096), "abcdef");
    }

    public void testFewTokenChars() throws IOException {
        char[] chrs = new char[TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)4000, (int)5000)];
        Arrays.fill(chrs, ' ');
        for (int i = 0; i < chrs.length; ++i) {
            if (!((double)NGramTokenizerTest.random().nextFloat() < 0.1)) continue;
            chrs[i] = 97;
        }
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1, (int)2);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)2);
        NGramTokenizerTest.testNGrams(minGram, maxGram, new String(chrs), " ");
    }

    public void testFullUTF8Range() throws IOException {
        int minGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)1, (int)100);
        int maxGram = TestUtil.nextInt((Random)NGramTokenizerTest.random(), (int)minGram, (int)100);
        String s = TestUtil.randomUnicodeString((Random)NGramTokenizerTest.random(), (int)4096);
        NGramTokenizerTest.testNGrams(minGram, maxGram, s, "");
        NGramTokenizerTest.testNGrams(minGram, maxGram, s, "abcdef");
    }
}

