/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.util;

import java.io.Closeable;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Arrays;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.IOUtils;

public class TestSegmentingTokenizerBase
extends BaseTokenStreamTestCase {
    private Analyzer sentence;
    private Analyzer sentenceAndWord;

    public void setUp() throws Exception {
        super.setUp();
        this.sentence = new Analyzer(){

            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                return new Analyzer.TokenStreamComponents((Tokenizer)new WholeSentenceTokenizer());
            }
        };
        this.sentenceAndWord = new Analyzer(){

            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                return new Analyzer.TokenStreamComponents((Tokenizer)new SentenceAndWordTokenizer());
            }
        };
    }

    public void tearDown() throws Exception {
        IOUtils.close((Closeable[])new Closeable[]{this.sentence, this.sentenceAndWord});
        super.tearDown();
    }

    public void testBasics() throws IOException {
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentence, (String)"The acronym for United States is U.S. but this doesn't end a sentence", (String[])new String[]{"The acronym for United States is U.S. but this doesn't end a sentence"});
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentence, (String)"He said, \"Are you going?\" John shook his head.", (String[])new String[]{"He said, \"Are you going?\" ", "John shook his head."});
    }

    public void testCustomAttributes() throws IOException {
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)"He said, \"Are you going?\" John shook his head.", (String[])new String[]{"He", "said", "Are", "you", "going", "John", "shook", "his", "head"}, (int[])new int[]{0, 3, 10, 14, 18, 26, 31, 37, 41}, (int[])new int[]{2, 7, 13, 17, 23, 30, 36, 40, 45}, (int[])new int[]{1, 1, 1, 1, 1, 2, 1, 1, 1});
    }

    public void testReuse() throws IOException {
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)"He said, \"Are you going?\"", (String[])new String[]{"He", "said", "Are", "you", "going"}, (int[])new int[]{0, 3, 10, 14, 18}, (int[])new int[]{2, 7, 13, 17, 23}, (int[])new int[]{1, 1, 1, 1, 1});
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)"John shook his head.", (String[])new String[]{"John", "shook", "his", "head"}, (int[])new int[]{0, 5, 11, 15}, (int[])new int[]{4, 10, 14, 19}, (int[])new int[]{1, 1, 1, 1});
    }

    public void testEnd() throws IOException {
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)"John shook his head          ", (String[])new String[]{"John", "shook", "his", "head"});
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)"John shook his head.          ", (String[])new String[]{"John", "shook", "his", "head"});
    }

    public void testHugeDoc() throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] whitespace = new char[4094];
        Arrays.fill(whitespace, '\n');
        sb.append(whitespace);
        sb.append("testing 1234");
        String input = sb.toString();
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentenceAndWord, (String)input, (String[])new String[]{"testing", "1234"});
    }

    public void testHugeTerm() throws IOException {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 10240; ++i) {
            sb.append('a');
        }
        String input = sb.toString();
        char[] token = new char[1024];
        Arrays.fill(token, 'a');
        String expectedToken = new String(token);
        String[] expected = new String[]{expectedToken, expectedToken, expectedToken, expectedToken, expectedToken, expectedToken, expectedToken, expectedToken, expectedToken, expectedToken};
        TestSegmentingTokenizerBase.assertAnalyzesTo((Analyzer)this.sentence, (String)input, (String[])expected);
    }

    public void testRandomStrings() throws Exception {
        TestSegmentingTokenizerBase.checkRandomData((Random)TestSegmentingTokenizerBase.random(), (Analyzer)this.sentence, (int)(10000 * RANDOM_MULTIPLIER));
        TestSegmentingTokenizerBase.checkRandomData((Random)TestSegmentingTokenizerBase.random(), (Analyzer)this.sentenceAndWord, (int)(10000 * RANDOM_MULTIPLIER));
    }

    static class SentenceAndWordTokenizer
    extends SegmentingTokenizerBase {
        int sentenceStart;
        int sentenceEnd;
        int wordStart;
        int wordEnd;
        int posBoost = -1;
        private CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
        private OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
        private PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)this.addAttribute(PositionIncrementAttribute.class);

        public SentenceAndWordTokenizer() {
            super(BaseTokenStreamTestCase.newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT));
        }

        protected void setNextSentence(int sentenceStart, int sentenceEnd) {
            this.wordEnd = this.sentenceStart = sentenceStart;
            this.wordStart = this.sentenceStart;
            this.sentenceEnd = sentenceEnd;
            ++this.posBoost;
        }

        public void reset() throws IOException {
            super.reset();
            this.posBoost = -1;
        }

        protected boolean incrementWord() {
            this.wordStart = this.wordEnd;
            while (this.wordStart < this.sentenceEnd && !Character.isLetterOrDigit(this.buffer[this.wordStart])) {
                ++this.wordStart;
            }
            if (this.wordStart == this.sentenceEnd) {
                return false;
            }
            this.wordEnd = this.wordStart + 1;
            while (this.wordEnd < this.sentenceEnd && Character.isLetterOrDigit(this.buffer[this.wordEnd])) {
                ++this.wordEnd;
            }
            this.clearAttributes();
            this.termAtt.copyBuffer(this.buffer, this.wordStart, this.wordEnd - this.wordStart);
            this.offsetAtt.setOffset(this.correctOffset(this.offset + this.wordStart), this.correctOffset(this.offset + this.wordEnd));
            this.posIncAtt.setPositionIncrement(this.posIncAtt.getPositionIncrement() + this.posBoost);
            this.posBoost = 0;
            return true;
        }
    }

    static class WholeSentenceTokenizer
    extends SegmentingTokenizerBase {
        int sentenceStart;
        int sentenceEnd;
        boolean hasSentence;
        private CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
        private OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);

        public WholeSentenceTokenizer() {
            super(BaseTokenStreamTestCase.newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT));
        }

        protected void setNextSentence(int sentenceStart, int sentenceEnd) {
            this.sentenceStart = sentenceStart;
            this.sentenceEnd = sentenceEnd;
            this.hasSentence = true;
        }

        protected boolean incrementWord() {
            if (this.hasSentence) {
                this.hasSentence = false;
                this.clearAttributes();
                this.termAtt.copyBuffer(this.buffer, this.sentenceStart, this.sentenceEnd - this.sentenceStart);
                this.offsetAtt.setOffset(this.correctOffset(this.offset + this.sentenceStart), this.correctOffset(this.offset + this.sentenceEnd));
                return true;
            }
            return false;
        }
    }
}

