/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.minhash;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.minhash.MinHashFilter;
import org.apache.lucene.analysis.minhash.MinHashFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;

/**
 * Tests for {@link MinHashFilter}: the 128-bit murmur hash, the unsigned
 * ordering helpers, the bounded {@code FixedSizeTreeSet}, and the tokens
 * produced by a filter built through {@link MinHashFilterFactory}.
 */
public class MinHashFilterTest extends BaseTokenStreamTestCase {
    /** Capacity used for every {@code FixedSizeTreeSet} built in this class. */
    private static final int HASH_SET_CAPACITY = 500;

    /** Token type expected on every token emitted by the filter under test. */
    private static final String MIN_HASH_TYPE = "MIN_HASH";

    /** Five words: exactly one 5-word shingle (24 characters). */
    private static final String ONE_SHINGLE_TEXT = "woof woof woof woof woof";

    /** Ten words: two consecutive 5-word shingles (49 characters). */
    private static final String TWO_SHINGLE_TEXT = "woof woof woof woof woof woof woof woof woof puff";

    /** Pins the hash of the 4-byte encoding of the int 0 with seed 0. */
    @Test
    public void testIntHash() {
        MinHashFilter.LongPair hash = hashOf(0);
        assertEquals(-3485513579396041028L, hash.val1);
        assertEquals(6383328099726337777L, hash.val2);
    }

    /** Pins the hash of a UTF-16LE encoded string with seed 0. */
    @Test
    public void testStringHash() {
        MinHashFilter.LongPair hash = new MinHashFilter.LongPair();
        byte[] bytes = ONE_SHINGLE_TEXT.getBytes(StandardCharsets.UTF_16LE);
        MinHashFilter.murmurhash3_x64_128(bytes, 0, bytes.length, 0, hash);
        assertEquals(7638079586852243959L, hash.val1);
        assertEquals(4378804943379391304L, hash.val2);
    }

    /**
     * Checks that a pair with the larger {@code val2} compares as greater even
     * though its {@code val1} is smaller.
     */
    @Test
    public void testSimpleOrder() {
        MinHashFilter.LongPair hash1 = new MinHashFilter.LongPair();
        hash1.val1 = 1L;
        hash1.val2 = 2L;
        MinHashFilter.LongPair hash2 = new MinHashFilter.LongPair();
        hash2.val1 = 2L;
        hash2.val2 = 1L;
        // A JUnit assertion, not the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea.
        assertTrue(hash1.compareTo(hash2) > 0);
    }

    /**
     * Checks the unsigned comparison helper on boundary values, then checks
     * that a bounded set fed with many hashes keeps exactly its capacity and
     * drains in strictly descending order.
     */
    @Test
    public void testHashOrder() {
        assertFalse(MinHashFilter.isLessThanUnsigned(0L, 0L));
        assertTrue(MinHashFilter.isLessThanUnsigned(0L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(1L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(-2L, -1L));
        assertTrue(MinHashFilter.isLessThanUnsigned(1L, 2L));
        assertTrue(MinHashFilter.isLessThanUnsigned(Long.MAX_VALUE, Long.MIN_VALUE));

        // Fewer hashes than the capacity: everything is kept, nothing is rejected.
        MinHashFilter.FixedSizeTreeSet<MinHashFilter.LongPair> minSet =
            new MinHashFilter.FixedSizeTreeSet<>(HASH_SET_CAPACITY);
        Set<MinHashFilter.LongPair> unadded = new HashSet<>();
        for (int i = 0; i < 100; i++) {
            offer(minSet, hashOf(i), unadded);
        }
        assertEquals(100, minSet.size());
        assertEquals(0, unadded.size());

        // Far more hashes than the capacity: all hashes are distinct, the set
        // holds exactly its capacity, and every other hash was recorded as left out.
        Set<MinHashFilter.LongPair> collisionDetection = new HashSet<>();
        unadded = new HashSet<>();
        minSet = new MinHashFilter.FixedSizeTreeSet<>(HASH_SET_CAPACITY);
        for (int i = 0; i < 1000000; i++) {
            MinHashFilter.LongPair hash = hashOf(i);
            collisionDetection.add(hash);
            offer(minSet, hash, unadded);
        }
        assertEquals(1000000, collisionDetection.size());
        assertEquals(HASH_SET_CAPACITY, minSet.size());
        assertEquals(1000000 - HASH_SET_CAPACITY, unadded.size());
        assertDrainsInStrictlyDescendingOrder(minSet);
    }

    /**
     * Fills a bounded set with 10000 hashes and checks that the retained
     * entries are strictly ordered, i.e. no entry appears twice.
     */
    @Test
    public void testHashNotRepeated() {
        MinHashFilter.FixedSizeTreeSet<MinHashFilter.LongPair> minSet =
            new MinHashFilter.FixedSizeTreeSet<>(HASH_SET_CAPACITY);
        Set<MinHashFilter.LongPair> unadded = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            offer(minSet, hashOf(i), unadded);
        }
        assertEquals(HASH_SET_CAPACITY, minSet.size());
        assertDrainsInStrictlyDescendingOrder(minSet);
    }

    /** Checks that the mock tokenizer splits ten words into two 5-word shingles. */
    @Test
    public void testMockShingleTokenizer() throws IOException {
        Tokenizer mockShingleTokenizer = createMockShingleTokenizer(5, TWO_SHINGLE_TEXT);
        assertTokenStreamContents(
            mockShingleTokenizer,
            new String[] {"woof woof woof woof woof", "woof woof woof woof puff"});
    }

    /** Pins the exact tokens produced for a single input shingle. */
    @Test
    public void testTokenStreamSingleInput() throws IOException {
        String[] hashes = new String[] {"\u2101\ud33d\ufa85\u8059\u219d\ua1c1\ud671\u676f"};
        TokenStream ts = createTokenStream(5, ONE_SHINGLE_TEXT, 1, 1, 100, false);
        assertTokenStreamContents(
            ts,
            hashes,
            new int[] {0},
            new int[] {24},
            new String[] {MIN_HASH_TYPE},
            new int[] {1},
            new int[] {1},
            24,
            0,
            null,
            true);

        // Two hash functions: one token per hash function is expected.
        ts = createTokenStream(5, ONE_SHINGLE_TEXT, 2, 1, 1, false);
        assertTokenStreamContents(
            ts,
            new String[] {
                new String(new char[] {'\u0000', '\u0000', '\u2101', '\ud33d', '\ufa85', '\u8059', '\u219d', '\ua1c1'}),
                new String(new char[] {'\u0000', '\u0001', '\u41f7', '\ue334', '\u9a70', '\u3a4e', '\u1981', '\u437c'})
            },
            new int[] {0, 0},
            new int[] {24, 24},
            new String[] {MIN_HASH_TYPE, MIN_HASH_TYPE},
            new int[] {1, 0},
            new int[] {1, 1},
            24,
            0,
            null,
            true);
    }

    /** Pins the exact tokens produced for two input shingles with one hash and one bucket. */
    @Test
    public void testTokenStream1() throws IOException {
        String[] hashes = new String[] {
            "\u2101\ud33d\ufa85\u8059\u219d\ua1c1\ud671\u676f",
            new String(new char[] {'\u8dfb', '\uf7e1', '\ua805', '\ude0b', '\ucc3c', '\u85b7', '\ue24e', '\ua53e'})
        };
        TokenStream ts = createTokenStream(5, TWO_SHINGLE_TEXT, 1, 1, 100, false);
        assertTokenStreamContents(
            ts,
            hashes,
            new int[] {0, 0},
            new int[] {49, 49},
            new String[] {MIN_HASH_TYPE, MIN_HASH_TYPE},
            new int[] {1, 0},
            new int[] {1, 1},
            49,
            0,
            null,
            true);
    }

    /**
     * Consumes a token stream completely and returns the term text of every token.
     * The stream is reset, drained, ended and closed here, so callers must not
     * close it again.
     *
     * @param ts the stream to consume
     * @return the term texts in emission order
     * @throws IOException if the stream fails while being consumed
     */
    private static List<String> getTokens(TokenStream ts) throws IOException {
        List<String> tokens = new ArrayList<>();
        // The attribute lookup does not depend on the current token, so do it once.
        CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(termAttribute.toString());
        }
        ts.end();
        ts.close();
        return tokens;
    }

    /** 100 hash functions, 1 bucket, hash set size 1: expects 100 tokens. */
    @Test
    public void testTokenStream2() throws IOException {
        TokenStream ts = createTokenStream(5, TWO_SHINGLE_TEXT, 100, 1, 1, false);
        List<String> tokens = getTokens(ts);
        assertEquals(100, tokens.size());
    }

    /** 10 hash functions, 1 bucket, hash set size 10: expects 20 tokens. */
    @Test
    public void testTokenStream3() throws IOException {
        TokenStream ts = createTokenStream(5, TWO_SHINGLE_TEXT, 10, 1, 10, false);
        List<String> tokens = getTokens(ts);
        assertEquals(20, tokens.size());
    }

    /**
     * 10 hash functions, 10 buckets, hash set size 1: expects 20 tokens without
     * rotation and 100 tokens with rotation.
     */
    @Test
    public void testTokenStream4() throws IOException {
        TokenStream ts = createTokenStream(5, TWO_SHINGLE_TEXT, 10, 10, 1, false);
        List<String> tokens = getTokens(ts);
        assertEquals(20, tokens.size());

        ts = createTokenStream(5, TWO_SHINGLE_TEXT, 10, 10, 1, true);
        tokens = getTokens(ts);
        assertEquals(100, tokens.size());
    }

    /**
     * 1 hash function, 100 buckets, hash set size 1: expects 2 tokens without
     * rotation; with rotation expects 100 tokens made of only 2 distinct values
     * that appear in non-decreasing order until the sequence wraps back to the
     * first value.
     */
    @Test
    public void testTokenStream5() throws IOException {
        TokenStream ts = createTokenStream(5, TWO_SHINGLE_TEXT, 1, 100, 1, false);
        List<String> tokens = getTokens(ts);
        assertEquals(2, tokens.size());

        ts = createTokenStream(5, TWO_SHINGLE_TEXT, 1, 100, 1, true);
        tokens = getTokens(ts);
        assertEquals(100, tokens.size());
        Set<String> distinct = new HashSet<>(tokens);
        assertEquals(2, distinct.size());

        boolean rolled = false;
        String first = null;
        String last = null;
        for (String current : tokens) {
            if (first == null) {
                first = current;
            }
            if (last != null) {
                if (!rolled) {
                    // The only permitted decrease is the wrap back to the first token.
                    if (current.compareTo(last) < 0) {
                        if (current.equals(first)) {
                            rolled = true;
                        } else {
                            fail("Incorrect hash order");
                        }
                    }
                } else if (!current.equals(first)) {
                    // After the wrap, every remaining token must repeat the first one.
                    fail("Incorrect hash order");
                }
            }
            last = current;
        }
    }

    /**
     * Builds a min-hash token stream over a mock shingle tokenizer.
     *
     * @param shingleSize number of whitespace-separated words per shingle
     * @param shingles input text; may be {@code null}, in which case no reader is set
     * @param hashCount value passed to the factory as {@code hashCount}
     * @param bucketCount value passed to the factory as {@code bucketCount}
     * @param hashSetSize value passed to the factory as {@code hashSetSize}
     * @param withRotation value passed to the factory as {@code withRotation}
     * @return the stream created by {@link MinHashFilterFactory}
     */
    public static TokenStream createTokenStream(int shingleSize, String shingles, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
        Tokenizer tokenizer = createMockShingleTokenizer(shingleSize, shingles);
        Map<String, String> lshffargs = new HashMap<>();
        lshffargs.put("hashCount", Integer.toString(hashCount));
        lshffargs.put("bucketCount", Integer.toString(bucketCount));
        lshffargs.put("hashSetSize", Integer.toString(hashSetSize));
        lshffargs.put("withRotation", Boolean.toString(withRotation));
        MinHashFilterFactory lshff = new MinHashFilterFactory(lshffargs);
        return lshff.create(tokenizer);
    }

    /**
     * Creates a mock tokenizer whose tokens are runs of {@code shingleSize}
     * whitespace-separated words.
     *
     * @param shingleSize number of words per emitted token
     * @param shingles input text, or {@code null} to leave the reader unset
     * @return the configured tokenizer with consumer checks enabled
     */
    private static Tokenizer createMockShingleTokenizer(int shingleSize, String shingles) {
        // One word followed by (shingleSize - 1) repetitions of "whitespace + word".
        String shinglePattern = "[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){" + (shingleSize - 1) + "}";
        MockTokenizer tokenizer =
            new MockTokenizer(new CharacterRunAutomaton(new RegExp(shinglePattern).toAutomaton()), true);
        tokenizer.setEnableChecks(true);
        if (shingles != null) {
            tokenizer.setReader(new StringReader(shingles));
        }
        return tokenizer;
    }

    /** Returns the 128-bit murmur hash (seed 0) of the 4-byte encoding of {@code value}. */
    private static MinHashFilter.LongPair hashOf(int value) {
        MinHashFilter.LongPair hash = new MinHashFilter.LongPair();
        MinHashFilter.murmurhash3_x64_128(MinHashFilter.getBytes(value), 0, 4, 0, hash);
        return hash;
    }

    /**
     * Offers {@code hash} to {@code minSet} and records in {@code unadded} the
     * hash that ended up outside the set: the offered hash when the set refuses
     * it, or the previous largest entry when the set is at capacity after the
     * insert and its largest entry has changed.
     */
    private static void offer(
            MinHashFilter.FixedSizeTreeSet<MinHashFilter.LongPair> minSet,
            MinHashFilter.LongPair hash,
            Set<MinHashFilter.LongPair> unadded) {
        MinHashFilter.LongPair previousLast = minSet.isEmpty() ? null : minSet.last();
        if (!minSet.add(hash)) {
            unadded.add(hash);
        } else if (previousLast != null
                && minSet.size() == HASH_SET_CAPACITY
                && !previousLast.equals(minSet.last())) {
            unadded.add(previousLast);
        }
    }

    /**
     * Empties {@code minSet} from its largest entry downwards and asserts that
     * each polled entry is strictly less than the one polled before it.
     */
    private static void assertDrainsInStrictlyDescendingOrder(
            MinHashFilter.FixedSizeTreeSet<MinHashFilter.LongPair> minSet) {
        MinHashFilter.LongPair previous = null;
        MinHashFilter.LongPair current;
        while ((current = minSet.pollLast()) != null) {
            if (previous != null) {
                assertTrue(isLessThan(current, previous));
            }
            previous = current;
        }
    }

    /**
     * Unsigned comparison of two pairs: {@code val2} is compared first and
     * {@code val1} breaks ties.
     */
    private static boolean isLessThan(MinHashFilter.LongPair hash1, MinHashFilter.LongPair hash2) {
        if (MinHashFilter.isLessThanUnsigned(hash1.val2, hash2.val2)) {
            return true;
        }
        return hash1.val2 == hash2.val2 && MinHashFilter.isLessThanUnsigned(hash1.val1, hash2.val1);
    }
}

