/*
 * Decompiled with CFR 0.152.
 */
package net.paoding.analysis.knife;

import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.dictionary.Hit;
import net.paoding.analysis.dictionary.Word;
import net.paoding.analysis.knife.Beef;
import net.paoding.analysis.knife.CharSet;
import net.paoding.analysis.knife.Collector;
import net.paoding.analysis.knife.Dictionaries;
import net.paoding.analysis.knife.DictionariesWare;
import net.paoding.analysis.knife.Knife;

public class CJKKnife
implements Knife,
DictionariesWare {
    private Dictionary vocabulary;
    private Dictionary noiseWords;
    private Dictionary noiseCharactors;
    private Dictionary units;

    /**
     * Creates a knife with no dictionaries attached.
     *
     * <p>All dictionary fields stay {@code null} until
     * {@link #setDictionaries(Dictionaries)} is called; {@code dissect}
     * dereferences them, so they must be supplied before dissecting.
     */
    public CJKKnife() {
    }

    /**
     * Creates a knife and immediately wires it to the given dictionaries
     * by delegating to {@link #setDictionaries(Dictionaries)}.
     *
     * @param dictionaries source of the vocabulary, noise-word,
     *                     noise-character and unit dictionaries
     */
    public CJKKnife(Dictionaries dictionaries) {
        setDictionaries(dictionaries);
    }

    /**
     * Replaces the four dictionaries this knife works with.
     *
     * @param dictionaries provider queried, in this order, for the vocabulary,
     *                     noise-word, noise-character and unit dictionaries
     */
    public void setDictionaries(Dictionaries dictionaries) {
        // Fetch in the same order as before, then publish to the fields.
        Dictionary vocabularyDictionary = dictionaries.getVocabularyDictionary();
        this.vocabulary = vocabularyDictionary;

        Dictionary noiseWordsDictionary = dictionaries.getNoiseWordsDictionary();
        this.noiseWords = noiseWordsDictionary;

        Dictionary noiseCharactorsDictionary = dictionaries.getNoiseCharactorsDictionary();
        this.noiseCharactors = noiseCharactorsDictionary;

        Dictionary unitsDictionary = dictionaries.getUnitsDictionary();
        this.units = unitsDictionary;
    }

    /**
     * Classifies the character at {@code index} for this knife.
     *
     * @param beef   text being analysed
     * @param offset start of the run currently being examined
     * @param index  position of the character to classify
     * @return {@code 1} when the character is a CJK unified ideograph;
     *         {@code 0} when it lies after {@code offset} and is an Arabic
     *         digit, a letter accepted by {@code CharSet.isLantingLetter},
     *         {@code '-'} or {@code '_'}; {@code -1} otherwise
     */
    public int assignable(Beef beef, int offset, int index) {
        final char current = beef.charAt(index);

        if (CharSet.isCjkUnifiedIdeographs(current)) {
            return 1;
        }

        // Non-CJK characters are only tolerated after the first position.
        if (index <= offset) {
            return -1;
        }

        final boolean attachable = CharSet.isArabianNumber(current)
                || CharSet.isLantingLetter(current)
                || current == '-'
                || current == '_';
        return attachable ? 0 : -1;
    }

    /**
     * Dissects the run of text starting at {@code offset} and reports the
     * resulting tokens to {@code collector}.
     *
     * <p>The method first measures the run with {@link #assignable}, then
     * looks every start position up in the vocabulary dictionary, collecting
     * non-noise dictionary words and handing stretches without dictionary
     * words to {@link #dissectIsolated}. Finally the whole run may be
     * collected as one token when {@link #shouldBeWord} accepts it.
     *
     * @param collector receiver of the produced tokens
     * @param beef      text being analysed
     * @param offset    index of the first character of the run
     * @return {@code -offset} when {@code offset > 0} and the run extends to
     *         {@code beef.length()}; otherwise the index of the first
     *         character for which {@code assignable} returned {@code 0}, or
     *         the end of the run if there was no such character
     */
    public int dissect(Collector collector, Beef beef, int offset) {
        int len;
        // Index of the first character classified as 0 by assignable, or -1.
        int point = -1;
        // Exclusive end of the run; grows while assignable returns 0 or 1.
        int limit = offset + 1;
        block4: while (true) {
            switch (this.assignable(beef, offset, limit)) {
                case -1: {
                    // Character does not belong to the run: stop measuring.
                    break block4;
                }
                case 0: {
                    // Remember only the first such position, then fall
                    // through so the run still grows past it.
                    if (point == -1) {
                        point = limit;
                    }
                }
                default: {
                    ++limit;
                    continue block4;
                }
            }
            // Not reached: every switch arm leaves or continues block4
            // (left over from decompilation).
            break;
        }
        // NOTE(review): a negative result presumably asks the caller to
        // retry from offset once more text is buffered - confirm against the
        // Knife contract.
        if (offset > 0 && limit == beef.length()) {
            return -offset;
        }
        Hit curSearch = null;
        // Start positions are taken from [offset, offsetLimit); candidate
        // words may still extend up to limit.
        int offsetLimit = point != -1 ? point : limit;
        // Largest exclusive end of any dictionary hit seen so far.
        int maxDicWordEnd = offset;
        // Start of a pending stretch without dictionary words, or -1.
        int isolatedOffset = -1;
        // Longest dictionary hit that starts exactly at offset.
        int maxDicWordLength = 0;
        block5: for (int curSearchOffset = offset; curSearchOffset < offsetLimit; ++curSearchOffset) {
            int curSearchEnd = curSearchOffset + 1;
            int curSearchLength = 1;
            // Try ever longer candidates starting at curSearchOffset.
            while (curSearchEnd <= limit) {
                boolean isolatedFound;
                curSearch = this.vocabulary.search(beef, curSearchOffset, curSearchLength);
                if (curSearch.isHit()) {
                    Word word;
                    // A dictionary word starts here, so the pending isolated
                    // stretch ends at this position: dissect it first.
                    if (isolatedOffset >= 0) {
                        this.dissectIsolated(collector, beef, isolatedOffset, curSearchOffset);
                        isolatedOffset = -1;
                    }
                    if (maxDicWordEnd < curSearchEnd) {
                        maxDicWordEnd = curSearchEnd;
                    }
                    if (curSearchOffset == offset && maxDicWordLength < curSearchLength) {
                        maxDicWordLength = curSearchLength;
                    }
                    // Noise words update the bookkeeping above but are not
                    // reported to the collector.
                    if (!(word = curSearch.getWord()).isNoise()) {
                        collector.collect(word.getText(), curSearchOffset, curSearchEnd);
                    }
                }
                // Give up on this start position when the search is
                // undefined, or when it is a miss and either the run is
                // exhausted or the next text character is smaller than the
                // corresponding character of curSearch.getNext().
                // NOTE(review): the comparison presumably relies on the
                // dictionary being sorted - confirm. The local "bl" is an
                // unused decompiler artifact.
                if (!(isolatedFound = curSearch.isUndefined()) && !curSearch.isHit()) {
                    boolean bl = isolatedFound = curSearchEnd >= limit || beef.charAt(curSearchEnd) < curSearch.getNext().charAt(curSearchLength);
                }
                if (isolatedFound) {
                    // Open a new isolated stretch only if none is pending
                    // and this position is not covered by a previous hit.
                    if (isolatedOffset >= 0 || curSearchOffset < maxDicWordEnd) continue block5;
                    isolatedOffset = curSearchOffset;
                    continue block5;
                }
                ++curSearchEnd;
                ++curSearchLength;
            }
        }
        // Anything after the last dictionary hit is dissected as isolated text.
        if (maxDicWordEnd < offsetLimit) {
            this.dissectIsolated(collector, beef, maxDicWordEnd, offsetLimit);
        }
        // Additionally collect the whole run as one token when it is longer
        // than two characters, was not already matched in full from offset,
        // and shouldBeWord accepts its surrounding characters.
        if ((len = limit - offset) > 2 && len != maxDicWordLength && this.shouldBeWord(beef, offset, limit)) {
            collector.collect(((Object)beef.subSequence(offset, limit)).toString(), offset, limit);
        }
        return point == -1 ? limit : point;
    }

    /**
     * Dissects the range {@code [offset, limit)}, which the caller found to
     * contain no vocabulary words.
     *
     * <p>At every position the method tries, in order, to consume a number
     * via {@link #collectNumber}, then noise words via
     * {@link #skipNoiseWords}, then a single noise character. Text between
     * such consumed pieces is passed to {@link #binDissect}.
     *
     * @param collector receiver of the produced tokens
     * @param beef      text being analysed
     * @param offset    inclusive start of the range
     * @param limit     exclusive end of the range
     */
    protected void dissectIsolated(Collector collector, Beef beef, int offset, int limit) {
        int cursor = offset;
        // Start of the text not yet handed to binDissect.
        int pendingStart = offset;

        while (cursor < limit) {
            // Numbers take precedence; noise words are tried only if no
            // number was consumed at this position.
            int consumedTo = this.collectNumber(collector, beef, cursor, limit, pendingStart);
            if (consumedTo <= cursor) {
                consumedTo = this.skipNoiseWords(collector, beef, cursor, limit, pendingStart);
            }
            if (consumedTo > cursor) {
                cursor = consumedTo;
                pendingStart = consumedTo;
                continue;
            }

            boolean noiseCharacter = this.noiseCharactors.search(beef, cursor, 1).isHit();
            if (noiseCharacter) {
                // Emit what precedes the noise character and restart after it.
                this.binDissect(collector, beef, pendingStart, cursor);
                pendingStart = cursor + 1;
            }
            cursor++;
        }

        if (pendingStart < limit) {
            this.binDissect(collector, beef, pendingStart, limit);
        }
    }

    /**
     * Tries to read a number starting at {@code offset} and, if one is found,
     * collects its decimal value and an optional trailing unit word.
     *
     * <p>Characters are converted with {@code CharSet.toNumber}: values below
     * 10 are treated as digits, values of 10 or more as multipliers applied
     * to the digits read before them. Scanning stops at {@code limit} or at
     * the first character for which {@code toNumber} is negative.
     *
     * <p>The unit lookup never reads past {@code limit} and stops as soon as
     * the units dictionary reports that no longer unit can follow, so a
     * collected unit token always lies inside {@code [offset, limit)}.
     *
     * @param collector receiver of the produced tokens
     * @param beef      text being analysed
     * @param offset    position at which a number may start
     * @param limit     exclusive end of the range that may be consumed
     * @param binOffset start of preceding text that has not yet been passed
     *                  to {@link #binDissect}; it is flushed before the
     *                  number is collected
     * @return {@code offset} when no digit was read; otherwise the index
     *         after the number, or after the unit word when one was collected
     */
    protected int collectNumber(Collector collector, Beef beef, int offset, int limit, int binOffset) {
        int number1 = -1; // accumulated value, -1 while nothing is accumulated
        int number2 = -1; // digits read since the last multiplier, -1 if none
        int maxUnit = 0;  // largest multiplier applied so far
        boolean hasDigit = false;

        int curTail;
        for (curTail = offset; curTail < limit; ++curTail) {
            char ch = beef.charAt(curTail);
            int bitValue = CharSet.toNumber(ch);
            if (bitValue < 0) {
                break;
            }
            // These three characters only count as the digit 2 when they are
            // the very first character of the number.
            if (bitValue == 2 && curTail != offset
                    && (ch == '\u4e24' || ch == '\u4fe9' || ch == '\u5006')) {
                break;
            }

            if (bitValue < 10) {
                hasDigit = true;
                number2 = number2 < 0 ? bitValue : number2 * 10 + bitValue;
                continue;
            }

            // Multiplier character.
            if (number2 < 0) {
                if (number1 < 0) {
                    number1 = 1;
                }
                number1 *= bitValue;
            } else {
                if (number1 < 0) {
                    number1 = 0;
                }
                if (bitValue >= maxUnit) {
                    // A multiplier at least as large as any before scales
                    // everything accumulated so far.
                    number1 += number2;
                    number1 *= bitValue;
                    maxUnit = bitValue;
                } else {
                    number1 += number2 * bitValue;
                }
            }
            number2 = -1;
        }

        if (!hasDigit) {
            return offset;
        }
        if (number2 > 0) {
            number1 = number1 < 0 ? number2 : number1 + number2;
        }
        if (number1 < 0) {
            // Digits were consumed but produced no value to collect.
            return curTail;
        }

        if (offset > binOffset) {
            this.binDissect(collector, beef, binOffset, offset);
        }
        collector.collect(String.valueOf(number1), offset, curTail);

        if (this.units != null) {
            Hit lastUnit = null;
            int unitEnd = curTail;
            // Bounded by limit so the unit cannot extend beyond the range
            // this call is allowed to consume.
            for (int end = curTail + 1; end <= limit; ++end) {
                Hit candidate = this.units.search(beef, curTail, end - curTail);
                if (!candidate.isHit()) {
                    break;
                }
                lastUnit = candidate;
                unitEnd = end;
                if (!candidate.isUnclosed()) {
                    // Same stop rule as skipNoiseWords: a hit that is not
                    // unclosed ends the scan.
                    break;
                }
            }
            if (lastUnit != null) {
                collector.collect(lastUnit.getWord().getText(), curTail, unitEnd);
                return unitEnd;
            }
        }
        return curTail;
    }

    /**
     * Skips noise words that start at {@code offset}.
     *
     * <p>Candidates of growing length, starting at two characters, are looked
     * up in the noise-word dictionary. On each hit the position advances to
     * the end of that hit, and later iterations search from the new position.
     * The scan stops when a lookup is undefined or not unclosed.
     *
     * <p>Before the first noise word is skipped, pending text in
     * {@code [binOffset, offset)} is passed to {@link #binDissect}. This
     * includes text that starts at index 0: only {@code -1} means that
     * nothing is pending.
     *
     * @param collector receiver of the produced tokens
     * @param beef      text being analysed
     * @param offset    position at which a noise word may start
     * @param end       exclusive end of the range that may be consumed
     * @param binOffset start of preceding text not yet passed to
     *                  {@code binDissect}
     * @return the index after the last skipped noise word, or {@code offset}
     *         unchanged when none was found
     */
    protected int skipNoiseWords(Collector collector, Beef beef, int offset, int end, int binOffset) {
        for (int k = offset + 2; k <= end; ++k) {
            Hit word = this.noiseWords.search(beef, offset, k - offset);
            if (word.isHit()) {
                // 0 is a valid start index, so test against the -1 sentinel
                // rather than "> 0"; otherwise leading text would be dropped.
                if (binOffset >= 0 && offset > binOffset) {
                    this.binDissect(collector, beef, binOffset, offset);
                    binOffset = -1; // flushed: do not emit it again
                }
                offset = k;
            }
            if (word.isUndefined() || !word.isUnclosed()) {
                break;
            }
        }
        return offset;
    }

    /**
     * Collects the range {@code [offset, limit)} as overlapping two-character
     * tokens; a range of exactly one character is collected as a single
     * one-character token, and an empty range produces nothing.
     *
     * @param collector receiver of the produced tokens
     * @param beef      text being analysed
     * @param offset    inclusive start of the range
     * @param limit     exclusive end of the range
     */
    protected void binDissect(Collector collector, Beef beef, int offset, int limit) {
        final int span = limit - offset;
        if (span == 1) {
            collector.collect(beef.subSequence(offset, limit).toString(), offset, limit);
            return;
        }

        final int lastStart = limit - 1;
        int start = offset;
        while (start < lastStart) {
            int pairEnd = start + 2;
            collector.collect(beef.subSequence(start, pairEnd).toString(), start, pairEnd);
            start++;
        }
    }

    /**
     * Tells whether the range {@code [offset, end)} is enclosed by a matching
     * pair of quotation marks or angle brackets.
     *
     * <p>The characters inspected are the one just before {@code offset} and
     * the one at {@code end}, i.e. both lie outside the range itself.
     * NOTE(review): what {@code Beef.charAt} yields for an index outside the
     * text is not visible here - confirm it is safe for {@code offset == 0}.
     *
     * @param beef   text being analysed
     * @param offset inclusive start of the candidate word
     * @param end    exclusive end of the candidate word
     * @return {@code true} when the surrounding characters form one of the
     *         recognised opening/closing pairs
     */
    protected boolean shouldBeWord(Beef beef, int offset, int end) {
        final char opening = beef.charAt(offset - 1);
        final char closing = beef.charAt(end);

        // Each opening mark accepts exactly one closing mark.
        switch (opening) {
            case '\u201c':
                return closing == '\u201d';
            case '\u2018':
                return closing == '\u2019';
            case '\'':
                return closing == '\'';
            case '\"':
                return closing == '\"';
            case '\u300a':
                return closing == '\u300b';
            case '\u3008':
                return closing == '\u3009';
            case '<':
                return closing == '>';
            default:
                return false;
        }
    }
}

