/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractStatisticalTokenDistance;
import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.PrintfFormat;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.tokens.SimpleTokenizer;

/**
 * Token-based string similarity: every distinct token of the first string
 * that also occurs in the second string contributes to the score, with the
 * size of the contribution derived from the token's weight.
 *
 * <p>Weights are assigned by {@link #prepare(String)} as
 * {@code -log(df / collectionSize)}, using the inherited
 * {@code documentFrequency} and {@code collectionSize} statistics
 * (presumably gathered by the superclass during training -- not visible in
 * this file).
 *
 * <p>NOTE(review): {@code oversimplified} is private, initialised to
 * {@code false} and never assigned anywhere in this class. As written, the
 * mismatch penalty -- and therefore {@code mismatchFactor} -- never affects
 * the value returned by {@link #score(StringWrapper, StringWrapper)}.
 */
public class TokenFelligiSunter
extends AbstractStatisticalTokenDistance {
    /** Splits raw strings into tokens in {@link #prepare(String)}. */
    private Tokenizer tokenizer;
    /** Multiplier for the weight of an unmatched token (oversimplified mode only). */
    private double mismatchFactor = 0.5;
    /** Selects the plain weight-sum scoring variant; never enabled in this class. */
    private boolean oversimplified = false;

    /**
     * Creates a distance that uses the given tokenizer and mismatch factor.
     *
     * @param tokenizer      tokenizer applied to strings passed to {@link #prepare(String)}
     * @param mismatchFactor multiplier applied to the weight of unmatched tokens
     */
    public TokenFelligiSunter(Tokenizer tokenizer, double mismatchFactor) {
        this.tokenizer = tokenizer;
        this.mismatchFactor = mismatchFactor;
    }

    /** Creates a distance with {@code SimpleTokenizer.DEFAULT_TOKENIZER} and a mismatch factor of 0.5. */
    public TokenFelligiSunter() {
        this(SimpleTokenizer.DEFAULT_TOKENIZER, 0.5);
    }

    /**
     * Sets the multiplier applied to the weight of unmatched tokens.
     *
     * @param d the new mismatch factor
     */
    public void setMismatchFactor(double d) {
        this.mismatchFactor = d;
    }

    /**
     * Boxed overload of {@link #setMismatchFactor(double)}.
     *
     * @param d the new mismatch factor
     * @throws NullPointerException if {@code d} is {@code null} (unboxing fails)
     */
    public void setMismatchFactor(Double d) {
        this.setMismatchFactor(d.doubleValue());
    }

    /**
     * Scores two prepared strings.
     *
     * <p>For every distinct token of {@code s} that {@code t} also contains,
     * the score grows by either the token's weight in {@code t}
     * (oversimplified mode) or by
     * {@code -log(1 - (1 - p^2)^(|s| * |t|))} with {@code p = exp(-weight)}.
     * Tokens of {@code s} missing from {@code t} are penalised by
     * {@code weight * mismatchFactor} in oversimplified mode only.
     *
     * @param s first string; must be a {@link BagOfTokens}, as returned by {@link #prepare(String)}
     * @param t second string; must be a {@link BagOfTokens}, as returned by {@link #prepare(String)}
     * @return the accumulated similarity
     * @throws ClassCastException if either argument is not a {@link BagOfTokens}
     */
    @Override
    public double score(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        // Multiply in double: an int product silently wraps once both bags
        // are large, which would corrupt the exponent used below.
        double pairCount = (double)sBag.size() * (double)tBag.size();
        double sim = 0.0;
        for (Token tok : sBag.getDistinctTokens()) {
            if (tBag.contains(tok)) {
                sim += this.oversimplified
                        ? tBag.getWeight(tok)
                        : TokenFelligiSunter.sharedTokenEvidence(tBag.getWeight(tok), pairCount);
            } else if (this.oversimplified) {
                sim -= sBag.getWeight(tok) * this.mismatchFactor;
            }
        }
        return sim;
    }

    /**
     * Computes {@code -log(1 - (1 - p^2)^pairCount)} for {@code p = exp(-weight)}.
     *
     * <p>{@code log1p} and {@code expm1} are used instead of forming
     * {@code 1 - x} explicitly: for heavily weighted tokens {@code p^2} is
     * far below the spacing of doubles near 1, so the naive form rounds to
     * {@code log(1)} and ultimately yields an infinite term.
     */
    private static double sharedTokenEvidence(double weight, double pairCount) {
        double p = Math.exp(-weight);
        // pairCount * log(1 - p^2), i.e. the log of (1 - p^2)^pairCount.
        double logComplement = pairCount * Math.log1p(-(p * p));
        // 1 - exp(x) == -expm1(x), evaluated without cancellation.
        return -Math.log(-Math.expm1(logComplement));
    }

    /**
     * Tokenizes {@code s} and assigns a weight to each distinct token.
     *
     * <p>When {@code collectionSize} is positive the weight is
     * {@code -log(df / collectionSize)}, where {@code df} is the token's
     * entry in {@code documentFrequency}, or 1 if the token has no entry.
     * Otherwise every token gets the fixed weight {@code log(10)}.
     *
     * @param s the raw string
     * @return a {@link BagOfTokens} carrying the per-token weights
     */
    @Override
    public StringWrapper prepare(String s) {
        BagOfTokens bag = new BagOfTokens(s, this.tokenizer.tokenize(s));
        for (Token tok : bag.getDistinctTokens()) {
            double weight;
            if (this.collectionSize > 0) {
                Integer dfInteger = (Integer)this.documentFrequency.get(tok);
                // Tokens without a recorded frequency are treated as seen once.
                double df = dfInteger == null ? 1.0 : (double)dfInteger.intValue();
                weight = -Math.log(df / (double)this.collectionSize);
            } else {
                // No collection statistics: fall back to a constant weight.
                weight = Math.log(10.0);
            }
            bag.setWeight(tok, weight);
        }
        return bag;
    }

    /**
     * Describes how the score of two prepared strings comes about: lists each
     * shared token with its weight in {@code t}, followed by the final score.
     *
     * @param s first string; must be a {@link BagOfTokens}
     * @param t second string; must be a {@link BagOfTokens}
     * @return a two-line, human-readable explanation
     */
    @Override
    public String explainScore(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        StringBuilder buf = new StringBuilder();
        PrintfFormat fmt = new PrintfFormat("%.3f");
        buf.append("Common tokens: ");
        for (Token tok : sBag.getDistinctTokens()) {
            if (!tBag.contains(tok)) {
                continue;
            }
            buf.append(" ").append(tok.getValue()).append(": ");
            buf.append(fmt.sprintf(tBag.getWeight(tok)));
        }
        buf.append("\nscore = ").append(this.score(s, t));
        return buf.toString();
    }

    /** Returns the fixed label {@code [TokenFelligiSunter]}. */
    @Override
    public String toString() {
        return "[TokenFelligiSunter]";
    }

    /**
     * Command-line entry point; hands a default instance and the arguments
     * to {@code doMain}, which is defined outside this file.
     *
     * @param argv command-line arguments, passed through unchanged
     */
    public static void main(String[] argv) {
        TokenFelligiSunter.doMain(new TokenFelligiSunter(), argv);
    }
}

