/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WeightedAttributesHandler;
import weka.core.WeightedInstancesHandler;
import weka.filters.SimpleBatchFilter;
import weka.filters.UnsupervisedFilter;

/**
 * Batch filter that merges the infrequent values of selected nominal
 * attributes into a single combined value.
 *
 * <p>For every selected nominal attribute other than the class attribute, the
 * filter counts how many instances carry each value. Values whose count is
 * below the minimum frequency are collapsed into one merged value, provided
 * that at least two such values exist for the attribute. A modified attribute
 * is renamed by appending {@code "_merged_infrequent_values"} to its name, and
 * the merged value always occupies index 0 of the new value list.
 *
 * <p>Frequencies are plain instance counts: each non-missing occurrence adds
 * one to the counter, regardless of the instance's weight.
 */
public class MergeInfrequentNominalValues
extends SimpleBatchFilter
implements UnsupervisedFilter,
WeightedAttributesHandler,
WeightedInstancesHandler {
    /** For serialization. */
    static final long serialVersionUID = 4444337331921333847L;

    /** Minimum number of occurrences a value needs in order to be kept. */
    protected int m_MinimumFrequency = 2;

    /** The attribute range the filter acts on (possibly inverted). */
    protected Range m_SelectCols = new Range();

    /** Indices of the selected attributes, resolved in determineOutputFormat. */
    protected int[] m_SelectedAttributes;

    /** Per attribute index: true if the attribute's values get merged. */
    protected boolean[] m_AttToBeModified;

    /**
     * Per modified attribute: maps each old value index to its new value
     * index (0 is the merged value). Entries for unmodified attributes are null.
     */
    protected int[][] m_NewValues;

    /** Whether the merged value is labelled with a hash code instead of the joined names. */
    protected boolean m_UseShortIDs = false;

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    @Override
    public String globalInfo() {
        return "Merges all values of the specified nominal attributes that are insufficiently frequent.";
    }

    /**
     * Lists the command-line options of this filter, followed by the options
     * of the superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();
        // The option name is the bare letter, consistent with R, V and S below
        // and with the 'N' that setOptions parses.
        result.addElement(new Option("\tThe minimum frequency for a value to remain (default: 2).\n", "N", 1, "-N <int>"));
        result.addElement(new Option("\tSets list of attributes to act on (or its inverse). 'first and 'last' are accepted as well.'\n\tE.g.: first-5,7,9,20-last\n\t(default: 1,2)", "R", 1, "-R <range>"));
        result.addElement(new Option("\tInvert matching sense (i.e. act on all attributes not specified in list)", "V", 0, "-V"));
        result.addElement(new Option("\tUse short IDs for merged attribute values.", "S", 0, "-S"));
        result.addAll(Collections.list(super.listOptions()));
        return result.elements();
    }

    /**
     * Returns the current option settings, followed by those of the superclass.
     *
     * @return the options as an array of strings
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        result.add("-N");
        result.add(Integer.toString(getMinimumFrequency()));
        result.add("-R");
        result.add(getAttributeIndices());
        if (getInvertSelection()) {
            result.add("-V");
        }
        if (getUseShortIDs()) {
            result.add("-S");
        }
        Collections.addAll(result, super.getOptions());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Parses the given options: {@code -N <int>}, {@code -R <range>},
     * {@code -V} and {@code -S}; the remaining options are handed to the
     * superclass.
     *
     * @param options the options to parse
     * @throws Exception if an option value cannot be parsed or unknown
     *                   options remain
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String minFrequencyString = Utils.getOption('N', options);
        if (minFrequencyString.length() != 0) {
            setMinimumFrequency(Integer.parseInt(minFrequencyString));
        } else {
            setMinimumFrequency(2);
        }

        // An absent -R yields the empty string, which is passed through as the range.
        setAttributeIndices(Utils.getOption('R', options));

        setInvertSelection(Utils.getFlag('V', options));
        setUseShortIDs(Utils.getFlag('S', options));
        super.setOptions(options);
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Returns the tip text for the minimumFrequency property.
     *
     * @return the tip text
     */
    public String minimumFrequencyTipText() {
        return "The minimum frequency for a value to remain.";
    }

    /**
     * Gets the minimum frequency a value needs in order to remain unmerged.
     *
     * @return the minimum frequency
     */
    public int getMinimumFrequency() {
        return m_MinimumFrequency;
    }

    /**
     * Sets the minimum frequency a value needs in order to remain unmerged.
     *
     * @param minF the minimum frequency
     */
    public void setMinimumFrequency(int minF) {
        m_MinimumFrequency = minF;
    }

    /**
     * Returns the tip text for the attributeIndices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on (or its inverse). This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Gets the attribute range as a range-list string.
     *
     * @return the range list
     */
    public String getAttributeIndices() {
        return m_SelectCols.getRanges();
    }

    /**
     * Sets the attribute range from a range-list string.
     *
     * @param rangeList the range list
     */
    public void setAttributeIndices(String rangeList) {
        m_SelectCols.setRanges(rangeList);
    }

    /**
     * Sets the attribute range from an array of attribute indices, which is
     * converted with {@code Range.indicesToRangeList}.
     *
     * @param attributes the attribute indices
     */
    public void setAttributeIndicesArray(int[] attributes) {
        setAttributeIndices(Range.indicesToRangeList(attributes));
    }

    /**
     * Returns the tip text for the invertSelection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Determines whether selected attributes are to be acted on or all other attributes are used instead.";
    }

    /**
     * Gets whether the attribute selection is inverted.
     *
     * @return true if the selection is inverted
     */
    public boolean getInvertSelection() {
        return m_SelectCols.getInvert();
    }

    /**
     * Sets whether the attribute selection is inverted.
     *
     * @param invert true to invert the selection
     */
    public void setInvertSelection(boolean invert) {
        m_SelectCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the useShortIDs property.
     *
     * @return the tip text
     */
    public String useShortIDsTipText() {
        return "If true, short IDs will be used for merged attribute values.";
    }

    /**
     * Gets whether short IDs are used for merged values.
     *
     * @return true if short IDs are used
     */
    public boolean getUseShortIDs() {
        return m_UseShortIDs;
    }

    /**
     * Sets whether short IDs are used for merged values.
     *
     * @param m_UseShortIDs true to use short IDs
     */
    public void setUseShortIDs(boolean m_UseShortIDs) {
        this.m_UseShortIDs = m_UseShortIDs;
    }

    /**
     * Always returns true. {@link #determineOutputFormat(Instances)} iterates
     * over the instances of the format it is given in order to count value
     * frequencies.
     *
     * @return true
     */
    @Override
    public boolean allowAccessToFullInputFormat() {
        return true;
    }

    /**
     * Tells whether the attribute at the given index takes part in frequency
     * counting: it must be nominal and must not be the class attribute.
     */
    private static boolean isCountedAttribute(Instances data, int index) {
        return index != data.classIndex() && data.attribute(index).isNominal();
    }

    /**
     * Counts value frequencies of the selected nominal attributes, decides
     * which attributes need merging, records the old-to-new value index
     * mapping and builds the output header.
     *
     * <p>An attribute is only modified when more than one of its values falls
     * below the minimum frequency; merging a single value would change nothing
     * but its position.
     *
     * @param inputFormat the input data, whose instances are counted
     * @return an empty dataset describing the output format
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) {
        final int numAtts = inputFormat.numAttributes();

        m_SelectCols.setUpper(numAtts - 1);
        m_SelectedAttributes = m_SelectCols.getSelection();

        // Allocate one counter array per attribute that is to be counted.
        int[][] freqs = new int[numAtts][];
        for (int current : m_SelectedAttributes) {
            if (isCountedAttribute(inputFormat, current)) {
                freqs[current] = new int[inputFormat.attribute(current).numValues()];
            }
        }

        // Count occurrences; missing values are skipped and weights are not used.
        for (Instance inst : inputFormat) {
            for (int current : m_SelectedAttributes) {
                if (!isCountedAttribute(inputFormat, current) || inst.isMissing(current)) {
                    continue;
                }
                freqs[current][(int) inst.value(current)]++;
            }
        }

        // Determine, per attribute, how many values are below the threshold.
        int[] numInfrequentValues = new int[numAtts];
        for (int current : m_SelectedAttributes) {
            if (!isCountedAttribute(inputFormat, current)) {
                continue;
            }
            Attribute att = inputFormat.attribute(current);
            for (int k = 0; k < att.numValues(); ++k) {
                if (m_Debug) {
                    System.err.println("Attribute: " + att.name() + " Value: " + att.value(k) + " Freq.: " + freqs[current][k]);
                }
                if (freqs[current][k] < m_MinimumFrequency) {
                    numInfrequentValues[current]++;
                }
            }
        }

        // Build the old-index -> new-index mapping for attributes to be modified.
        // Uncounted attributes have zero infrequent values and are skipped here.
        m_AttToBeModified = new boolean[numAtts];
        m_NewValues = new int[numAtts][];
        for (int current : m_SelectedAttributes) {
            if (numInfrequentValues[current] <= 1) {
                continue;
            }
            Attribute att = inputFormat.attribute(current);
            m_AttToBeModified[current] = true;
            m_NewValues[current] = new int[att.numValues()];
            // Index 0 is reserved for the merged value; kept values follow in order.
            int nextIndex = 1;
            for (int k = 0; k < att.numValues(); ++k) {
                m_NewValues[current][k] = freqs[current][k] < m_MinimumFrequency ? 0 : nextIndex++;
            }
        }

        // Assemble the output header.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int i = 0; i < numAtts; ++i) {
            Attribute att = inputFormat.attribute(i);
            if (!m_AttToBeModified[i]) {
                atts.add((Attribute) att.copy());
                continue;
            }

            ArrayList<String> vals = new ArrayList<String>();
            StringBuilder mergedLabel = new StringBuilder();
            for (int j = 0; j < att.numValues(); ++j) {
                if (m_NewValues[i][j] == 0) {
                    if (mergedLabel.length() != 0) {
                        mergedLabel.append("_or_");
                    }
                    mergedLabel.append(att.value(j));
                } else {
                    vals.add(att.value(j));
                }
            }
            // The merged value goes first, matching the index 0 used in m_NewValues.
            String label = mergedLabel.toString();
            vals.add(0, m_UseShortIDs ? Integer.toString(label.hashCode()) : label);

            Attribute merged = new Attribute(att.name() + "_merged_infrequent_values", vals);
            merged.setWeight(att.weight());
            atts.add(merged);
        }

        Instances data = new Instances(inputFormat.relationName(), atts, 0);
        data.setClassIndex(inputFormat.classIndex());
        return data;
    }

    /**
     * Returns the capabilities of this filter: all attribute types, all class
     * types, missing values, missing class values and no class.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    /**
     * Converts the given instances to the output format by remapping the value
     * indices of modified attributes; missing values and unmodified attributes
     * are copied through unchanged. Instance weights are preserved.
     *
     * @param instances the data to convert
     * @return the converted data
     * @throws Exception declared by the overridden method
     */
    @Override
    protected Instances process(Instances instances) throws Exception {
        final int numAtts = instances.numAttributes();
        Instances result = new Instances(getOutputFormat(), instances.numInstances());
        for (int i = 0; i < instances.numInstances(); ++i) {
            Instance inst = instances.instance(i);
            double[] newData = new double[numAtts];
            for (int j = 0; j < numAtts; ++j) {
                if (m_AttToBeModified[j] && !inst.isMissing(j)) {
                    newData[j] = m_NewValues[j][(int) inst.value(j)];
                } else {
                    newData[j] = inst.value(j);
                }
            }
            DenseInstance instNew = new DenseInstance(inst.weight(), newData);
            instNew.setDataset(result);
            copyValues(instNew, false, inst.dataset(), outputFormatPeek());
            result.add(instNew);
        }
        return result;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8034 $");
    }

    /**
     * Runs the filter with the given command-line arguments.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        MergeInfrequentNominalValues.runFilter(new MergeInfrequentNominalValues(), args);
    }
}

