/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.spectral.kmeans;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.kmeans.EigenSeedGenerator;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.spectral.AffinityMatrixInputJob;
import org.apache.mahout.clustering.spectral.MatrixDiagonalizeJob;
import org.apache.mahout.clustering.spectral.UnitVectorizerJob;
import org.apache.mahout.clustering.spectral.VectorMatrixMultiplicationJob;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.decomposer.lanczos.LanczosState;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;
import org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver;
import org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob;
import org.apache.mahout.math.hadoop.stochasticsvd.SSVDSolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line driver for spectral k-means clustering.
 *
 * <p>The pipeline, as wired together in
 * {@link #run(Configuration, Path, Path, int, int, DistanceMeasure, double, int, Path, boolean, int, int, int, int)}:
 * <ol>
 *   <li>convert the input into an affinity matrix ({@link AffinityMatrixInputJob});</li>
 *   <li>compute a diagonal vector from it ({@link MatrixDiagonalizeJob}) and combine both via
 *       {@link VectorMatrixMultiplicationJob} (written under a directory named {@code laplacian});</li>
 *   <li>extract eigenvectors with either {@link SSVDSolver} or
 *       {@link DistributedLanczosSolver} + {@link EigenVerificationJob};</li>
 *   <li>normalize the rows ({@link UnitVectorizerJob});</li>
 *   <li>seed with {@link EigenSeedGenerator} and cluster with {@link KMeansDriver};</li>
 *   <li>log the cluster id of every clustered point.</li>
 * </ol>
 */
public class SpectralKMeansDriver
extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(SpectralKMeansDriver.class);

    /** Factor applied to the cluster count to get the number of Lanczos eigenvectors requested. */
    public static final double OVERSHOOTMULTIPLIER = 2.0;
    /** Default for the {@code reduceTasks} option (SSVD only). */
    public static final int REDUCERS = 10;
    /** Default for the {@code outerProdBlockHeight} option (SSVD only). */
    public static final int BLOCKHEIGHT = 30000;
    /** Default for the {@code oversampling} option (SSVD only). */
    public static final int OVERSAMPLING = 15;
    /** Default for the {@code powerIter} option (SSVD only). */
    public static final int POWERITERS = 0;

    /**
     * Launches the driver through {@link ToolRunner}. The value returned by the tool is not inspected.
     *
     * @param args command-line arguments, forwarded unchanged
     * @throws Exception whatever {@link ToolRunner#run} propagates
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new SpectralKMeansDriver(), args);
    }

    /**
     * Registers the command-line options, parses them, and starts the clustering pipeline.
     *
     * @param arg0 raw command-line arguments
     * @return always {@code 0}: both after a completed run and when argument parsing yields
     *         {@code null} (presumably help output or a parse failure; confirm against AbstractJob)
     * @throws Exception if option values cannot be parsed or any underlying job fails
     */
    public int run(String[] arg0) throws Exception {
        Configuration conf = this.getConf();
        this.addInputOption();
        this.addOutputOption();
        this.addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
        this.addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
        this.addOption(DefaultOptionCreator.distanceMeasureOption().create());
        this.addOption(DefaultOptionCreator.convergenceOption().create());
        this.addOption(DefaultOptionCreator.maxIterationsOption().create());
        this.addOption(DefaultOptionCreator.overwriteOption().create());
        this.addFlag("usessvd", "ssvd", "Uses SSVD as the eigensolver. Default is the Lanczos solver.");
        this.addOption("reduceTasks", "t", "Number of reducers for SSVD", String.valueOf(REDUCERS));
        this.addOption("outerProdBlockHeight", "oh", "Block height of outer products for SSVD", String.valueOf(BLOCKHEIGHT));
        this.addOption("oversampling", "p", "Oversampling parameter for SSVD", String.valueOf(OVERSAMPLING));
        this.addOption("powerIter", "q", "Additional power iterations for SSVD", String.valueOf(POWERITERS));
        Map<String, List<String>> parsedArgs = this.parseArguments(arg0);
        if (parsedArgs == null) {
            return 0;
        }
        Path input = this.getInputPath();
        Path output = this.getOutputPath();
        if (this.hasOption("overwrite")) {
            // Only the output directory is cleared here; the temp directory is left untouched.
            HadoopUtil.delete(conf, output);
        }
        int numDims = Integer.parseInt(this.getOption("dimensions"));
        int clusters = Integer.parseInt(this.getOption("clusters"));
        String measureClass = this.getOption("distanceMeasure");
        DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
        double convergenceDelta = Double.parseDouble(this.getOption("convergenceDelta"));
        int maxIterations = Integer.parseInt(this.getOption("maxIter"));
        Path tempdir = new Path(this.getOption("tempDir"));
        boolean ssvd = parsedArgs.containsKey("--usessvd");
        if (ssvd) {
            // The SSVD tuning options are only read when SSVD was actually requested.
            int reducers = Integer.parseInt(this.getOption("reduceTasks"));
            int blockheight = Integer.parseInt(this.getOption("outerProdBlockHeight"));
            int oversampling = Integer.parseInt(this.getOption("oversampling"));
            int poweriters = Integer.parseInt(this.getOption("powerIter"));
            SpectralKMeansDriver.run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations, tempdir, true, reducers, blockheight, oversampling, poweriters);
        } else {
            SpectralKMeansDriver.run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations, tempdir, false);
        }
        return 0;
    }

    /**
     * Runs the pipeline with the default SSVD parameters ({@link #REDUCERS}, {@link #BLOCKHEIGHT},
     * {@link #OVERSAMPLING}, {@link #POWERITERS}).
     *
     * @param conf             Hadoop configuration
     * @param input            path of the raw affinity input
     * @param output           directory receiving the seed clusters and k-means output
     * @param numDims          row and column count of the (square) affinity matrix
     * @param clusters         number of clusters and of eigenvectors to keep
     * @param measure          distance measure handed to the seed generator
     * @param convergenceDelta k-means convergence threshold
     * @param maxIterations    maximum number of k-means iterations
     * @param tempDir          directory for intermediate results
     * @param ssvd             {@code true} to use SSVD, {@code false} to use Lanczos
     * @throws IOException            on file-system or job failures
     * @throws InterruptedException   if a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void run(Configuration conf, Path input, Path output, int numDims, int clusters, DistanceMeasure measure, double convergenceDelta, int maxIterations, Path tempDir, boolean ssvd) throws IOException, InterruptedException, ClassNotFoundException {
        SpectralKMeansDriver.run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations, tempDir, ssvd, REDUCERS, BLOCKHEIGHT, OVERSAMPLING, POWERITERS);
    }

    /**
     * Runs the full spectral k-means pipeline and logs the resulting cluster assignment of each point.
     *
     * @param conf             Hadoop configuration
     * @param input            path of the raw affinity input
     * @param output           directory receiving {@code clusters-0} and {@code kmeans_out}
     * @param numDims          row and column count of the (square) affinity matrix
     * @param clusters         number of clusters and of eigenvectors to keep
     * @param measure          distance measure handed to the seed generator
     * @param convergenceDelta k-means convergence threshold
     * @param maxIterations    maximum number of k-means iterations
     * @param tempDir          directory under which {@code calculations} and {@code temporary} are created
     * @param ssvd             {@code true} to use SSVD, {@code false} to use Lanczos
     * @param numReducers      reducer count passed to the SSVD solver (ignored for Lanczos)
     * @param blockHeight      block height passed to the SSVD solver (ignored for Lanczos)
     * @param oversampling     oversampling parameter passed to the SSVD solver (ignored for Lanczos)
     * @param poweriters       value passed to {@code SSVDSolver.setQ} (ignored for Lanczos)
     * @throws IOException            on file-system or job failures
     * @throws InterruptedException   if a job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void run(Configuration conf, Path input, Path output, int numDims, int clusters, DistanceMeasure measure, double convergenceDelta, int maxIterations, Path tempDir, boolean ssvd, int numReducers, int blockHeight, int oversampling, int poweriters) throws IOException, InterruptedException, ClassNotFoundException {
        Path data;
        Path outputCalc = new Path(tempDir, "calculations");
        Path outputTmp = new Path(tempDir, "temporary");

        // Step 1: turn the raw input into a numDims x numDims affinity matrix.
        Path affSeqFiles = new Path(outputCalc, "seqfile");
        AffinityMatrixInputJob.runJob(input, affSeqFiles, numDims, numDims);
        // A is configured but not referenced again below; kept so construction side effects (if any) are preserved.
        DistributedRowMatrix A = new DistributedRowMatrix(affSeqFiles, new Path(outputTmp, "afftmp"), numDims, numDims);
        Configuration depConf = new Configuration(conf);
        A.setConf(depConf);

        // Step 2: diagonal vector D, then combine D with the affinity matrix.
        Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
        // NOTE(review): the last argument resolves outputCalc against itself; it looks like a
        // dedicated temp sub-directory may have been intended - confirm before changing.
        DistributedRowMatrix L = VectorMatrixMultiplicationJob.runJob(affSeqFiles, D, new Path(outputCalc, "laplacian"), new Path(outputCalc, outputCalc));
        L.setConf(depConf);

        // Step 3: eigen-decomposition, by SSVD or by Lanczos + verification.
        if (ssvd) {
            Path[] laplacianPaths = new Path[]{L.getRowPath()};
            Path ssvdOut = new Path(outputCalc, "SSVD");
            SSVDSolver solveIt = new SSVDSolver(depConf, laplacianPaths, ssvdOut, blockHeight, clusters, oversampling, numReducers);
            solveIt.setComputeV(false);
            solveIt.setComputeU(true);
            solveIt.setOverwrite(true);
            solveIt.setQ(poweriters);
            solveIt.run();
            data = new Path(solveIt.getUPath());
        } else {
            // Ask for more eigenvectors than clusters (capped at numDims); the verifier trims to `clusters`.
            int overshoot = Math.min((int) ((double) clusters * OVERSHOOTMULTIPLIER), numDims);
            DistributedLanczosSolver solver = new DistributedLanczosSolver();
            LanczosState state = new LanczosState(L, overshoot, DistributedLanczosSolver.getInitialVector(L));
            Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");
            solver.runJob(conf, state, overshoot, true, lanczosSeqFiles.toString());
            EigenVerificationJob verifier = new EigenVerificationJob();
            Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
            verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath, true, 1.0, clusters);
            Path cleanedEigens = verifier.getCleanedEigensPath();
            DistributedRowMatrix W = new DistributedRowMatrix(cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
            W.setConf(depConf);
            // Transpose so that the k-means input is the rows of the transposed eigenvector matrix.
            DistributedRowMatrix Wtrans = W.transpose();
            data = Wtrans.getRowPath();
        }

        // Step 4: normalize the rows.
        Path unitVectors = new Path(outputCalc, "unitvectors");
        UnitVectorizerJob.runJob(data, unitVectors);
        DistributedRowMatrix Wt = new DistributedRowMatrix(unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
        Wt.setConf(depConf);
        data = Wt.getRowPath();

        // Step 5: seed and run k-means.
        Path initialclusters = EigenSeedGenerator.buildFromEigens(conf, data, new Path(output, "clusters-0"), clusters, measure);
        Path answer = new Path(output, "kmeans_out");
        KMeansDriver.run(conf, data, initialclusters, answer, convergenceDelta, maxIterations, true, 0.0, false);

        // Step 6: report the assignments, labelled with the original input names when available.
        List<String> mapping = readInputMapping(conf);
        logClusterAssignments(conf, answer, mapping);
    }

    /**
     * Reads the {@code generic_input_mapping} sequence file under {@code hadoop.tmp.dir}, collecting
     * its values in file order, and deletes the file after it has been read successfully.
     *
     * @param conf Hadoop configuration supplying {@code hadoop.tmp.dir} and the file system
     * @return the mapping values in file order; empty (with a warning logged) if the file is absent
     * @throws IOException if the file system cannot be accessed or the file cannot be read
     */
    private static List<String> readInputMapping(Configuration conf) throws IOException {
        Path mappingPath = new Path(new Path(conf.get("hadoop.tmp.dir")), "generic_input_mapping");
        List<String> mapping = Lists.newArrayList();
        FileSystem fs = FileSystem.get(mappingPath.toUri(), conf);
        if (!fs.exists(mappingPath)) {
            log.warn("generic input mapping file not found!");
            return mapping;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, mappingPath, conf);
        try {
            Text mappingValue = new Text();
            IntWritable mappingIndex = new IntWritable();
            // The key is read but not used: entries are assumed to appear in index order.
            while (reader.next(mappingIndex, mappingValue)) {
                mapping.add(mappingValue.toString());
            }
        } finally {
            // Always release the underlying stream, even if reading fails part-way.
            reader.close();
        }
        HadoopUtil.delete(conf, mappingPath);
        return mapping;
    }

    /**
     * Logs one line per record of {@code clusteredPoints/part-m-00000} under {@code answer}: the
     * point's label followed by the record key (the cluster id).
     *
     * <p>The label is the mapping entry at the record's position; when the mapping is empty or has
     * no entry for that position, the position itself is logged instead.
     *
     * @param conf    Hadoop configuration used to open the sequence file
     * @param answer  k-means output directory
     * @param mapping labels by record position; may be empty
     * @throws IOException declared for callers; reading is delegated to {@link SequenceFileIterable}
     */
    private static void logClusterAssignments(Configuration conf, Path answer, List<String> mapping) throws IOException {
        Path clusteredPointsPath = new Path(answer, "clusteredPoints");
        Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
        int id = 0;
        for (Pair<IntWritable, Writable> record : new SequenceFileIterable<IntWritable, Writable>(inputPath, conf)) {
            int clusterId = record.getFirst().get();
            if (id < mapping.size()) {
                log.info("{}: {}", mapping.get(id), clusterId);
            } else {
                log.info("{}: {}", id, clusterId);
            }
            id++;
        }
    }
}

