/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.preprocessing.text.TokenizerMode;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

/**
 * Text tokenizer modelled on the Keras text {@code Tokenizer}.
 *
 * <p>An instance accumulates word statistics from texts ({@link #fitOnTexts(String[])}) or from
 * already-indexed sequences ({@link #fitOnSequences(Integer[][])}) and can then convert texts to
 * integer sequences, sequences back to texts, and sequences to a document/term matrix.
 * Word indices are 1-based; index 1 is given to the out-of-vocabulary token when one is configured.
 *
 * <p>The class holds plain mutable maps and performs no synchronization.
 */
public class KerasTokenizer {
    private static final String DEFAULT_FILTER = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n";
    private static final String DEFAULT_SPLIT = " ";

    /** Upper bound (exclusive) on usable word indices; {@code null} means "no limit". */
    private Integer numWords;
    /** Characters that are replaced by {@link #split} before splitting a text into words. */
    private String filters;
    /** Whether texts are lower-cased before tokenization. */
    private boolean lower;
    /** Literal separator between words. */
    private String split;
    /** Whether every character is treated as a token. */
    private boolean charLevel;
    /** Token substituted for unknown words; {@code null} means unknown words are dropped. */
    private String outOfVocabularyToken;
    /** Total occurrences per word. Insertion-ordered so that ties are ranked by first appearance. */
    private Map<String, Integer> wordCounts = new LinkedHashMap<>();
    /** Number of documents each word appeared in. */
    private HashMap<String, Integer> wordDocs = new HashMap<>();
    /** Word to 1-based index. */
    private Map<String, Integer> wordIndex = new HashMap<>();
    /** 1-based index to word (inverse of {@link #wordIndex}). */
    private Map<Integer, String> indexWord = new HashMap<>();
    /** Number of documents each word index appeared in. */
    private Map<Integer, Integer> indexDocs = new HashMap<>();
    /** Number of documents seen so far; {@code null} until the tokenizer has been fitted. */
    private Integer documentCount;

    /**
     * Creates a fully configured tokenizer.
     *
     * @param numWords             exclusive upper bound on word indices that are kept, or {@code null} for no bound
     * @param filters              characters that are replaced by {@code split} before splitting
     * @param lower                whether to lower-case texts
     * @param split                literal word separator
     * @param charLevel            whether every character is a token
     * @param outOfVocabularyToken replacement for unknown words, or {@code null} to drop them
     */
    public KerasTokenizer(Integer numWords, String filters, boolean lower, String split, boolean charLevel, String outOfVocabularyToken) {
        this.numWords = numWords;
        this.filters = filters;
        this.lower = lower;
        this.split = split;
        this.charLevel = charLevel;
        this.outOfVocabularyToken = outOfVocabularyToken;
    }

    /**
     * Creates a word-level, lower-casing tokenizer with the default filter and separator.
     *
     * @param numWords exclusive upper bound on word indices that are kept, or {@code null} for no bound
     */
    public KerasTokenizer(Integer numWords) {
        this(numWords, DEFAULT_FILTER, true, DEFAULT_SPLIT, false, null);
    }

    /** Creates a word-level, lower-casing tokenizer with default settings and no word limit. */
    public KerasTokenizer() {
        this(null, DEFAULT_FILTER, true, DEFAULT_SPLIT, false, null);
    }

    /**
     * Loads a tokenizer from a JSON file that has a top-level {@code "config"} object.
     *
     * <p>The entries {@code word_counts}, {@code word_docs}, {@code word_index}, {@code index_word} and
     * {@code index_docs} are read as JSON documents embedded as strings. JSON object keys are always
     * strings, so the keys of the two index-keyed maps are parsed to integers here; otherwise later
     * lookups by {@code Integer} would never match.
     *
     * @param jsonFileName path of the JSON file
     * @return the restored tokenizer
     * @throws IOException                        if the file cannot be read or parsed
     * @throws InvalidKerasConfigurationException if the configuration is missing or malformed
     */
    public static KerasTokenizer fromJson(String jsonFileName) throws IOException, InvalidKerasConfigurationException {
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        String json = new String(Files.readAllBytes(Paths.get(jsonFileName)), StandardCharsets.UTF_8);
        Map<String, Object> tokenizerBaseConfig = KerasModelUtils.parseJsonString(json);
        if (!tokenizerBaseConfig.containsKey("config")) {
            throw new InvalidKerasConfigurationException("No configuration found for Keras tokenizer");
        }
        Object rawConfig = tokenizerBaseConfig.get("config");
        if (!(rawConfig instanceof Map)) {
            throw new InvalidKerasConfigurationException("Keras tokenizer configuration is not a JSON object");
        }
        Map<?, ?> tokenizerConfig = (Map<?, ?>) rawConfig;

        // num_words and document_count may legitimately be null, so they stay boxed.
        Integer numWords = toInteger(tokenizerConfig.get("num_words"), "num_words");
        Integer documentCount = toInteger(tokenizerConfig.get("document_count"), "document_count");
        String filters = (String) tokenizerConfig.get("filters");
        String split = (String) tokenizerConfig.get("split");
        String oovToken = (String) tokenizerConfig.get("oov_token");
        boolean lower = toBoolean(tokenizerConfig.get("lower"), true);
        boolean charLevel = toBoolean(tokenizerConfig.get("char_level"), false);

        KerasTokenizer tokenizer = new KerasTokenizer(
                numWords,
                filters == null ? DEFAULT_FILTER : filters,
                lower,
                split == null ? DEFAULT_SPLIT : split,
                charLevel,
                oovToken);
        tokenizer.setDocumentCount(documentCount);
        tokenizer.setWordCounts(toWordKeyedInts(parseEmbeddedJson(tokenizerConfig, "word_counts"), "word_counts"));
        tokenizer.setWordDocs(new HashMap<>(toWordKeyedInts(parseEmbeddedJson(tokenizerConfig, "word_docs"), "word_docs")));
        tokenizer.setWordIndex(toWordKeyedInts(parseEmbeddedJson(tokenizerConfig, "word_index"), "word_index"));

        Map<Integer, String> indexWord = new HashMap<>();
        for (Map.Entry<String, Object> entry : parseEmbeddedJson(tokenizerConfig, "index_word").entrySet()) {
            Object word = entry.getValue();
            indexWord.put(parseIndexKey(entry.getKey(), "index_word"), word == null ? null : word.toString());
        }
        tokenizer.setIndexWord(indexWord);

        Map<Integer, Integer> indexDocs = new HashMap<>();
        for (Map.Entry<String, Object> entry : parseEmbeddedJson(tokenizerConfig, "index_docs").entrySet()) {
            indexDocs.put(parseIndexKey(entry.getKey(), "index_docs"), toInteger(entry.getValue(), "index_docs"));
        }
        tokenizer.setIndexDocs(indexDocs);
        return tokenizer;
    }

    /** Parses the JSON document stored as a string under {@code key}; a missing entry yields an empty map. */
    private static Map<String, Object> parseEmbeddedJson(Map<?, ?> config, String key)
            throws IOException, InvalidKerasConfigurationException {
        Object value = config.get(key);
        if (value == null) {
            return Collections.<String, Object>emptyMap();
        }
        if (!(value instanceof String)) {
            throw new InvalidKerasConfigurationException("Tokenizer entry '" + key + "' is not a JSON string");
        }
        return KerasModelUtils.parseJsonString((String) value);
    }

    /** Converts a parsed JSON object with numeric values to a typed, insertion-ordered map. */
    private static Map<String, Integer> toWordKeyedInts(Map<String, Object> raw, String section)
            throws InvalidKerasConfigurationException {
        Map<String, Integer> typed = new LinkedHashMap<>();
        for (Map.Entry<String, Object> entry : raw.entrySet()) {
            typed.put(entry.getKey(), toInteger(entry.getValue(), section));
        }
        return typed;
    }

    /** Converts a JSON number to {@code Integer}; {@code null} stays {@code null}. */
    private static Integer toInteger(Object value, String section) throws InvalidKerasConfigurationException {
        if (value == null) {
            return null;
        }
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        throw new InvalidKerasConfigurationException(
                "Expected a number in tokenizer entry '" + section + "' but found: " + value);
    }

    /** Converts a JSON boolean, falling back to {@code defaultValue} when the entry is absent. */
    private static boolean toBoolean(Object value, boolean defaultValue) {
        return value == null ? defaultValue : (Boolean) value;
    }

    /** Parses a JSON object key that represents a word index. */
    private static Integer parseIndexKey(String key, String section) throws InvalidKerasConfigurationException {
        try {
            return Integer.valueOf(key.trim());
        } catch (NumberFormatException e) {
            throw new InvalidKerasConfigurationException(
                    "Non-integer key '" + key + "' in tokenizer entry '" + section + "': " + e.getMessage());
        }
    }

    /**
     * Splits a text into words.
     *
     * <p>Every character of {@code filters} is replaced by {@code split}, the text is then cut at each
     * literal occurrence of {@code split}, and empty tokens are discarded.
     *
     * @param text    text to tokenize
     * @param filters characters to treat as separators; {@code null} or empty means none
     * @param lower   whether to lower-case the text first (locale-independent)
     * @param split   literal separator (not a regular expression)
     * @return the non-empty words in order of appearance
     */
    public static String[] textToWordSequence(String text, String filters, boolean lower, String split) {
        String normalized = lower ? text.toLowerCase(Locale.ROOT) : text;
        if (filters != null) {
            // Walk the filter characters directly: String.split("") may yield an empty string, and
            // replace("", split) would then insert the separator between every character of the text.
            for (int i = 0; i < filters.length(); i++) {
                normalized = normalized.replace(String.valueOf(filters.charAt(i)), split);
            }
        }
        // The separator was inserted literally above, so it has to be matched literally here as well.
        String[] pieces = normalized.split(Pattern.quote(split));
        // Collect into a growable list; Arrays.asList is fixed-size and rejects removals.
        List<String> words = new ArrayList<>(pieces.length);
        for (String piece : pieces) {
            if (!piece.isEmpty()) {
                words.add(piece);
            }
        }
        return words.toArray(new String[0]);
    }

    /** Tokenizes one text according to this instance's settings (character level or word level). */
    private String[] tokenize(String text) {
        if (!this.charLevel) {
            return KerasTokenizer.textToWordSequence(text, this.filters, this.lower, this.split);
        }
        String normalized = this.lower ? text.toLowerCase(Locale.ROOT) : text;
        List<String> characters = new ArrayList<>(normalized.length());
        // Iterate by code point so that surrogate pairs stay together and empty text yields no token.
        int offset = 0;
        while (offset < normalized.length()) {
            int codePoint = normalized.codePointAt(offset);
            characters.add(new String(Character.toChars(codePoint)));
            offset += Character.charCount(codePoint);
        }
        return characters.toArray(new String[0]);
    }

    /** Adds one to the counter stored under {@code key}, starting at 1 when the key is absent. */
    private static <K> void increment(Map<K, Integer> counters, K key) {
        Integer current = counters.get(key);
        counters.put(key, current == null ? 1 : current + 1);
    }

    /** Returns the index of the out-of-vocabulary token, or {@code null} when none is configured or indexed. */
    private Integer lookupOovIndex() {
        return this.outOfVocabularyToken == null ? null : this.wordIndex.get(this.outOfVocabularyToken);
    }

    /**
     * Updates the vocabulary statistics from the given texts and rebuilds the word/index maps.
     *
     * <p>Each text counts as one document. Words are ranked by total occurrence count, most frequent
     * first, and numbered from 1; a configured out-of-vocabulary token takes rank 1.
     *
     * @param texts documents to learn from
     */
    public void fitOnTexts(String[] texts) {
        for (String text : texts) {
            this.documentCount = this.documentCount == null ? 1 : this.documentCount + 1;
            String[] sequence = tokenize(text);
            for (String word : sequence) {
                increment(this.wordCounts, word);
            }
            // A word counts once per document, however often it occurs in it.
            for (String word : new HashSet<>(Arrays.asList(sequence))) {
                increment(this.wordDocs, word);
            }
        }

        List<String> sortedVocabulary = new ArrayList<>(this.wordCounts.size() + 1);
        if (this.outOfVocabularyToken != null) {
            sortedVocabulary.add(this.outOfVocabularyToken);
        }
        // Rank by total word count (not by document frequency).
        sortedVocabulary.addAll(KerasTokenizer.reverseSortByValues(this.wordCounts).keySet());

        // Rebuild both directions from scratch so no stale entry survives a re-fit.
        this.wordIndex.clear();
        this.indexWord.clear();
        for (int rank = 0; rank < sortedVocabulary.size(); rank++) {
            this.wordIndex.put(sortedVocabulary.get(rank), rank + 1);
        }
        for (Map.Entry<String, Integer> entry : this.wordIndex.entrySet()) {
            this.indexWord.put(entry.getValue(), entry.getKey());
        }
        for (Map.Entry<String, Integer> entry : this.wordDocs.entrySet()) {
            Integer index = this.wordIndex.get(entry.getKey());
            if (index != null) {
                this.indexDocs.put(index, entry.getValue());
            }
        }
    }

    /**
     * Returns a copy of {@code map} whose iteration order is by descending value.
     * The sort is stable, so entries with equal values keep the relative order of {@code map}.
     */
    private static <K, V extends Comparable<? super V>> LinkedHashMap<K, V> reverseSortByValues(Map<K, V> map) {
        List<Map.Entry<K, V>> entries = new ArrayList<>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Map.Entry<K, V> left, Map.Entry<K, V> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });
        LinkedHashMap<K, V> sorted = new LinkedHashMap<>();
        for (Map.Entry<K, V> entry : entries) {
            sorted.put(entry.getKey(), entry.getValue());
        }
        return sorted;
    }

    /**
     * Updates document statistics from already-indexed sequences.
     *
     * <p>Each sequence counts as one document; every distinct index in a sequence increases that
     * index's document frequency by one.
     *
     * @param sequences documents expressed as word indices
     */
    public void fitOnSequences(Integer[][] sequences) {
        int seenSoFar = this.documentCount == null ? 0 : this.documentCount;
        this.documentCount = seenSoFar + sequences.length;
        for (Integer[] sequence : sequences) {
            for (Integer index : new HashSet<>(Arrays.asList(sequence))) {
                // Indices that have never been seen start from zero instead of failing on null.
                increment(this.indexDocs, index);
            }
        }
    }

    /**
     * Converts texts to sequences of word indices.
     *
     * <p>Unknown words, and words whose index is not below {@code numWords} (when set), are replaced by
     * the out-of-vocabulary index if one exists and are dropped otherwise.
     *
     * @param texts texts to convert
     * @return one index array per input text
     */
    public Integer[][] textsToSequences(String[] texts) {
        Integer oovTokenIndex = lookupOovIndex();
        Integer[][] sequences = new Integer[texts.length][];
        for (int t = 0; t < texts.length; t++) {
            List<Integer> indexVector = new ArrayList<>();
            for (String word : tokenize(texts[t])) {
                Integer index = this.wordIndex.get(word);
                boolean inVocabulary = index != null && (this.numWords == null || index < this.numWords);
                if (inVocabulary) {
                    indexVector.add(index);
                } else if (oovTokenIndex != null) {
                    indexVector.add(oovTokenIndex);
                }
            }
            sequences[t] = indexVector.toArray(new Integer[0]);
        }
        return sequences;
    }

    /**
     * Converts sequences of word indices back to texts.
     *
     * <p>Known indices below {@code numWords} (or any known index when {@code numWords} is unset) are
     * emitted as their word. All other indices are emitted as the out-of-vocabulary token if one is
     * indexed and are dropped otherwise. Words are joined with the configured separator, without a
     * trailing separator.
     *
     * @param sequences sequences to convert
     * @return one text per input sequence
     */
    public String[] sequencesToTexts(Integer[][] sequences) {
        Integer oovTokenIndex = lookupOovIndex();
        String oovWord = oovTokenIndex == null ? null : this.indexWord.get(oovTokenIndex);
        String[] texts = new String[sequences.length];
        for (int s = 0; s < sequences.length; s++) {
            StringBuilder text = new StringBuilder();
            boolean first = true;
            for (Integer index : sequences[s]) {
                String word = index == null ? null : this.indexWord.get(index);
                boolean inVocabulary = word != null && (this.numWords == null || index < this.numWords);
                String resolved = inVocabulary ? word : oovWord;
                if (resolved == null) {
                    continue;
                }
                if (!first) {
                    text.append(this.split);
                }
                text.append(resolved);
                first = false;
            }
            texts[s] = text.toString();
        }
        return texts;
    }

    /**
     * Tokenizes the texts and encodes them as a matrix; see
     * {@link #sequencesToMatrix(Integer[][], TokenizerMode)}.
     *
     * @param texts texts to encode
     * @param mode  cell encoding
     * @return matrix with one row per text
     */
    public INDArray textsToMatrix(String[] texts, TokenizerMode mode) {
        Integer[][] sequences = this.textsToSequences(texts);
        return this.sequencesToMatrix(sequences, mode);
    }

    /**
     * Encodes sequences as a matrix with one row per sequence and one column per word index.
     *
     * <p>The column count is {@code numWords} when set, otherwise {@code wordIndex.size() + 1}: word
     * indices are 1-based, so the extra column keeps the highest index addressable. Indices outside
     * the column range are ignored. Cell values depend on {@code mode}:
     * <ul>
     *   <li>{@code COUNT} - occurrences of the word in the sequence</li>
     *   <li>{@code FREQ} - occurrences divided by the sequence length</li>
     *   <li>{@code BINARY} - 1 if the word occurs</li>
     *   <li>{@code TFIDF} - {@code (1 + ln(count)) * ln(1 + documentCount / (1 + docFrequency))}</li>
     * </ul>
     * This method does not modify the tokenizer.
     *
     * @param sequences sequences of word indices; {@code null} rows stay all-zero
     * @param mode      cell encoding
     * @return the encoded matrix
     * @throws IllegalArgumentException if neither {@code numWords} nor a fitted vocabulary is available,
     *                                  or if {@code TFIDF} is requested before any fitting
     */
    public INDArray sequencesToMatrix(Integer[][] sequences, TokenizerMode mode) {
        final int columns;
        if (this.numWords != null) {
            columns = this.numWords;
        } else if (!this.wordIndex.isEmpty()) {
            // Kept local on purpose: writing this back to numWords would silently change tokenization.
            columns = this.wordIndex.size() + 1;
        } else {
            throw new IllegalArgumentException("Either specify numWords argument or fit Tokenizer on data first, i.e. by using fitOnTexts");
        }
        if (mode == TokenizerMode.TFIDF && this.documentCount == null) {
            throw new IllegalArgumentException("To use TFIDF mode you need to fit the Tokenizer instance with fitOnTexts first.");
        }

        INDArray x = Nd4j.zeros((long) sequences.length, (long) columns);
        for (int i = 0; i < sequences.length; i++) {
            Integer[] sequence = sequences[i];
            if (sequence == null || sequence.length == 0) {
                continue;
            }
            Map<Integer, Integer> counts = new HashMap<>();
            for (Integer index : sequence) {
                if (index == null || index < 0 || index >= columns) {
                    continue;
                }
                increment(counts, index);
            }
            for (Map.Entry<Integer, Integer> entry : counts.entrySet()) {
                int j = entry.getKey();
                int count = entry.getValue();
                switch (mode) {
                    case COUNT: {
                        x.put(i, j, Integer.valueOf(count));
                        break;
                    }
                    case FREQ: {
                        // Floating-point division; integer division would truncate nearly every value to 0.
                        x.put(i, j, Double.valueOf((double) count / sequence.length));
                        break;
                    }
                    case BINARY: {
                        x.put(i, j, Integer.valueOf(1));
                        break;
                    }
                    case TFIDF: {
                        double tf = 1.0 + Math.log(count);
                        Integer docFrequency = this.indexDocs.get(j);
                        double seenIn = docFrequency == null ? 0.0 : docFrequency.doubleValue();
                        double idf = Math.log(1.0 + this.documentCount.doubleValue() / (1.0 + seenIn));
                        x.put(i, j, Double.valueOf(tf * idf));
                        break;
                    }
                    default:
                        // Any other mode leaves the cell at zero.
                        break;
                }
            }
        }
        return x;
    }

    /** @return the exclusive upper bound on kept word indices, or {@code null} if unbounded */
    public Integer getNumWords() {
        return this.numWords;
    }

    /** @return the characters that are replaced by the separator before splitting */
    public String getFilters() {
        return this.filters;
    }

    /** @return whether texts are lower-cased before tokenization */
    public boolean isLower() {
        return this.lower;
    }

    /** @return the word separator */
    public String getSplit() {
        return this.split;
    }

    /** @return whether every character is treated as a token */
    public boolean isCharLevel() {
        return this.charLevel;
    }

    /** @return the out-of-vocabulary token, or {@code null} if none is configured */
    public String getOutOfVocabularyToken() {
        return this.outOfVocabularyToken;
    }

    /** @return the live map of total occurrences per word */
    public Map<String, Integer> getWordCounts() {
        return this.wordCounts;
    }

    /** @return the live map of document frequency per word */
    public HashMap<String, Integer> getWordDocs() {
        return this.wordDocs;
    }

    /** @return the live map from word to index */
    public Map<String, Integer> getWordIndex() {
        return this.wordIndex;
    }

    /** @return the live map from index to word */
    public Map<Integer, String> getIndexWord() {
        return this.indexWord;
    }

    /** @return the live map of document frequency per word index */
    public Map<Integer, Integer> getIndexDocs() {
        return this.indexDocs;
    }

    /** @return the number of documents seen, or {@code null} if the tokenizer has not been fitted */
    public Integer getDocumentCount() {
        return this.documentCount;
    }

    /** @param numWords exclusive upper bound on kept word indices, or {@code null} for no bound */
    public void setNumWords(Integer numWords) {
        this.numWords = numWords;
    }

    /** @param filters characters that are replaced by the separator before splitting */
    public void setFilters(String filters) {
        this.filters = filters;
    }

    /** @param lower whether texts are lower-cased before tokenization */
    public void setLower(boolean lower) {
        this.lower = lower;
    }

    /** @param split the word separator */
    public void setSplit(String split) {
        this.split = split;
    }

    /** @param charLevel whether every character is treated as a token */
    public void setCharLevel(boolean charLevel) {
        this.charLevel = charLevel;
    }

    /** @param outOfVocabularyToken replacement for unknown words, or {@code null} to drop them */
    public void setOutOfVocabularyToken(String outOfVocabularyToken) {
        this.outOfVocabularyToken = outOfVocabularyToken;
    }

    /** @param wordCounts total occurrences per word; stored by reference */
    public void setWordCounts(Map<String, Integer> wordCounts) {
        this.wordCounts = wordCounts;
    }

    /** @param wordDocs document frequency per word; stored by reference */
    public void setWordDocs(HashMap<String, Integer> wordDocs) {
        this.wordDocs = wordDocs;
    }

    /** @param wordIndex word to index map; stored by reference */
    public void setWordIndex(Map<String, Integer> wordIndex) {
        this.wordIndex = wordIndex;
    }

    /** @param indexWord index to word map; stored by reference */
    public void setIndexWord(Map<Integer, String> indexWord) {
        this.indexWord = indexWord;
    }

    /** @param indexDocs document frequency per word index; stored by reference */
    public void setIndexDocs(Map<Integer, Integer> indexDocs) {
        this.indexDocs = indexDocs;
    }

    /** @param documentCount number of documents seen so far */
    public void setDocumentCount(Integer documentCount) {
        this.documentCount = documentCount;
    }

    /** Two tokenizers are equal when all configuration values and all statistics maps are equal. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof KerasTokenizer)) {
            return false;
        }
        KerasTokenizer other = (KerasTokenizer) o;
        return other.canEqual(this)
                && Objects.equals(this.getNumWords(), other.getNumWords())
                && Objects.equals(this.getFilters(), other.getFilters())
                && this.isLower() == other.isLower()
                && Objects.equals(this.getSplit(), other.getSplit())
                && this.isCharLevel() == other.isCharLevel()
                && Objects.equals(this.getOutOfVocabularyToken(), other.getOutOfVocabularyToken())
                && Objects.equals(this.getWordCounts(), other.getWordCounts())
                && Objects.equals(this.getWordDocs(), other.getWordDocs())
                && Objects.equals(this.getWordIndex(), other.getWordIndex())
                && Objects.equals(this.getIndexWord(), other.getIndexWord())
                && Objects.equals(this.getIndexDocs(), other.getIndexDocs())
                && Objects.equals(this.getDocumentCount(), other.getDocumentCount());
    }

    /** Hook that lets subclasses restrict which objects may compare equal to them. */
    protected boolean canEqual(Object other) {
        return other instanceof KerasTokenizer;
    }

    /** Hash over the same properties as {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        final int prime = 59;
        int result = 1;
        result = result * prime + hashOf(this.getNumWords());
        result = result * prime + hashOf(this.getFilters());
        result = result * prime + (this.isLower() ? 79 : 97);
        result = result * prime + hashOf(this.getSplit());
        result = result * prime + (this.isCharLevel() ? 79 : 97);
        result = result * prime + hashOf(this.getOutOfVocabularyToken());
        result = result * prime + hashOf(this.getWordCounts());
        result = result * prime + hashOf(this.getWordDocs());
        result = result * prime + hashOf(this.getWordIndex());
        result = result * prime + hashOf(this.getIndexWord());
        result = result * prime + hashOf(this.getIndexDocs());
        result = result * prime + hashOf(this.getDocumentCount());
        return result;
    }

    /** Hash contribution of one property; {@code null} contributes the fixed value 43. */
    private static int hashOf(Object value) {
        return value == null ? 43 : value.hashCode();
    }

    @Override
    public String toString() {
        return "KerasTokenizer(numWords=" + this.getNumWords()
                + ", filters=" + this.getFilters()
                + ", lower=" + this.isLower()
                + ", split=" + this.getSplit()
                + ", charLevel=" + this.isCharLevel()
                + ", outOfVocabularyToken=" + this.getOutOfVocabularyToken()
                + ", wordCounts=" + this.getWordCounts()
                + ", wordDocs=" + this.getWordDocs()
                + ", wordIndex=" + this.getWordIndex()
                + ", indexWord=" + this.getIndexWord()
                + ", indexDocs=" + this.getIndexDocs()
                + ", documentCount=" + this.getDocumentCount() + ")";
    }
}

