package smile.nlp.pos;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import smile.math.Math;

/* loaded from: input_file:BOOT-INF/lib/libarx-3.8.0.jar:smile/nlp/pos/HMMPOSTagger.class */
/**
 * Part-of-speech tagger backed by a hidden Markov model whose states are the
 * {@link PennTreebankPOS} tags.
 *
 * <p>The model keeps four probability tables: the initial state distribution
 * ({@code pi}), the state transition matrix ({@code a}), the per-state word
 * emission matrix ({@code b}, where column 0 is reserved for words that are
 * not in the vocabulary) and the per-state emission matrix over two-character
 * word suffixes ({@code c}). Tagging decodes the most likely state sequence
 * with the Viterbi algorithm in log space.
 *
 * <p>Instances are serializable; {@link #getDefault()} deserializes a model
 * bundled as a classpath resource and {@link #main(String[])} trains and
 * writes such a model.
 */
public class HMMPOSTagger implements POSTagger, Serializable {
    private static final Logger logger = LoggerFactory.getLogger(HMMPOSTagger.class);
    private static final long serialVersionUID = 6600840654340610562L;

    /** Classpath location of the serialized default model. */
    private static final String MODEL_RESOURCE = "/smile/nlp/pos/hmmpostagger.model";

    /** Corpus directories used by {@link #main(String[])} when no arguments are given. */
    private static final String[] DEFAULT_CORPORA = {
        "D:\\sourceforge\\corpora\\PennTreebank\\PennTreebank2\\TAGGED\\POS\\WSJ",
        "D:\\sourceforge\\corpora\\PennTreebank\\PennTreebank2\\TAGGED\\POS\\BROWN"
    };

    /** Word to emission-column index; indices start at 1 because 0 means "unknown word". */
    private Map<String, Integer> symbol;
    /** Two-character word suffix to suffix-emission-column index (starting at 0). */
    private Map<String, Integer> suffix;
    /** Initial state probabilities, one entry per tag. */
    private double[] pi;
    /** State transition probabilities, indexed [from][to]. */
    private double[][] a;
    /** Word emission probabilities, indexed [state][symbol index]. */
    private double[][] b;
    /** Suffix emission probabilities, indexed [state][suffix index]. */
    private double[][] c;
    /** Lazily loaded shared instance returned by {@link #getDefault()}. */
    private static HMMPOSTagger DEFAULT_TAGGER;

    /**
     * Creates an empty tagger with no model tables set. Retained from the
     * original class; such an instance cannot tag until its fields are
     * populated (for example by deserialization).
     */
    public HMMPOSTagger() {
    }

    /**
     * Creates a tagger from fully estimated model tables.
     *
     * @param map    word to symbol index
     * @param map2   suffix to suffix index
     * @param dArr   initial state probabilities
     * @param dArr2  state transition probabilities
     * @param dArr3  word emission probabilities
     * @param dArr4  suffix emission probabilities
     * @throws IllegalArgumentException if a table's size does not match the
     *         tagset size or the corresponding dictionary size
     */
    private HMMPOSTagger(Map<String, Integer> map, Map<String, Integer> map2, double[] dArr, double[][] dArr2, double[][] dArr3, double[][] dArr4) {
        int numTags = PennTreebankPOS.values().length;
        if (dArr.length != numTags) {
            throw new IllegalArgumentException("The number of states is different from the size of Penn Treebank tagset.");
        }
        if (dArr2[0].length != numTags) {
            throw new IllegalArgumentException("Invlid state transition probability size.");
        }
        // +1 accounts for column 0, which is reserved for out-of-vocabulary words.
        if (dArr3[0].length != map.size() + 1) {
            throw new IllegalArgumentException("Invlid symbol emission probability size.");
        }
        if (dArr4[0].length != map2.size()) {
            throw new IllegalArgumentException("Invlid symbol suffix emission probability size.");
        }
        this.pi = dArr;
        this.a = dArr2;
        this.b = dArr3;
        this.c = dArr4;
        this.symbol = map;
        this.suffix = map2;
    }

    /**
     * Returns the shared default tagger, deserializing it from the classpath
     * resource on first use. The lazy initialization is not synchronized.
     *
     * @return the default tagger, or {@code null} if the model could not be
     *         loaded (the failure is logged)
     */
    public static HMMPOSTagger getDefault() {
        if (DEFAULT_TAGGER == null) {
            // try-with-resources closes the stream even when readObject fails.
            try (ObjectInputStream in = new ObjectInputStream(HMMPOSTagger.class.getResourceAsStream(MODEL_RESOURCE))) {
                DEFAULT_TAGGER = (HMMPOSTagger) in.readObject();
            } catch (Exception e) {
                logger.error("Failed to load " + MODEL_RESOURCE, e);
            }
        }
        return DEFAULT_TAGGER;
    }

    /**
     * Tags a tokenized sentence.
     *
     * <p>Words missing from the vocabulary are first offered to
     * {@link RegexPOSTagger}; when that yields no tag (or the word is known)
     * the tag of the Viterbi state at that position is used.
     *
     * @param strArr the tokens of one sentence
     * @return one tag per token; an empty array for an empty sentence
     */
    @Override // smile.nlp.pos.POSTagger
    public PennTreebankPOS[] tag(String[] strArr) {
        int length = strArr.length;
        PennTreebankPOS[] tags = new PennTreebankPOS[length];
        if (length == 0) {
            // Nothing to decode; the Viterbi pass requires at least one token.
            return tags;
        }
        int[] states = viterbi(strArr);
        PennTreebankPOS[] tagset = PennTreebankPOS.values();
        for (int i = 0; i < length; i++) {
            if (this.symbol.get(strArr[i]) == null) {
                tags[i] = RegexPOSTagger.tag(strArr[i]);
            }
            if (tags[i] == null) {
                tags[i] = tagset[states[i]];
            }
        }
        return tags;
    }

    /**
     * Natural logarithm with a floor: values below 1e-300 map to -690.7755
     * instead of being passed to the logarithm.
     */
    private static double log(double d) {
        return d < 1.0E-300d ? -690.7755d : Math.log(d);
    }

    /**
     * Log emission score of one encoded token in the given state. The suffix
     * table is used only for an out-of-vocabulary word (symbol index 0) that
     * has a known suffix; otherwise the word emission table is used.
     *
     * @param state the state (tag ordinal)
     * @param code  {@code {symbolIndex, suffixIndex}} as produced by
     *              {@link #translate(Map, Map, String[])}
     */
    private double logEmission(int state, int[] code) {
        if (code[0] == 0 && code[1] >= 0) {
            return log(this.c[state][code[1]]);
        }
        return log(this.b[state][code[0]]);
    }

    /**
     * Computes the most likely state sequence for a sentence with the Viterbi
     * algorithm, working with log scores.
     *
     * @param strArr the tokens of one sentence
     * @return the state index chosen for each token; empty for an empty sentence
     */
    private int[] viterbi(String[] strArr) {
        int length = strArr.length;
        int[] path = new int[length];
        if (length == 0) {
            return path;
        }
        int[][] codes = translate(this.symbol, this.suffix, strArr);
        int numStates = this.pi.length;
        // delta[t][s]: best log score of any path ending in state s at position t.
        double[][] delta = new double[length][numStates];
        // psi[t][s]: predecessor state on that best path.
        int[][] psi = new int[length][numStates];

        for (int s = 0; s < numStates; s++) {
            delta[0][s] = log(this.pi[s]) + logEmission(s, codes[0]);
        }

        for (int t = 1; t < length; t++) {
            for (int s = 0; s < numStates; s++) {
                double bestScore = Double.NEGATIVE_INFINITY;
                int bestPrev = -1;
                for (int p = 0; p < numStates; p++) {
                    double score = delta[t - 1][p] + log(this.a[p][s]);
                    if (bestScore < score) {
                        bestScore = score;
                        bestPrev = p;
                    }
                }
                delta[t][s] = bestScore + logEmission(s, codes[t]);
                psi[t][s] = bestPrev;
            }
        }

        // Pick the best final state, then follow the back-pointers.
        int last = length - 1;
        double finalScore = Double.NEGATIVE_INFINITY;
        for (int s = 0; s < numStates; s++) {
            if (finalScore < delta[last][s]) {
                finalScore = delta[last][s];
                path[last] = s;
            }
        }
        for (int t = last; t > 0; t--) {
            path[t - 1] = psi[t][path[t]];
        }
        return path;
    }

    /**
     * Encodes each token as a pair {@code {symbolIndex, suffixIndex}}.
     *
     * <p>{@code symbolIndex} is 0 when the word is not in {@code map}.
     * {@code suffixIndex} is the index of the word's last two characters in
     * {@code map2}, or -1 when the word has at most two characters or the
     * suffix is unknown.
     */
    private static int[][] translate(Map<String, Integer> map, Map<String, Integer> map2, String[] strArr) {
        int[][] codes = new int[strArr.length][2];
        for (int i = 0; i < strArr.length; i++) {
            String word = strArr[i];
            Integer symbolIndex = map.get(word);
            codes[i][0] = symbolIndex != null ? symbolIndex.intValue() : 0;

            Integer suffixIndex = word.length() > 2 ? map2.get(word.substring(word.length() - 2)) : null;
            codes[i][1] = suffixIndex != null ? suffixIndex.intValue() : -1;
        }
        return codes;
    }

    /** Converts tags to their ordinals, which serve as HMM state indices. */
    private static int[] translate(PennTreebankPOS[] pennTreebankPOSArr) {
        int[] states = new int[pennTreebankPOSArr.length];
        for (int i = 0; i < pennTreebankPOSArr.length; i++) {
            states[i] = pennTreebankPOSArr[i].ordinal();
        }
        return states;
    }

    /**
     * Estimates a tagger from tagged sentences by counting and then
     * normalizing each table with {@code Math.unitize1}.
     *
     * <p>Empty sentences contribute nothing and are skipped.
     *
     * @param strArr              sentences, each an array of tokens
     * @param pennTreebankPOSArr  the tags of each sentence, parallel to {@code strArr}
     * @return the trained tagger
     */
    public static HMMPOSTagger learn(String[][] strArr, PennTreebankPOS[][] pennTreebankPOSArr) {
        Map<String, Integer> symbols = new HashMap<>();
        Map<String, Integer> suffixes = new HashMap<>();
        for (String[] sentence : strArr) {
            for (String word : sentence) {
                if (symbols.get(word) == null) {
                    // Symbol indices start at 1; 0 is reserved for unknown words.
                    symbols.put(word, Integer.valueOf(symbols.size() + 1));
                }
                if (word.length() > 2) {
                    String ending = word.substring(word.length() - 2);
                    if (suffixes.get(ending) == null) {
                        suffixes.put(ending, Integer.valueOf(suffixes.size()));
                    }
                }
            }
        }

        PennTreebankPOS[] tagset = PennTreebankPOS.values();
        int numTags = tagset.length;
        double[] initial = new double[numTags];
        double[][] transition = new double[numTags][numTags];
        double[][] emission = new double[numTags][symbols.size() + 1];
        double[][] suffixEmission = new double[numTags][suffixes.size()];

        // Seed the unknown-word column with one count for every tag flagged as open.
        for (int s = 0; s < numTags; s++) {
            if (tagset[s].open) {
                emission[s][0] = 1.0d;
            }
        }

        for (int n = 0; n < strArr.length; n++) {
            if (strArr[n].length == 0) {
                // An empty sentence has no first token to count.
                continue;
            }
            int[] states = translate(pennTreebankPOSArr[n]);
            int[][] codes = translate(symbols, suffixes, strArr[n]);
            for (int t = 0; t < codes.length; t++) {
                int state = states[t];
                if (t == 0) {
                    initial[state] += 1.0d;
                } else {
                    transition[states[t - 1]][state] += 1.0d;
                }
                emission[state][codes[t][0]] += 1.0d;
                if (codes[t][1] >= 0) {
                    suffixEmission[state][codes[t][1]] += 1.0d;
                }
            }
        }

        Math.unitize1(initial);
        for (int s = 0; s < numTags; s++) {
            Math.unitize1(transition[s]);
            Math.unitize1(emission[s]);
            Math.unitize1(suffixEmission[s]);
        }
        return new HMMPOSTagger(symbols, suffixes, initial, transition, emission, suffixEmission);
    }

    /**
     * Reads every {@code .POS} file under a directory and appends the tagged
     * sentences found to the given lists.
     *
     * <p>Within a file, blank lines end a sentence, lines starting with
     * {@code ===} or {@code *x*} are ignored, and the remaining lines are split
     * on whitespace into {@code word/TAG} tokens. Tokens that do not split into
     * exactly two parts on {@code /} are ignored. When a tag lists alternatives
     * separated by {@code |}, only the first is used. A failure while reading a
     * file is logged and the remaining files are still processed.
     *
     * @param str    root directory to search
     * @param list   receives the tokens of each sentence
     * @param list2  receives the tags of each sentence, parallel to {@code list}
     */
    public static void load(String str, List<String[]> list, List<PennTreebankPOS[]> list2) {
        List<File> files = new ArrayList<>();
        walkin(new File(str), files);
        for (File file : files) {
            // try-with-resources closes the reader even when parsing fails mid-file.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
                List<String> words = new ArrayList<>();
                List<PennTreebankPOS> tags = new ArrayList<>();
                String rawLine;
                while ((rawLine = reader.readLine()) != null) {
                    String line = rawLine.trim();
                    if (line.isEmpty()) {
                        flushSentence(words, tags, list, list2);
                        continue;
                    }
                    if (line.startsWith("===") || line.startsWith("*x*")) {
                        continue;
                    }
                    for (String token : line.split("\\s")) {
                        String[] parts = token.split("/");
                        if (parts.length != 2) {
                            continue;
                        }
                        words.add(parts[0]);
                        int bar = parts[1].indexOf('|');
                        String tagName = bar == -1 ? parts[1] : parts[1].substring(0, bar);
                        // Map two tag spellings onto the names the tagset lookup is given.
                        if (tagName.equals("PRP$R")) {
                            tagName = "PRP$";
                        }
                        if (tagName.equals("JJSS")) {
                            tagName = "JJS";
                        }
                        tags.add(PennTreebankPOS.getValue(tagName));
                    }
                }
                // A file may end without a trailing blank line.
                flushSentence(words, tags, list, list2);
            } catch (Exception e) {
                logger.error("Failed to load training data {}", file, e);
            }
        }
    }

    /**
     * Appends the buffered sentence (if any) to the output lists and clears
     * the buffers.
     */
    private static void flushSentence(List<String> words, List<PennTreebankPOS> tags, List<String[]> sentences, List<PennTreebankPOS[]> labels) {
        if (words.isEmpty()) {
            return;
        }
        sentences.add(words.toArray(new String[0]));
        labels.add(tags.toArray(new PennTreebankPOS[0]));
        words.clear();
        tags.clear();
    }

    /**
     * Recursively collects the files whose name ends with {@code .POS}.
     *
     * @param file  directory to search; if {@code listFiles()} returns
     *              {@code null} for it, nothing is added
     * @param list  receives the matching files
     */
    public static void walkin(File file, List<File> list) {
        File[] children = file.listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                walkin(child, list);
            } else if (child.getName().endsWith(".POS")) {
                list.add(child);
            }
        }
    }

    /**
     * Trains a tagger on tagged corpora and serializes it to
     * {@code hmmpostagger.model} in the working directory.
     *
     * @param strArr corpus directories to load; when empty, the two built-in
     *               default locations are used
     */
    public static void main(String[] strArr) {
        String[] corpora = strArr.length > 0 ? strArr : DEFAULT_CORPORA;
        List<String[]> sentences = new ArrayList<>();
        List<PennTreebankPOS[]> labels = new ArrayList<>();
        for (String corpus : corpora) {
            load(corpus, sentences, labels);
        }
        // The lists hold arrays, so the target arrays must be two-dimensional.
        HMMPOSTagger model = learn(sentences.toArray(new String[0][]), labels.toArray(new PennTreebankPOS[0][]));
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream("hmmpostagger.model"))) {
            out.writeObject(model);
            out.flush();
        } catch (Exception e) {
            logger.error("Failed to save HMM POS model", e);
        }
    }
}
