package com.cybozu.labs.langdetect.util;

import com.tencent.matrix.trace.core.AppMethodBeat;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
/**
 * Sliding window over the most recent (normalized) characters of a text, from which
 * 1- to {@link #N_GRAM}-character grams can be read with {@link #get(int)}.
 *
 * <p>Characters are fed one at a time through {@link #addChar(char)}; each is first
 * folded by {@link #normalize(char)}. A space in the window marks a word boundary:
 * the character added right after a space starts a fresh window.
 *
 * <p>All character tables are obtained through {@code Messages.getString(...)} in the
 * static initializer. Every method is bracketed by {@code AppMethodBeat.in/out} tracing
 * calls whose tag strings must not be altered.
 */
public class NGram {
    /** Matches one character of {@link #TO_NORMALIZE_VI_CHARS} followed by one of {@link #DMARK_CLASS}. */
    public static final Pattern ALPHABET_WITH_DMARK;
    /**
     * Groups of CJK characters; within {@link #normalize(char)} every member of a group is
     * replaced by the group's first character (see {@link #cjk_map}).
     */
    public static final String[] CJK_CLASS;
    /** Characters accepted as the second (mark) part of {@link #ALPHABET_WITH_DMARK}. */
    public static final String DMARK_CLASS;
    /** Latin-1 Supplement characters that {@link #normalize(char)} turns into a space. */
    public static final String LATIN1_EXCLUDED;
    /**
     * Replacement tables used by {@link #normalize_vi(String)}: the array index is the
     * position of the mark in {@link #DMARK_CLASS}, the index inside each string is the
     * position of the base letter in {@link #TO_NORMALIZE_VI_CHARS}.
     */
    public static final String[] NORMALIZED_VI_CHARS;
    /** Maximum gram length, and the maximum number of characters kept in the window. */
    public static final int N_GRAM = 3;
    /** Characters accepted as the first (base letter) part of {@link #ALPHABET_WITH_DMARK}. */
    public static final String TO_NORMALIZE_VI_CHARS;
    /** Maps each character of every {@link #CJK_CLASS} group to that group's first character. */
    public static HashMap<Character, Character> cjk_map;
    /** True while the window ends in two or more consecutive upper-case characters. */
    public boolean capitalword_;
    /** The current window; always starts out as a single space. */
    public StringBuffer grams_;

    static {
        AppMethodBeat.in("䁠䧊઼洹婖㝼");
        LATIN1_EXCLUDED = Messages.getString("NGram.LATIN1_EXCLUDE");
        NORMALIZED_VI_CHARS = new String[]{
            Messages.getString("NORMALIZED_VI_CHARS_0300"),
            Messages.getString("NORMALIZED_VI_CHARS_0301"),
            Messages.getString("NORMALIZED_VI_CHARS_0303"),
            Messages.getString("NORMALIZED_VI_CHARS_0309"),
            Messages.getString("NORMALIZED_VI_CHARS_0323")
        };
        TO_NORMALIZE_VI_CHARS = Messages.getString("TO_NORMALIZE_VI_CHARS");
        DMARK_CLASS = Messages.getString("DMARK_CLASS");
        // Both strings are spliced into character classes, so the pattern is "[letters][marks]".
        ALPHABET_WITH_DMARK = Pattern.compile("([" + TO_NORMALIZE_VI_CHARS + "])([" + DMARK_CLASS + "])");
        CJK_CLASS = new String[]{
            Messages.getString("NGram.KANJI_1_0"), Messages.getString("NGram.KANJI_1_2"), Messages.getString("NGram.KANJI_1_4"),
            Messages.getString("NGram.KANJI_1_8"), Messages.getString("NGram.KANJI_1_11"), Messages.getString("NGram.KANJI_1_12"),
            Messages.getString("NGram.KANJI_1_13"), Messages.getString("NGram.KANJI_1_14"), Messages.getString("NGram.KANJI_1_16"),
            Messages.getString("NGram.KANJI_1_18"), Messages.getString("NGram.KANJI_1_22"), Messages.getString("NGram.KANJI_1_27"),
            Messages.getString("NGram.KANJI_1_29"), Messages.getString("NGram.KANJI_1_31"), Messages.getString("NGram.KANJI_1_35"),
            Messages.getString("NGram.KANJI_2_0"), Messages.getString("NGram.KANJI_2_1"), Messages.getString("NGram.KANJI_2_4"),
            Messages.getString("NGram.KANJI_2_9"), Messages.getString("NGram.KANJI_2_10"), Messages.getString("NGram.KANJI_2_11"),
            Messages.getString("NGram.KANJI_2_12"), Messages.getString("NGram.KANJI_2_13"), Messages.getString("NGram.KANJI_2_15"),
            Messages.getString("NGram.KANJI_2_16"), Messages.getString("NGram.KANJI_2_18"), Messages.getString("NGram.KANJI_2_21"),
            Messages.getString("NGram.KANJI_2_22"), Messages.getString("NGram.KANJI_2_23"), Messages.getString("NGram.KANJI_2_28"),
            Messages.getString("NGram.KANJI_2_29"), Messages.getString("NGram.KANJI_2_30"), Messages.getString("NGram.KANJI_2_31"),
            Messages.getString("NGram.KANJI_2_32"), Messages.getString("NGram.KANJI_2_35"), Messages.getString("NGram.KANJI_2_36"),
            Messages.getString("NGram.KANJI_2_37"), Messages.getString("NGram.KANJI_2_38"), Messages.getString("NGram.KANJI_3_1"),
            Messages.getString("NGram.KANJI_3_2"), Messages.getString("NGram.KANJI_3_3"), Messages.getString("NGram.KANJI_3_4"),
            Messages.getString("NGram.KANJI_3_5"), Messages.getString("NGram.KANJI_3_8"), Messages.getString("NGram.KANJI_3_9"),
            Messages.getString("NGram.KANJI_3_11"), Messages.getString("NGram.KANJI_3_12"), Messages.getString("NGram.KANJI_3_13"),
            Messages.getString("NGram.KANJI_3_15"), Messages.getString("NGram.KANJI_3_16"), Messages.getString("NGram.KANJI_3_18"),
            Messages.getString("NGram.KANJI_3_19"), Messages.getString("NGram.KANJI_3_22"), Messages.getString("NGram.KANJI_3_23"),
            Messages.getString("NGram.KANJI_3_27"), Messages.getString("NGram.KANJI_3_29"), Messages.getString("NGram.KANJI_3_30"),
            Messages.getString("NGram.KANJI_3_31"), Messages.getString("NGram.KANJI_3_32"), Messages.getString("NGram.KANJI_3_35"),
            Messages.getString("NGram.KANJI_3_36"), Messages.getString("NGram.KANJI_3_37"), Messages.getString("NGram.KANJI_3_38"),
            Messages.getString("NGram.KANJI_4_0"), Messages.getString("NGram.KANJI_4_9"), Messages.getString("NGram.KANJI_4_10"),
            Messages.getString("NGram.KANJI_4_16"), Messages.getString("NGram.KANJI_4_17"), Messages.getString("NGram.KANJI_4_18"),
            Messages.getString("NGram.KANJI_4_22"), Messages.getString("NGram.KANJI_4_24"), Messages.getString("NGram.KANJI_4_28"),
            Messages.getString("NGram.KANJI_4_34"), Messages.getString("NGram.KANJI_4_39"), Messages.getString("NGram.KANJI_5_10"),
            Messages.getString("NGram.KANJI_5_11"), Messages.getString("NGram.KANJI_5_12"), Messages.getString("NGram.KANJI_5_13"),
            Messages.getString("NGram.KANJI_5_14"), Messages.getString("NGram.KANJI_5_18"), Messages.getString("NGram.KANJI_5_26"),
            Messages.getString("NGram.KANJI_5_29"), Messages.getString("NGram.KANJI_5_34"), Messages.getString("NGram.KANJI_5_39"),
            Messages.getString("NGram.KANJI_6_0"), Messages.getString("NGram.KANJI_6_3"), Messages.getString("NGram.KANJI_6_9"),
            Messages.getString("NGram.KANJI_6_10"), Messages.getString("NGram.KANJI_6_11"), Messages.getString("NGram.KANJI_6_12"),
            Messages.getString("NGram.KANJI_6_16"), Messages.getString("NGram.KANJI_6_18"), Messages.getString("NGram.KANJI_6_20"),
            Messages.getString("NGram.KANJI_6_21"), Messages.getString("NGram.KANJI_6_22"), Messages.getString("NGram.KANJI_6_23"),
            Messages.getString("NGram.KANJI_6_25"), Messages.getString("NGram.KANJI_6_28"), Messages.getString("NGram.KANJI_6_29"),
            Messages.getString("NGram.KANJI_6_30"), Messages.getString("NGram.KANJI_6_32"), Messages.getString("NGram.KANJI_6_34"),
            Messages.getString("NGram.KANJI_6_35"), Messages.getString("NGram.KANJI_6_37"), Messages.getString("NGram.KANJI_6_39"),
            Messages.getString("NGram.KANJI_7_0"), Messages.getString("NGram.KANJI_7_3"), Messages.getString("NGram.KANJI_7_6"),
            Messages.getString("NGram.KANJI_7_7"), Messages.getString("NGram.KANJI_7_9"), Messages.getString("NGram.KANJI_7_11"),
            Messages.getString("NGram.KANJI_7_12"), Messages.getString("NGram.KANJI_7_13"), Messages.getString("NGram.KANJI_7_16"),
            Messages.getString("NGram.KANJI_7_18"), Messages.getString("NGram.KANJI_7_19"), Messages.getString("NGram.KANJI_7_20"),
            Messages.getString("NGram.KANJI_7_21"), Messages.getString("NGram.KANJI_7_23"), Messages.getString("NGram.KANJI_7_25"),
            Messages.getString("NGram.KANJI_7_28"), Messages.getString("NGram.KANJI_7_29"), Messages.getString("NGram.KANJI_7_32"),
            Messages.getString("NGram.KANJI_7_33"), Messages.getString("NGram.KANJI_7_35"), Messages.getString("NGram.KANJI_7_37")
        };
        cjk_map = new HashMap<>();
        for (String group : CJK_CLASS) {
            // The first character of each group is the representative for all of its members.
            char representative = group.charAt(0);
            for (int i = 0; i < group.length(); i++) {
                cjk_map.put(Character.valueOf(group.charAt(i)), Character.valueOf(representative));
            }
        }
        AppMethodBeat.out("䁠䧊઼洹婖㝼");
    }

    /** Creates an empty window (a single space, no capital word in progress). */
    public NGram() {
        AppMethodBeat.in("䁠䧊઼滒妻");
        this.grams_ = new StringBuffer(" ");
        this.capitalword_ = false;
        AppMethodBeat.out("䁠䧊઼滒妻");
    }

    /**
     * Folds a character into its normalized form, depending on its Unicode block.
     *
     * <p>Reconstructed from the bytecode of the original method, which the decompiler had
     * replaced with an unconditional {@code UnsupportedOperationException}.
     *
     * <ul>
     *   <li>Basic Latin: anything outside {@code A-Z} / {@code a-z} becomes a space.</li>
     *   <li>Latin-1 Supplement: characters listed in {@link #LATIN1_EXCLUDED} become a space.</li>
     *   <li>Latin Extended-B: U+0219 becomes U+015F and U+021B becomes U+0163.</li>
     *   <li>General Punctuation: always a space.</li>
     *   <li>Arabic: U+06CC becomes U+064A.</li>
     *   <li>Latin Extended Additional: every character from U+1EA0 upwards becomes U+1EC3.</li>
     *   <li>Hiragana, Katakana, Bopomofo (incl. extended) and Hangul Syllables: every
     *       character collapses to one fixed representative of its block.</li>
     *   <li>CJK Unified Ideographs: replaced through {@link #cjk_map} when an entry exists.</li>
     * </ul>
     * Characters of any other block are returned unchanged.
     *
     * @param ch the character to normalize
     * @return the normalized character
     */
    public static char normalize(char ch) {
        AppMethodBeat.in("䁠䧊઼䬊呙塧");
        char result = ch;
        Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
        if (block == Character.UnicodeBlock.BASIC_LATIN) {
            // Keep ASCII letters only; digits, punctuation and controls act as separators.
            if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {
                result = ' ';
            }
        } else if (block == Character.UnicodeBlock.LATIN_1_SUPPLEMENT) {
            if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
                result = ' ';
            }
        } else if (block == Character.UnicodeBlock.LATIN_EXTENDED_B) {
            // Comma-below forms of s/t are folded onto the cedilla forms.
            if (ch == '\u0219') {
                result = '\u015f';
            } else if (ch == '\u021b') {
                result = '\u0163';
            }
        } else if (block == Character.UnicodeBlock.GENERAL_PUNCTUATION) {
            result = ' ';
        } else if (block == Character.UnicodeBlock.ARABIC) {
            // Farsi yeh is folded onto Arabic yeh.
            if (ch == '\u06cc') {
                result = '\u064a';
            }
        } else if (block == Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
            if (ch >= '\u1ea0') {
                result = '\u1ec3';
            }
        } else if (block == Character.UnicodeBlock.HIRAGANA) {
            result = '\u3042';
        } else if (block == Character.UnicodeBlock.KATAKANA) {
            result = '\u30a2';
        } else if (block == Character.UnicodeBlock.BOPOMOFO
                || block == Character.UnicodeBlock.BOPOMOFO_EXTENDED) {
            result = '\u3105';
        } else if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
            Character mapped = cjk_map.get(Character.valueOf(ch));
            if (mapped != null) {
                result = mapped.charValue();
            }
        } else if (block == Character.UnicodeBlock.HANGUL_SYLLABLES) {
            result = '\uac00';
        }
        AppMethodBeat.out("䁠䧊઼䬊呙塧");
        return result;
    }

    /**
     * Replaces every "base letter + mark" pair matched by {@link #ALPHABET_WITH_DMARK} with
     * the single character found in {@link #NORMALIZED_VI_CHARS}.
     *
     * @param str the text to normalize
     * @return the normalized text, or {@code str} itself when nothing was replaced
     */
    public static String normalize_vi(String str) {
        AppMethodBeat.in("䁠䧊઼䬊呙塧涖");
        Matcher matcher = ALPHABET_WITH_DMARK.matcher(str);
        StringBuffer normalized = new StringBuffer();
        while (matcher.find()) {
            int letterIndex = TO_NORMALIZE_VI_CHARS.indexOf(matcher.group(1));
            int markIndex = DMARK_CLASS.indexOf(matcher.group(2));
            matcher.appendReplacement(normalized, NORMALIZED_VI_CHARS[markIndex].substring(letterIndex, letterIndex + 1));
        }
        if (normalized.length() == 0) {
            // No match was found, so the input can be handed back untouched.
            AppMethodBeat.out("䁠䧊઼䬊呙塧涖");
            return str;
        }
        matcher.appendTail(normalized);
        String result = normalized.toString();
        AppMethodBeat.out("䁠䧊઼䬊呙塧涖");
        return result;
    }

    /**
     * Normalizes {@code c} and appends it to the window.
     *
     * <p>If the window currently ends in a space, the window is restarted; a normalized
     * space arriving in that state is dropped. Otherwise the oldest character is discarded
     * once the window already holds {@link #N_GRAM} characters.
     *
     * @param c the raw character to add
     */
    public void addChar(char c) {
        AppMethodBeat.in("䁤䧊઼ई☢⮜");
        char normalized = normalize(c);
        char previous = this.grams_.charAt(this.grams_.length() - 1);
        if (previous == ' ') {
            // Word boundary: start a new window.
            this.grams_ = new StringBuffer(" ");
            this.capitalword_ = false;
            if (normalized == ' ') {
                AppMethodBeat.out("䁤䧊઼ई☢⮜");
                return;
            }
        } else if (this.grams_.length() >= N_GRAM) {
            this.grams_.deleteCharAt(0);
        }
        this.grams_.append(normalized);
        if (!Character.isUpperCase(normalized)) {
            this.capitalword_ = false;
        } else if (Character.isUpperCase(previous)) {
            // Two upper-case characters in a row: grams are suppressed until a non-upper-case one arrives.
            this.capitalword_ = true;
        }
        AppMethodBeat.out("䁤䧊઼ई☢⮜");
    }

    /**
     * Returns the gram formed by the last {@code i} characters of the window.
     *
     * @param i the gram length, from 1 to {@link #N_GRAM}
     * @return the gram, or {@code null} when {@link #capitalword_} is set, {@code i} is out
     *         of range, the window holds fewer than {@code i} characters, or {@code i} is 1
     *         and the last character is a space
     */
    public String get(int i) {
        AppMethodBeat.in("䁠䧊઼㣭");
        String gram = null;
        if (!this.capitalword_) {
            int length = this.grams_.length();
            if (i >= 1 && i <= N_GRAM && length >= i) {
                if (i == 1) {
                    char last = this.grams_.charAt(length - 1);
                    if (last != ' ') {
                        gram = Character.toString(last);
                    }
                } else {
                    gram = this.grams_.substring(length - i, length);
                }
            }
        }
        AppMethodBeat.out("䁠䧊઼㣭");
        return gram;
    }
}
