/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.common;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.patterns.Pattern;
import ai.grazie.nlp.utils.Symbols;
import ai.grazie.rules.common.Diacritics;
import ai.grazie.rules.common.WordSet;
import ai.grazie.rules.tree.Formatter;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.TextRange;
import ai.grazie.rules.tree.Tree;
import ai.grazie.rules.tree.TreeCache;
import ai.grazie.rules.tree.TreeSupport;
import ai.grazie.rules.util.CharUtil;
import ai.grazie.rules.util.TransformingCharSequence;
import ai.grazie.rules.util.regex.Regex;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
import java.io.Serializable;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.stream.Stream;
import one.util.streamex.EntryStream;
import one.util.streamex.StreamEx;
import org.apache.commons.lang3.StringUtils;
import org.jspecify.annotations.NonNull;
import org.jspecify.annotations.Nullable;
import org.languagetool.tools.StringTools;

public class KnownPhrases {
    public static final Set<Language> SUPPORTED_LANGUAGES = Set.of(Language.ENGLISH, Language.GERMAN, Language.RUSSIAN, Language.UKRAINIAN);
    static final String MULTI_WORD_NO_TYPOS = "multi-word-no-typos.txt";
    public final Language language;
    private final Map<String, List<Phrase>> knownPhrases = new LinkedHashMap<String, List<Phrase>>();
    private @Nullable AhoCorasickDoubleArrayTrie<Serializable> trie;
    private static final Map<Language, WeakReference<KnownPhrases>> instances = new WeakHashMap<Language, WeakReference<KnownPhrases>>();
    private final TreeCache<List<Match>> phraseCache = new TreeCache<List>("phrases", t -> this.validPhrases(t.text()));

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public static KnownPhrases forLanguage(@NonNull Language language) {
        Map<Language, WeakReference<KnownPhrases>> map = instances;
        synchronized (map) {
            KnownPhrases result;
            WeakReference<KnownPhrases> ref = instances.get(language);
            KnownPhrases knownPhrases = result = ref == null ? null : (KnownPhrases)ref.get();
            if (result == null) {
                result = new KnownPhrases(language);
                instances.put(language, new WeakReference<KnownPhrases>(result));
            }
            return result;
        }
    }

    private KnownPhrases(Language language) {
        assert (SUPPORTED_LANGUAGES.contains(language)) : "Language " + String.valueOf(language) + " is not supported by KnownPhrases";
        this.language = language;
        for (String file : this.multiWordFiles()) {
            if (!WordSet.resourceExists(file)) continue;
            this.knownPhrases.put(file, KnownPhrases.readRegexFile(file));
        }
        HashMap<String, String> sourceInterner = new HashMap<String, String>();
        for (String file : List.of(this.diacriticsPath(), this.geoDiacriticsPath())) {
            if (!WordSet.resourceExists(file)) continue;
            ArrayList<Phrase> filePhrases = new ArrayList<Phrase>();
            this.knownPhrases.put(file, filePhrases);
            for (String line : WordSet.loadLines(file)) {
                KnownPhrases.checkApostrophes(line, line);
                String[] parts = line.split(";", 2);
                filePhrases.add(new Phrase(parts[0], sourceInterner.computeIfAbsent(parts[1], __ -> parts[1])));
            }
        }
        for (String file : Stream.of("international", language.getIso().toString()).map(s -> s + "/accepted_nosuggest.txt").toList()) {
            if (!WordSet.resourceExists(file)) continue;
            this.knownPhrases.put(file, KnownPhrases.readRegexFile(file));
        }
    }

    private static void checkApostrophes(String phrase, String line) {
        assert (!phrase.contains("'")) : "Phrases should contain smart apostrophes: '" + phrase + "' in line " + line;
    }

    private static List<Phrase> readRegexFile(String file) {
        ArrayList<Phrase> filePhrases = new ArrayList<Phrase>();
        for (String line : WordSet.loadLines(file)) {
            if (line.startsWith("#") || line.isBlank()) continue;
            Set<String> phrases = Regex.parse(line).possibleValues();
            assert (phrases != null) : "The entries in " + file + " should be finite enumerable regexes with not too many possible values. Couldn't extract values from: " + line;
            for (String phrase : (StreamEx)StreamEx.of(phrases).sorted(String.CASE_INSENSITIVE_ORDER)) {
                KnownPhrases.checkApostrophes(phrase, line);
                filePhrases.add(new Phrase(phrase, line));
            }
        }
        return filePhrases;
    }

    public List<Phrase> phrasesFromFile(String file) {
        return this.knownPhrases.getOrDefault(file, List.of());
    }

    public String geoDiacriticsPath() {
        return String.valueOf(this.language.getIso()) + "/geo_diacritics.txt";
    }

    public String diacriticsPath() {
        return String.valueOf(this.language.getIso()) + "/diacritics.txt";
    }

    List<String> multiWordFiles() {
        return StreamEx.of((Object[])new String[]{"international", this.language.getIso().toString()}).toFlatList(code -> List.of(code + "/multi-word-spelling.txt", code + "/multi-word-no-typos.txt"));
    }

    public boolean isPartOfValidPhrase(Node node) {
        return this.validPhrases(node.tree()).stream().anyMatch(tr -> tr.range().containsInclusive(node.textRange()));
    }

    public List<Match> validPhrases(Tree tree) {
        return tree.getCached(this.phraseCache);
    }

    public List<Match> validPhrases(CharSequence sentence) {
        StringBuilder clean = new StringBuilder(sentence.length());
        int[] cleanToSentence = new int[sentence.length() + 1];
        for (int i = 0; i < sentence.length(); ++i) {
            char norm = KnownPhrases.normalizeChar(sentence.charAt(i));
            if (norm == '\u0000' || norm == ' ' && !clean.isEmpty() && clean.charAt(clean.length() - 1) == ' ') continue;
            cleanToSentence[clean.length()] = i;
            clean.append(norm);
        }
        cleanToSentence[clean.length()] = sentence.length();
        AhoCorasickDoubleArrayTrie<Serializable> trie = this.obtainTrie();
        ArrayList<Match> result = new ArrayList<Match>();
        trie.parseText((CharSequence)TransformingCharSequence.lowerCase(Diacritics.removeDiacritics(clean.toString())), (start, end, entry) -> {
            if (Pattern.isWordBoundaryBefore((CharSequence)clean, (int)start) && (Pattern.isWordBoundaryBefore((CharSequence)clean, (int)end) || end < clean.length() && Symbols.INSTANCE.getApostrophes().contains(Character.valueOf(clean.charAt(end))))) {
                String[] stringArray;
                if (entry instanceof String) {
                    String s = (String)((Object)entry);
                    String[] stringArray2 = new String[1];
                    stringArray = stringArray2;
                    stringArray2[0] = s;
                } else {
                    stringArray = (String[])entry;
                }
                String[] candidates = stringArray;
                String actual = clean.substring(start, end);
                for (String candidate : candidates) {
                    if (KnownPhrases.isAllowed(actual, candidate, start, clean)) {
                        result.add(new Match(cleanToSentence[start], cleanToSentence[end], candidate, false));
                        continue;
                    }
                    if (!KnownPhrases.isAllowed(actual, Diacritics.removeDiacritics(candidate), start, clean)) continue;
                    result.add(new Match(cleanToSentence[start], cleanToSentence[end], candidate, true));
                }
            }
        });
        return result;
    }

    private static char normalizeChar(char c) {
        if (Character.getType(c) == 28) {
            return '\u0000';
        }
        return (char)(c == '\'' ? 8217 : (c == '\u2011' ? 45 : (CharUtil.isAnySpace(c) ? 32 : (int)c)));
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private AhoCorasickDoubleArrayTrie<Serializable> obtainTrie() {
        Map<String, List<Phrase>> map = this.knownPhrases;
        synchronized (map) {
            AhoCorasickDoubleArrayTrie<Serializable> trie = this.trie;
            if (trie == null) {
                this.trie = trie = this.buildTrie();
            }
            return trie;
        }
    }

    public boolean isRangeCoveredByValidPhrase(CharSequence text, int start, int end) {
        return this.validPhrases(text).stream().anyMatch(tr -> tr.start() <= start && end <= tr.end());
    }

    private AhoCorasickDoubleArrayTrie<Serializable> buildTrie() {
        HashMap<String, List> trieMap = new HashMap<String, List>();
        for (List<Phrase> value : this.knownPhrases.values()) {
            for (Phrase p : value) {
                String phrase = p.phrase;
                trieMap.computeIfAbsent(TransformingCharSequence.lowerCase(Diacritics.removeDiacritics(phrase)).toString(), __ -> new ArrayList()).add(phrase);
            }
        }
        AhoCorasickDoubleArrayTrie trie = new AhoCorasickDoubleArrayTrie();
        trie.build((Map)EntryStream.of(trieMap).mapValues(v -> v.size() > 1 ? v.toArray(new String[0]) : (String[])v.getFirst()).toSortedMap());
        return trie;
    }

    private static boolean isAllowed(String actual, String expected, int start, CharSequence sentence) {
        if (expected.equals(actual)) {
            return true;
        }
        if (actual.equalsIgnoreCase(expected)) {
            if (KnownPhrases.isUpperCaseOnly(actual) || actual.matches("(Mc|Le)[A-Z]+")) {
                return true;
            }
            if (expected.split(" ")[0].chars().noneMatch(Character::isUpperCase)) {
                if ((TreeSupport.isCapitalizedSentenceStart(sentence, start) || Formatter.possiblyEndsWithSentenceBoundary(sentence.subSequence(0, start)) || start > 0 && CharUtil.isAnyOf("'\"\u201c\u201d\u201e\u00ab\u00bb`\u2018\u2019", sentence.charAt(start - 1))) && actual.equals(StringTools.uppercaseFirstChar((String)expected))) {
                    return true;
                }
                if (KnownPhrases.isAllCapitalized(actual) && (!StringUtils.isMixedCase((CharSequence)expected) || KnownPhrases.looksLikeAllCapitalizedHeader(sentence))) {
                    return true;
                }
            }
        }
        return false;
    }

    private static boolean looksLikeAllCapitalizedHeader(CharSequence sentence) {
        return KnownPhrases.isAllCapitalized(sentence) && !Formatter.possiblyEndsWithSentenceBoundary(sentence);
    }

    private static boolean isUpperCaseOnly(String phrase) {
        return phrase.chars().noneMatch(Character::isLowerCase);
    }

    private static boolean isAllCapitalized(CharSequence sentence) {
        for (int i = 0; i < sentence.length(); ++i) {
            if (!Character.isLowerCase(sentence.charAt(i)) || i != 0 && KnownPhrases.isWordChar(sentence.charAt(i - 1))) continue;
            return false;
        }
        return true;
    }

    private static boolean isWordChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_';
    }

    public record Phrase(String phrase, String source) {
    }

    public record Match(int start, int end, String original, boolean strippedDiacritics) {
        public TextRange range() {
            return new TextRange(this.start, this.end);
        }
    }
}

