package model.index.impl;

import ai.grazie.nlp.stemmer.PorterStemmer;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import com.ibm.icu.text.PluralRules;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.SetsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.SequencesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import nebula.core.model.ModelIncludeTagElement;
import org.jetbrains.annotations.NotNull;

/* compiled from: WrsTokenizer.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��(\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010\"\n\u0002\u0010\u000e\n\u0002\b\r\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010 \n\u0002\b\u0004\bÆ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u000e\u0010\u0014\u001a\u00020\u00052\u0006\u0010\u0015\u001a\u00020\u0005J\u0010\u0010\u0016\u001a\u00020\u00052\u0006\u0010\u0017\u001a\u00020\u0005H\u0002J\u001c\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u00050\u00192\f\u0010\u001a\u001a\b\u0012\u0004\u0012\u00020\u00050\u0019H\u0002J\u0014\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\u00050\u00192\u0006\u0010\u0017\u001a\u00020\u0005J\u0014\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u00050\u00192\u0006\u0010\u0017\u001a\u00020\u0005R\u0014\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0006\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0007\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\b\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\n\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\
n��R\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u001d"}, d2 = {"Lmodel/index/impl/WrsTokenizer;", "", "()V", "ARTICLES", "", "", "AUXILIARY_VERBS", "DEMONSTRATIVE_PRONOUNS", "DETERMINERS", "INDEFINITE_PRONOUNS", "INTERROGATIVE_PRONOUNS", "MODAL_VERBS", "NEGATION", "PERSONAL_PRONOUNS", "POSSESSIVE_PRONOUNS", "PREPOSITIONS", "REFLEXIVE_PRONOUNS", "STOPWORDS", "stemmer", "Lai/grazie/nlp/stemmer/PorterStemmer;", "getTextFromUrl", "url", "normalizeText", "text", "stemTokens", "", "tokens", "tokenize", "tokenizeAndStem", "nebula"})
@SourceDebugExtension({"SMAP\nWrsTokenizer.kt\nKotlin\n*S Kotlin\n*F\n+ 1 WrsTokenizer.kt\nmodel/index/impl/WrsTokenizer\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,54:1\n1855#2,2:55\n*S KotlinDebug\n*F\n+ 1 WrsTokenizer.kt\nmodel/index/impl/WrsTokenizer\n*L\n50#1:55,2\n*E\n"})
/* loaded from: input_file:BOOT-INF/lib/nebula.jar:model/index/impl/WrsTokenizer.class */
/**
 * Splits text into lower-cased word tokens, drops stopwords and a few kinds of
 * noise tokens, and optionally stems what is left.
 *
 * <p>The class is a singleton: the constructor is private and the only instance
 * is {@link #INSTANCE}. All state is held in immutable static fields, except the
 * shared stemmer whose thread-safety is not visible from this file.
 */
public final class WrsTokenizer {

    /** The single shared instance. */
    @NotNull
    public static final WrsTokenizer INSTANCE = new WrsTokenizer();

    /** Stemmer applied to every token by {@link #tokenizeAndStem(String)}. */
    @NotNull
    private static final PorterStemmer STEMMER = new PorterStemmer();

    /**
     * Matches every run of characters that may NOT appear in a token. Kept characters are
     * Unicode letters, Unicode marks, ASCII digits, the apostrophe (so contractions such as
     * {@code "can't"} survive) and the asterisk.
     *
     * <p>Inside a character class {@code *} is a literal, which is why asterisks are kept.
     * NOTE(review): this looks unintended ({@code \p{M}*} is only a quantifier outside a
     * class) - confirm before changing it, because doing so alters the produced tokens.
     */
    private static final Pattern NON_TOKEN_CHARS = Pattern.compile("[^\\p{L}\\p{M}*0-9']+");

    /** Matches a single ASCII digit. */
    private static final Pattern DIGIT = Pattern.compile("\\d");

    /** Matches a {@code .html} extension together with an optional following {@code #}. */
    private static final Pattern HTML_SUFFIX = Pattern.compile("\\.html#?");

    /**
     * Tokens of exactly this length are rejected when they contain a digit.
     * NOTE(review): presumably targets generated identifiers/hashes - confirm.
     */
    private static final int REJECTED_DIGIT_TOKEN_LENGTH = 8;

    /** Tokens starting with this prefix are rejected. */
    private static final String REJECTED_PREFIX = "topicid";

    private static final Set<String> ARTICLES = wordSet("a", "an", "the");

    private static final Set<String> AUXILIARY_VERBS = wordSet("am", "are", "be", "is");

    private static final Set<String> MODAL_VERBS = wordSet(
            "can", "could", "may", "might", "must", "shall", "should", "will", "would");

    private static final Set<String> PERSONAL_PRONOUNS = wordSet(
            "he", "her", "him", "it", "me", "she", "them", "they", "you", "us", "we");

    private static final Set<String> DEMONSTRATIVE_PRONOUNS = wordSet("that", "these", "this", "those");

    private static final Set<String> POSSESSIVE_PRONOUNS = wordSet(
            "hers", "his", "mine", "ours", "theirs", "yours");

    private static final Set<String> INTERROGATIVE_PRONOUNS = wordSet(
            "what", "which", "who", "whom", "whose");

    private static final Set<String> REFLEXIVE_PRONOUNS = wordSet(
            "herself", "himself", "itself", "myself", "ourself", "ourselves",
            "themself", "themselves", "yourself", "yourselves");

    // "few", "many", "one" and "other" were previously spelled through ICU's
    // PluralRules.KEYWORD_* constants; the plain literals carry the same values
    // without tying the stopword list to an unrelated library.
    private static final Set<String> INDEFINITE_PRONOUNS = wordSet(
            "all", "another", "any", "anybody", "anyone", "anything", "both", "each", "either",
            "enough", "everybody", "everyone", "everything", "few", "fewer", "less", "little",
            "many", "more", "most", "much", "neither", "nobody", "none", "nothing", "one",
            "other", "others", "several", "some", "somebody", "someone", "something", "such");

    // "with" was previously spelled through Jackson's JsonPOJOBuilder.DEFAULT_WITH_PREFIX.
    // NOTE(review): ModelIncludeTagElement.FROM is presumably "from" (it sits alphabetically
    // between "down" and "in") - confirm its value, then inline the literal as well.
    private static final Set<String> PREPOSITIONS = wordSet(
            "above", "across", "against", "along", "among", "around", "at", "before", "behind",
            "below", "beneath", "beside", "between", "by", "down", ModelIncludeTagElement.FROM,
            "in", "into", "near", "of", "off", "on", "to", "toward", "under", "upon", "with",
            "within");

    private static final Set<String> DETERMINERS = wordSet(
            "each", "every", "her", "my", "our", "their", "your");

    private static final Set<String> NEGATION = wordSet(
            "aren't", "can't", "cannot", "couldn't", "isn't", "mustn't", "no", "not",
            "shouldn't", "wouldn't");

    /** Union of all word categories above; tokens found here are never emitted. */
    private static final Set<String> STOPWORDS;

    static {
        Set<String> all = new LinkedHashSet<>();
        all.addAll(ARTICLES);
        all.addAll(AUXILIARY_VERBS);
        all.addAll(MODAL_VERBS);
        all.addAll(PERSONAL_PRONOUNS);
        all.addAll(DEMONSTRATIVE_PRONOUNS);
        all.addAll(POSSESSIVE_PRONOUNS);
        all.addAll(INTERROGATIVE_PRONOUNS);
        all.addAll(REFLEXIVE_PRONOUNS);
        all.addAll(INDEFINITE_PRONOUNS);
        all.addAll(PREPOSITIONS);
        all.addAll(DETERMINERS);
        all.addAll(NEGATION);
        STOPWORDS = Collections.unmodifiableSet(all);
    }

    private WrsTokenizer() {
    }

    /**
     * Splits {@code text} into tokens.
     *
     * <p>The text is lower-cased, every run of non-token characters is collapsed into a single
     * space, and the result is split on spaces. A piece is kept only if it is longer than one
     * character, is not a stopword, is not an 8-character string containing a digit, and does
     * not start with {@code "topicid"}. For example {@code "The quick brown fox"} yields
     * {@code [quick, brown, fox]}.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the surviving tokens in their original order; possibly empty, never {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    @NotNull
    public List<String> tokenize(@NotNull String text) {
        Objects.requireNonNull(text, "text");
        List<String> tokens = new ArrayList<>();
        for (String candidate : normalizeText(text).split(" ")) {
            if (isIndexable(candidate)) {
                tokens.add(candidate);
            }
        }
        return tokens;
    }

    /**
     * Tokenizes {@code text} exactly like {@link #tokenize(String)} and then stems every token.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the stemmed tokens in their original order; possibly empty, never {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    @NotNull
    public List<String> tokenizeAndStem(@NotNull String text) {
        Objects.requireNonNull(text, "text");
        return stemTokens(tokenize(text));
    }

    /**
     * Returns the part of {@code url} after its last {@code '/'} (the whole string when there
     * is none), with every {@code ".html"} or {@code ".html#"} replaced by a single space.
     * For example {@code "https://host/docs/setup.html#install"} yields {@code "setup install"},
     * and {@code "setup.html"} yields {@code "setup "} (note the trailing space).
     *
     * @param url the URL or path to extract text from; must not be {@code null}
     * @return the rewritten last path segment; never {@code null}
     * @throws NullPointerException if {@code url} is {@code null}
     */
    @NotNull
    public String getTextFromUrl(@NotNull String url) {
        Objects.requireNonNull(url, "url");
        // lastIndexOf returns -1 when there is no '/', so the whole string is the last segment.
        String lastSegment = url.substring(url.lastIndexOf('/') + 1);
        return HTML_SUFFIX.matcher(lastSegment).replaceAll(" ");
    }

    /** Lower-cases {@code text} (locale-independently) and turns every run of non-token characters into one space. */
    private String normalizeText(String text) {
        String lowerCased = text.toLowerCase(Locale.ROOT);
        return NON_TOKEN_CHARS.matcher(lowerCased).replaceAll(" ");
    }

    /** Decides whether a piece of normalized, space-split text is emitted as a token. */
    private static boolean isIndexable(String token) {
        // Normalization leaves no whitespace inside a piece, so a piece is either empty or
        // non-blank; the length check therefore also rejects the empty pieces.
        if (token.length() <= 1) {
            return false;
        }
        if (STOPWORDS.contains(token)) {
            return false;
        }
        if (token.length() == REJECTED_DIGIT_TOKEN_LENGTH && DIGIT.matcher(token).find()) {
            return false;
        }
        return !token.startsWith(REJECTED_PREFIX);
    }

    /** Returns a new list holding the stem of every token, in the same order. */
    private List<String> stemTokens(List<String> tokens) {
        List<String> stems = new ArrayList<>(tokens.size());
        for (String token : tokens) {
            stems.add(STEMMER.stem(token));
        }
        return stems;
    }

    /** Builds an unmodifiable set from the given words. */
    private static Set<String> wordSet(String... words) {
        return Collections.unmodifiableSet(new LinkedHashSet<>(Arrays.asList(words)));
    }
}
