/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.HMMChineseTokenizer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.omegat.tokenizer.BaseTokenizer;
import org.omegat.tokenizer.Tokenizer;
import org.omegat.util.Token;

/**
 * Tokenizer for Chinese text built on Lucene's smart-Chinese components.
 *
 * <p>The {@link Tokenizer} annotation declares this class for the language
 * code {@code "zh"} with {@code isDefault = true}.
 *
 * <p>Verbatim tokenization is delegated to the code-point based helpers
 * inherited from {@link BaseTokenizer}; only the analysis-oriented token
 * stream is produced by Lucene classes.
 */
@Tokenizer(languages = { "zh" }, isDefault = true)
public class LuceneSmartChineseTokenizer extends BaseTokenizer {

    /**
     * Splits the text verbatim by delegating to
     * {@code tokenizeByCodePoint}.
     *
     * @param strOrig the text to tokenize
     * @return the tokens produced by {@code tokenizeByCodePoint}
     */
    @Override
    public Token[] tokenizeVerbatim(String strOrig) {
        return tokenizeByCodePoint(strOrig);
    }

    /**
     * Splits the text verbatim by delegating to
     * {@code tokenizeByCodePointToStrings}.
     *
     * @param strOrig the text to tokenize
     * @return the strings produced by {@code tokenizeByCodePointToStrings}
     */
    @Override
    public String[] tokenizeVerbatimToStrings(String strOrig) {
        return tokenizeByCodePointToStrings(strOrig);
    }

    /**
     * Builds the Lucene token stream over the given text.
     *
     * <p>Without stemming, a bare {@link HMMChineseTokenizer} reading the
     * text is returned and {@code stopWordsAllowed} is not consulted. With
     * stemming, the stream comes from a new {@link SmartChineseAnalyzer},
     * to whose constructor {@code stopWordsAllowed} is passed unchanged.
     *
     * @param strOrig          the text to analyze
     * @param stemsAllowed     selects the analyzer-based stream when
     *                         {@code true}, the plain tokenizer otherwise
     * @param stopWordsAllowed forwarded as the sole constructor argument of
     *                         {@link SmartChineseAnalyzer}; used only when
     *                         {@code stemsAllowed} is {@code true}
     * @return a token stream over {@code strOrig}
     * @throws IOException declared by the overridden method's signature
     */
    @Override
    protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed)
            throws IOException {
        if (!stemsAllowed) {
            HMMChineseTokenizer segmenter = new HMMChineseTokenizer();
            segmenter.setReader(new StringReader(strOrig));
            return segmenter;
        }
        // The analyzer is intentionally not closed here: the caller consumes
        // the stream it hands out.
        return new SmartChineseAnalyzer(stopWordsAllowed).tokenStream("", new StringReader(strOrig));
    }
}

