package smile.nlp.tokenizer;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;

/* loaded from: input_file:BOOT-INF/lib/libarx-3.8.0.jar:smile/nlp/tokenizer/BreakIteratorTokenizer.class */
/**
 * Word tokenizer built on {@link BreakIterator#getWordInstance()}.
 *
 * <p>The input text is cut at every boundary reported by the word iterator.
 * Each resulting segment is trimmed with {@link String#trim()} and kept only
 * if something remains, so pure-whitespace segments are dropped while
 * punctuation segments are returned as tokens of their own.
 *
 * <p>An instance holds a single {@code BreakIterator} that is re-targeted at
 * the input on every {@link #split(String)} call. NOTE(review): that shared,
 * mutable iterator means concurrent calls on one instance are presumably not
 * safe — confirm before sharing an instance across threads.
 */
public class BreakIteratorTokenizer implements Tokenizer {
    /** Word-boundary iterator reused by every {@link #split(String)} call. */
    private final BreakIterator boundary;

    /**
     * Creates a tokenizer using the word-break rules returned by
     * {@link BreakIterator#getWordInstance()}.
     */
    public BreakIteratorTokenizer() {
        this.boundary = BreakIterator.getWordInstance();
    }

    /**
     * Creates a tokenizer using the word-break rules of the given locale.
     *
     * @param locale the locale passed to {@link BreakIterator#getWordInstance(Locale)}
     */
    public BreakIteratorTokenizer(Locale locale) {
        this.boundary = BreakIterator.getWordInstance(locale);
    }

    /**
     * Splits the text into word tokens.
     *
     * @param str the text to tokenize
     * @return the non-empty, trimmed segments between consecutive word
     *         boundaries, in order of appearance; an empty array if there are none
     */
    @Override
    public String[] split(String str) {
        boundary.setText(str);
        ArrayList<String> tokens = new ArrayList<>();

        // Walk consecutive boundary pairs [start, end) until the iterator is exhausted.
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) {
            String token = str.substring(start, end).trim();
            // Segments that consist solely of whitespace trim down to "" and are skipped.
            if (!token.isEmpty()) {
                tokens.add(token);
            }
            start = end;
        }

        return tokens.toArray(new String[0]);
    }
}
