package org.cogroo.formats.ad;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.formats.ad.PortugueseContractionUtility;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import org.cogroo.ContractionUtility;

/* loaded from: input_file:org/cogroo/formats/ad/ADContractionNameSampleStream.class */
/**
 * Turns a stream of AD-format sentences into {@link NameSample}s in which
 * contractions are the annotated "names".
 *
 * <p>Each sentence tree is walked depth-first and the leaf lexemes are collected
 * as tokens. A leaf whose secondary tag contains {@code <sam->} supplies the left
 * half of a contraction and the following leaf tagged {@code <-sam>} supplies the
 * right half; the two are joined with
 * {@link PortugueseContractionUtility#toContraction(String, String)} and the
 * resulting token is covered by a one-token {@link Span} of type
 * {@code "default"}.
 *
 * <p>Instances keep per-sentence parsing state in a field and are therefore not
 * safe for concurrent use.
 */
public class ADContractionNameSampleStream implements ObjectStream<NameSample> {
    /** Splits multi-word lexemes, whose words are joined by one or more underscores. */
    private static final Pattern underlinePattern = Pattern.compile("[_]+");

    /** Secondary-tag marker of the leaf holding the left half of a contraction. */
    private static final String LEFT_PART_TAG = "<sam->";

    /** Secondary-tag marker of the leaf holding the right half of a contraction. */
    private static final String RIGHT_PART_TAG = "<-sam>";

    /** Type assigned to every span produced by this stream. */
    private static final String SPAN_TYPE = "default";

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;

    /** Tag set handed in by the caller; it is stored but not consulted by this class. */
    private final Set<String> tags;

    /** Left half of a contraction waiting for its right half, or {@code null} when none is pending. */
    private String leftContractionPart = null;

    /**
     * Creates a sample stream on top of a stream of AD-format lines.
     *
     * @param objectStream the lines of the AD corpus
     * @param set          tag set to keep with this stream
     */
    public ADContractionNameSampleStream(ObjectStream<String> objectStream, Set<String> set) {
        this.adSentenceStream = new ADSentenceStream(objectStream);
        this.tags = set;
    }

    /**
     * Creates a sample stream that reads AD-format lines from an input stream factory.
     *
     * @param inputStreamFactory factory for the underlying corpus input stream
     * @param str                name of the charset used to decode the input
     * @param set                tag set to keep with this stream
     * @throws IOException           if the underlying line stream cannot be opened
     * @throws IllegalStateException if the charset name is not supported
     */
    public ADContractionNameSampleStream(InputStreamFactory inputStreamFactory, String str, Set<String> set) throws IOException {
        this.tags = set;
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(inputStreamFactory, str));
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Reads the next sentence and converts it to a {@link NameSample}.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException if the underlying stream fails
     */
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence sentence = this.adSentenceStream.read();
        if (sentence == null) {
            return null;
        }
        // A left part left dangling by the previous sentence must not be joined
        // with a leaf of this one.
        this.leftContractionPart = null;

        List<String> tokens = new ArrayList<>();
        List<Span> spans = new ArrayList<>();
        process(sentence.getRoot(), tokens, spans);
        return new NameSample(tokens.toArray(new String[0]), spans.toArray(new Span[0]), true);
    }

    /**
     * Decompiler-generated alias of {@link #read()}, kept so that existing callers
     * of this name continue to work.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException if the underlying stream fails
     * @deprecated use {@link #read()}
     */
    @Deprecated
    public NameSample m9read() throws IOException {
        return read();
    }

    /** Walks {@code node} depth-first and feeds every leaf to {@link #processLeaf}. */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> tokens, List<Span> spans) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                processLeaf((ADSentenceStream.SentenceParser.Leaf) element, tokens, spans);
            } else {
                process((ADSentenceStream.SentenceParser.Node) element, tokens, spans);
            }
        }
    }

    /**
     * Appends the token(s) of one leaf, dispatching on whether the left half of a
     * contraction is currently pending. Null leaves are ignored.
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> tokens, List<Span> spans) {
        if (leaf == null) {
            return;
        }
        if (this.leftContractionPart == null) {
            processRegularLeaf(leaf, tokens, spans);
        } else {
            processRightPart(leaf, tokens, spans);
            // Whatever happened, the pending left part has been consumed.
            this.leftContractionPart = null;
        }
    }

    /**
     * Handles a leaf seen while no contraction is pending: either a plain token,
     * or a {@code <sam->} leaf whose last underscore-separated word becomes the
     * pending left part.
     */
    private void processRegularLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> tokens, List<Span> spans) {
        String tag = leaf.getSecondaryTag();
        String lexeme = leaf.getLexeme();
        if (tag == null || !tag.contains(LEFT_PART_TAG)) {
            tokens.add(lexeme);
            return;
        }

        String[] words = underlinePattern.split(lexeme);
        if (words.length == 0) {
            // Lexeme made only of underscores: there is nothing to hold back as a
            // left part, so keep it as an ordinary token.
            tokens.add(lexeme);
            return;
        }

        int last = words.length - 1;
        for (int i = 0; i < last; i++) {
            tokens.add(words[i]);
            // A word that ContractionUtility can expand is itself a contraction.
            if (ContractionUtility.expand(words[i]) != null) {
                spans.add(spanOverLastToken(tokens));
            }
        }
        this.leftContractionPart = words[last];
    }

    /**
     * Handles the leaf that follows a pending left part. If it is tagged
     * {@code <-sam>}, its first underscore-separated word is joined with the left
     * part; when no contraction is known for the pair, both halves are kept as
     * separate tokens so that no text is lost.
     */
    private void processRightPart(ADSentenceStream.SentenceParser.Leaf leaf, List<String> tokens, List<Span> spans) {
        String tag = leaf.getSecondaryTag();
        String lexeme = leaf.getLexeme();
        if (tag == null || !tag.contains(RIGHT_PART_TAG)) {
            // NOTE(review): neither the pending left part nor this lexeme is added
            // to the sentence here; only the diagnostic is emitted.
            System.err.println("unmatch" + this.leftContractionPart + " + " + lexeme);
            return;
        }

        String[] words = underlinePattern.split(lexeme);
        // split() yields an empty array for an underscore-only lexeme.
        String rightPart = words.length > 0 ? words[0] : lexeme;

        String contraction = PortugueseContractionUtility.toContraction(this.leftContractionPart, rightPart);
        if (contraction != null) {
            tokens.add(contraction);
            spans.add(spanOverLastToken(tokens));
        } else {
            System.err.println("missing " + this.leftContractionPart + " + " + rightPart);
            tokens.add(this.leftContractionPart);
            tokens.add(rightPart);
        }

        // Remaining words of a multi-word right-hand lexeme are plain tokens.
        for (int i = 1; i < words.length; i++) {
            tokens.add(words[i]);
        }
    }

    /** Builds a span covering exactly the token most recently appended to {@code tokens}. */
    private static Span spanOverLastToken(List<String> tokens) {
        int end = tokens.size();
        return new Span(end - 1, end, SPAN_TYPE);
    }

    /**
     * Resets the underlying sentence stream and discards any pending contraction state.
     *
     * @throws IOException                   if the underlying stream fails to reset
     * @throws UnsupportedOperationException if the underlying stream cannot be reset
     */
    public void reset() throws IOException, UnsupportedOperationException {
        this.leftContractionPart = null;
        this.adSentenceStream.reset();
    }

    /**
     * Closes the underlying sentence stream.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
        this.adSentenceStream.close();
    }
}
