package net.bluemind.tika.server.impl;

import com.google.common.io.FileWriteMode;
import com.google.common.io.Files;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.json.JsonObject;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:net/bluemind/tika/server/impl/ExtractTextWorker.class */
/**
 * Verticle that answers {@code tika.extract} event-bus requests with the plain text of a document.
 *
 * <p>Each request body is a {@link JsonObject} carrying a {@code hash} (used as the cache key) and
 * a {@code path} (the file to parse). Extracted text is written to
 * {@code TikaDirectories.CACHED_TEXTS/<hash>.txt} and registered in {@link HashCache}, so a later
 * request with the same hash is served from that file instead of being parsed again.
 *
 * <p>NOTE(review): parsing and file I/O are blocking; presumably this verticle is deployed as a
 * worker - confirm against the deployment code.
 */
public final class ExtractTextWorker extends AbstractVerticle {
    private static final Logger logger = LoggerFactory.getLogger(ExtractTextWorker.class);

    /** Number of requests handled so far; drives the periodic cache-statistics log line. */
    private static final AtomicLong extractions = new AtomicLong();

    /** Event-bus address this verticle consumes from. */
    private static final String ADDRESS = "tika.extract";

    /** Cache statistics are logged once every this many handled requests. */
    private static final long STATS_LOG_INTERVAL = 100;

    /** Text chunks are kept only while the running character total stays below this limit. */
    private static final int MAX_EXTRACTED_CHARS = 524288;

    /** Initial capacity of the buffer that collects the extracted text. */
    private static final int INITIAL_BUFFER_CHARS = 1048576;

    private final AutoDetectParser adp;
    private final ParseContext parseContext;

    public ExtractTextWorker() {
        logger.info("Created.");
        this.parseContext = new ParseContext();
        this.adp = new AutoDetectParser(new DefaultDetector());
    }

    /**
     * Registers the {@code tika.extract} consumer. Every request is answered either with the
     * extracted text or, if anything throws, with a failure reply (code 500).
     */
    @Override
    public void start() {
        this.vertx.eventBus().consumer(ADDRESS, message -> {
            try {
                JsonObject request = (JsonObject) message.body();
                String hash = request.getString("hash");
                String path = request.getString("path");
                String text = cachedOrExtractedText(hash, path);
                if (extractions.incrementAndGet() % STATS_LOG_INTERVAL == 0) {
                    logger.info("HASH cached stats: {}", HashCache.stats());
                }
                message.reply(text);
            } catch (Exception | LinkageError e) {
                // Log before replying: the failure reply only carries the message, not the stack trace.
                logger.error("Text extraction request failed: {}", e.getMessage(), e);
                message.fail(500, e.getMessage());
            }
        });
    }

    /**
     * Returns the text for {@code hash}, reading it from the cached file when one is registered
     * and still exists, otherwise extracting it from {@code path} (and caching the result).
     *
     * @param hash cache key of the document
     * @param path location of the document to parse on a cache miss
     * @return the cached or freshly extracted text
     */
    private String cachedOrExtractedText(String hash, String path) {
        File cached = HashCache.getIfPresent(hash);
        if (cached == null || !cached.exists()) {
            return extractToCacheFile(hash, path);
        }
        try {
            String text = Files.asCharSource(cached, StandardCharsets.UTF_8).read();
            logger.debug("Used hashed value for {}", path);
            return text;
        } catch (IOException e) {
            // An unreadable cache entry is not fatal: fall back to a fresh extraction.
            logger.warn("problem with cached file, re-indexing: {}", e.getMessage());
            return extractToCacheFile(hash, path);
        }
    }

    /**
     * Extracts the text of {@code str2}, stores it in the cache file for hash {@code str} and
     * registers that file in {@link HashCache}. A failure to write the cache file is logged and
     * otherwise ignored: the extracted text is returned either way.
     *
     * @param str  cache key (hash) of the document
     * @param str2 path of the document to parse
     * @return the extracted text (empty when parsing failed)
     */
    private String extractToCacheFile(String str, String str2) {
        String text = extractText(str2);
        File cacheFile = new File(TikaDirectories.CACHED_TEXTS, str + ".txt");
        try {
            Files.asCharSink(cacheFile, StandardCharsets.UTF_8).write(text);
            HashCache.put(str, cacheFile);
            if (logger.isDebugEnabled()) {
                logger.debug("Cached {} characters in {}", text.length(), cacheFile.getAbsolutePath());
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        return text;
    }

    /**
     * Best-effort extraction: any parsing failure is logged and reported as an empty string.
     *
     * @param str path of the document to parse
     * @return the extracted text, or {@code ""} when parsing threw
     */
    private String extractText(String str) {
        logger.info("Extracting text from {}...", str);
        try {
            return tikaExtract(str);
        } catch (Exception e) {
            logger.error("Failed to parse: {}", e.getMessage(), e);
            return "";
        }
    }

    /**
     * Parses the file at {@code str} with Tika and collects its character content.
     *
     * <p>A single {@code '\n'} is inserted at the first element start following appended text, so
     * runs of consecutive elements produce one line break rather than many. A text chunk is
     * appended only while the running character total (including that chunk) stays below
     * {@link #MAX_EXTRACTED_CHARS}; the chunk that crosses the limit and everything after it is
     * dropped.
     *
     * @param str path of the document to parse
     * @return the collected text
     * @throws IOException   if the file cannot be opened or read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if the document cannot be parsed
     */
    private String tikaExtract(String str) throws IOException, SAXException, TikaException {
        final StringBuilder text = new StringBuilder(INITIAL_BUFFER_CHARS);
        DefaultHandler collector = new DefaultHandler() {
            // long rather than int: the count keeps growing after the cap is hit and must never
            // wrap around to a negative value, which would re-enable appending.
            private long total;
            private boolean crAdded = false;

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                if (this.crAdded) {
                    return;
                }
                text.append('\n');
                this.crAdded = true;
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                this.total += length;
                if (this.total < MAX_EXTRACTED_CHARS) {
                    text.append(ch, start, length);
                    this.crAdded = false;
                }
            }
        };
        Metadata metadata = new Metadata();
        // The parser consumes the stream but does not close it, so close it here.
        try (FileInputStream in = new FileInputStream(str)) {
            this.adp.parse(in, collector, metadata, this.parseContext);
        }
        return text.toString();
    }
}
