package org.nuxeo.ecm.platform.semanticentities.service;

import com.google.common.collect.MapMaker;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.StringReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.nuxeo.common.utils.StringUtils;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.ClientException;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentLocation;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.DocumentRef;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.api.impl.DocumentLocationImpl;
import org.nuxeo.ecm.core.api.impl.blob.StreamingBlob;
import org.nuxeo.ecm.core.api.model.PropertyException;
import org.nuxeo.ecm.core.api.pathsegment.PathSegmentService;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.schema.SchemaManager;
import org.nuxeo.ecm.core.utils.BlobsExtractor;
import org.nuxeo.ecm.platform.semanticentities.AnalysisTask;
import org.nuxeo.ecm.platform.semanticentities.DereferencingException;
import org.nuxeo.ecm.platform.semanticentities.EntitySuggestion;
import org.nuxeo.ecm.platform.semanticentities.LocalEntityService;
import org.nuxeo.ecm.platform.semanticentities.ProgressStatus;
import org.nuxeo.ecm.platform.semanticentities.RemoteEntityService;
import org.nuxeo.ecm.platform.semanticentities.RemoteEntitySource;
import org.nuxeo.ecm.platform.semanticentities.SemanticAnalysisService;
import org.nuxeo.ecm.platform.semanticentities.SerializationTask;
import org.nuxeo.ecm.platform.semanticentities.adapter.OccurrenceGroup;
import org.nuxeo.ecm.platform.semanticentities.adapter.OccurrenceInfo;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.model.ComponentContext;
import org.nuxeo.runtime.model.DefaultComponent;

/* loaded from: input_file:org/nuxeo/ecm/platform/semanticentities/service/SemanticAnalysisServiceImpl.class */
public class SemanticAnalysisServiceImpl extends DefaultComponent implements SemanticAnalysisService {
    /** Converter name handed to the conversion service to turn blobs into plain text. */
    private static final String ANY2TEXT = "any2text";
    /** Base URL used when {@link #STANBOL_URL_PROPERTY} is not set or is blank. */
    protected static final String DEFAULT_STANBOL_URL = "https://stanbol.demo.nuxeo.com/";
    /** Framework property read to locate the engine when {@code engineURL} is null. */
    protected static final String STANBOL_URL_PROPERTY = "org.nuxeo.ecm.platform.semanticentities.stanbolUrl";
    /** Initial value of {@code outputFormat}; sent as the Accept header of engine requests. */
    protected static final String DEFAULT_ENGINE_OUTPUT_FORMAT = "application/rdf+xml";
    /** HTTP client used to call the engine; created by {@code initHttpClient()}. */
    protected HttpClient httpClient;
    // The four services below are looked up through Framework.getService in activate().
    protected ConversionService conversionService;
    protected LocalEntityService leService;
    protected PathSegmentService pathService;
    protected SchemaManager schemaManager;
    /** Work queue backing {@code analysisExecutor}. */
    protected BlockingQueue<Runnable> analysisTaskQueue;
    /** Executor running the tasks submitted by {@code launchAnalysis}. */
    protected ThreadPoolExecutor analysisExecutor;
    /** Work queue backing {@code serializationExecutor}. */
    protected BlockingQueue<Runnable> serializationTaskQueue;
    /** Executor running the tasks submitted by {@code scheduleSerializationTask}. */
    protected ThreadPoolExecutor serializationExecutor;
    /** Looked up on first use by {@code getRemoteEntityService()}. */
    protected RemoteEntityService reService;
    private static final Log log = LogFactory.getLog(SemanticAnalysisServiceImpl.class);
    /** Maps ontology type URIs to local document type names; filled by the static initializer. */
    protected static final Map<String, String> localTypes = new HashMap();
    /**
     * Matches every character that is NOT allowed in an XML 1.0 document, i.e. anything outside
     * tab, line feed, carriage return, U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.
     * The supplementary range is written as a surrogate-pair range so that valid characters
     * outside the basic multilingual plane are kept instead of being stripped.
     */
    Pattern INVALID_XML_CHARS = Pattern.compile("[^\\u0009\\u000A\\u000D\\u0020-\\uD7FF\\uE000-\\uFFFD\uD800\uDC00-\uDBFF\uDFFF]");
    /**
     * Status key currently recorded for each document location. The map is built with a
     * concurrency level of 10 and a 30 minute expiration.
     */
    protected final Map<DocumentLocation, String> states = new MapMaker().concurrencyLevel(10).expiration(30, TimeUnit.MINUTES).makeMap();
    /** Value sent as the Accept header when calling the engine from {@code analyze}. */
    protected String outputFormat = DEFAULT_ENGINE_OUTPUT_FORMAT;
    /** When true, {@code createLinks} creates a new entity document for groups without any suggestion. */
    protected boolean linkToUnrecognizedEntities = true;
    /** When false, {@code createLinks} leaves groups having more than one suggestion unlinked. */
    protected boolean linkToAmbiguousEntities = true;
    /** When false, {@code createLinks} ignores "Person" groups whose name is a single word. */
    protected boolean linkShortPersonNames = false;
    /** When true, {@code findStanbolEntityOccurrences} also builds entity suggestions from the RDF model. */
    protected boolean prefetchSuggestion = true;
    /** Explicit engine endpoint; when null, {@code callSemanticEngine} derives it from the framework property. */
    protected String engineURL = null;
    /** Set to true at the end of {@code activate} and back to false at the start of {@code deactivate}. */
    protected boolean active = false;

    /* loaded from: input_file:org/nuxeo/ecm/platform/semanticentities/service/SemanticAnalysisServiceImpl$NamedThreadFactory.class */
    /**
     * Thread factory that creates daemon threads of normal priority whose names are made of a
     * caller supplied prefix, a per-factory pool number and a per-thread sequence number.
     */
    public static class NamedThreadFactory implements ThreadFactory {
        /** Counter shared by all factories, used to number the pools. */
        private static final AtomicInteger poolNumber = new AtomicInteger();
        /** Counter of the threads created by this factory. */
        private final AtomicInteger threadNumber = new AtomicInteger();
        private final ThreadGroup group;
        private final String namePrefix;

        /**
         * @param str the leading part of the name given to every created thread
         */
        public NamedThreadFactory(String str) {
            SecurityManager manager = System.getSecurityManager();
            if (manager != null) {
                this.group = manager.getThreadGroup();
            } else {
                this.group = Thread.currentThread().getThreadGroup();
            }
            int poolId = poolNumber.incrementAndGet();
            this.namePrefix = str + ' ' + poolId + '-';
        }

        /**
         * Creates a new daemon thread wrapping the given task.
         *
         * @param runnable the task the thread will run
         * @return the newly created, not yet started thread
         */
        @Override // java.util.concurrent.ThreadFactory
        public Thread newThread(Runnable runnable) {
            String threadName = this.namePrefix + this.threadNumber.incrementAndGet();
            Thread worker = new Thread(this.group, runnable, threadName);
            worker.setDaemon(true);
            worker.setPriority(Thread.NORM_PRIORITY);
            return worker;
        }
    }

    /**
     * Activates the component: resolves the collaborating services, creates the HTTP client and
     * starts the two executors, then flags the service as active.
     *
     * @param componentContext the context forwarded to the parent implementation
     * @throws Exception if the parent activation or a service lookup fails
     */
    public void activate(ComponentContext componentContext) throws Exception {
        super.activate(componentContext);

        // collaborating services
        this.conversionService = (ConversionService) Framework.getService(ConversionService.class);
        this.leService = (LocalEntityService) Framework.getService(LocalEntityService.class);
        this.pathService = (PathSegmentService) Framework.getService(PathSegmentService.class);
        this.schemaManager = (SchemaManager) Framework.getService(SchemaManager.class);
        initHttpClient();

        // executor running the analysis tasks
        NamedThreadFactory analysisThreads = new NamedThreadFactory("Nuxeo Async Semantic Analysis");
        this.analysisTaskQueue = new LinkedBlockingQueue<Runnable>();
        this.analysisExecutor = new ThreadPoolExecutor(4, 8, 5L, TimeUnit.MINUTES, this.analysisTaskQueue, analysisThreads);

        // single threaded executor running the serialization tasks
        NamedThreadFactory serializationThreads = new NamedThreadFactory("Nuxeo Async Semantic Link Serialization");
        this.serializationTaskQueue = new LinkedBlockingQueue<Runnable>();
        this.serializationExecutor = new ThreadPoolExecutor(1, 1, 5L, TimeUnit.MINUTES, this.serializationTaskQueue, serializationThreads);

        this.active = true;
    }

    /**
     * Deactivates the component: flags the service as inactive, drops the queued tasks, stops
     * both executors and releases the pooled HTTP connections.
     * <p>
     * Every resource is checked for null so that deactivation still succeeds when
     * {@code activate} failed before all of them were created.
     *
     * @param componentContext the component context (not used)
     * @throws Exception declared for compatibility with the component contract
     */
    public void deactivate(ComponentContext componentContext) throws Exception {
        this.active = false;
        if (this.analysisTaskQueue != null) {
            this.analysisTaskQueue.clear();
        }
        if (this.serializationTaskQueue != null) {
            this.serializationTaskQueue.clear();
        }
        if (this.analysisExecutor != null) {
            this.analysisExecutor.shutdownNow();
        }
        if (this.serializationExecutor != null) {
            this.serializationExecutor.shutdownNow();
        }
        // initHttpClient() builds a pooling connection manager: close its sockets so they do
        // not outlive the component.
        if (this.httpClient != null && this.httpClient.getConnectionManager() != null) {
            this.httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Records the "linking queued" status for the document of the task, removes every queued
     * task equal to it and submits it to the serialization executor.
     *
     * @param serializationTask the task to schedule
     */
    public void scheduleSerializationTask(SerializationTask serializationTask) {
        DocumentLocation location = serializationTask.getDocumentLocation();
        this.states.put(location, "status.semantic.linkingQueued");
        // remove() only drops one matching element per call: loop until none is left
        while (this.serializationTaskQueue.remove(serializationTask)) {
            // nothing else to do
        }
        this.serializationExecutor.execute(serializationTask);
    }

    /**
     * Creates the shared HTTP client on top of a thread safe connection manager that knows the
     * "http" (port 80) and "https" (port 443) schemes.
     */
    protected void initHttpClient() {
        Scheme plainScheme = new Scheme("http", PlainSocketFactory.getSocketFactory(), 80);
        Scheme secureScheme = new Scheme("https", SSLSocketFactory.getSocketFactory(), 443);
        SchemeRegistry registry = new SchemeRegistry();
        registry.register(plainScheme);
        registry.register(secureScheme);

        BasicHttpParams clientParams = new BasicHttpParams();
        ThreadSafeClientConnManager connectionManager = new ThreadSafeClientConnManager(clientParams, registry);
        this.httpClient = new DefaultHttpClient(connectionManager, clientParams);
    }

    /**
     * Tells whether a document must be left out of the analysis.
     * <p>
     * Documents whose type extends "Entity" or "Occurrence" are skipped, as are documents whose
     * {@code dc:language} is set to something other than "en" or "english" (case insensitive).
     *
     * @param documentModel the document to check
     * @return true when the document must not be analyzed
     * @throws PropertyException if the {@code dc:language} property cannot be read
     * @throws ClientException if the document cannot be accessed
     */
    protected boolean shouldSkip(DocumentModel documentModel) throws PropertyException, ClientException {
        if (this.schemaManager.getDocumentTypeNamesExtending("Entity").contains(documentModel.getType())) {
            return true;
        }
        if (this.schemaManager.getDocumentTypeNamesExtending("Occurrence").contains(documentModel.getType())) {
            return true;
        }
        String language = (String) documentModel.getProperty("dc:language").getValue(String.class);
        if (language == null || language.isEmpty()) {
            // no declared language: analyze the document
            return false;
        }
        boolean english = "en".equalsIgnoreCase(language) || "english".equalsIgnoreCase(language);
        return !english;
    }

    /**
     * Queues an asynchronous analysis of a document unless an equal task is already waiting in
     * the analysis queue.
     *
     * @param str the repository name
     * @param documentRef the reference of the document to analyze
     * @throws ClientException declared by the service contract
     */
    public void launchAnalysis(String str, DocumentRef documentRef) throws ClientException {
        Runnable task = new AnalysisTask(str, documentRef, this);
        boolean alreadyQueued = this.analysisTaskQueue.contains(task);
        if (!alreadyQueued) {
            DocumentLocationImpl location = new DocumentLocationImpl(str, documentRef);
            this.states.put(location, "status.semantic.analysisQueued");
            this.analysisExecutor.execute(task);
        }
    }

    /**
     * Analyzes a document and links it to the found entities in the calling thread.
     * <p>
     * Nothing happens for documents rejected by {@code shouldSkip}. The status recorded for the
     * document is removed once the work is over, whether it succeeded or failed.
     *
     * @param documentModel the document to analyze
     * @param coreSession the session used to read and write documents
     * @throws ClientException if a repository operation fails
     * @throws IOException if the call to the engine fails
     */
    public void launchSynchronousAnalysis(DocumentModel documentModel, CoreSession coreSession) throws ClientException, IOException {
        if (shouldSkip(documentModel)) {
            return;
        }
        try {
            this.states.put(new DocumentLocationImpl(documentModel.getRepositoryName(), documentModel.getRef()), "status.semantic.analysisPending");
            String text = extractText(documentModel);
            List<OccurrenceGroup> groups = analyze(coreSession, text);
            createLinks(documentModel, coreSession, groups);
        } finally {
            // forget the status on success as well as on failure
            this.states.remove(new DocumentLocationImpl(documentModel.getRepositoryName(), documentModel.getRef()));
        }
    }

    /**
     * Links a document to the entities matching the given occurrence groups.
     * <p>
     * For each group the local entity service is asked for up to 3 suggestions, then:
     * <ul>
     * <li>without any suggestion, a new entity document flagged as automatically created is
     * added to the entity container and linked, but only when
     * {@code linkToUnrecognizedEntities} is set; otherwise the group is skipped;</li>
     * <li>with several suggestions, the first one is linked only when
     * {@code linkToAmbiguousEntities} is set;</li>
     * <li>with exactly one suggestion, it is linked.</li>
     * </ul>
     * "Person" groups whose name is a single word are ignored unless
     * {@code linkShortPersonNames} is set.
     *
     * @param documentModel the document holding the occurrences
     * @param coreSession the session used to create entities and links
     * @param list the occurrence groups to link; nothing happens when null or empty
     * @throws ClientException if a repository operation fails
     * @throws IOException declared by the service contract
     */
    public void createLinks(DocumentModel documentModel, CoreSession coreSession, List<OccurrenceGroup> list) throws ClientException, IOException {
        if (list == null || list.isEmpty()) {
            return;
        }
        this.states.put(new DocumentLocationImpl(documentModel.getRepositoryName(), documentModel.getRef()), "status.semantic.linkingPending");
        DocumentModel entityContainer = this.leService.getEntityContainer(coreSession);
        for (OccurrenceGroup group : list) {
            boolean shortPersonName = "Person".equals(group.type) && group.name.trim().split(" ").length <= 1;
            if (shortPersonName && !this.linkShortPersonNames) {
                continue;
            }
            List<EntitySuggestion> suggestions = this.leService.suggestEntity(coreSession, group, 3);
            if (suggestions.isEmpty()) {
                if (this.linkToUnrecognizedEntities) {
                    DocumentModel newEntity = coreSession.createDocumentModel(group.type);
                    newEntity.setPropertyValue("dc:title", group.name);
                    newEntity.setPropertyValue("entity:automaticallyCreated", true);
                    newEntity.setPathInfo(entityContainer.getPathAsString(), this.pathService.generatePathSegment(newEntity));
                    DocumentModel createdEntity = coreSession.createDocument(newEntity);
                    coreSession.save();
                    this.leService.addOccurrences(coreSession, documentModel.getRef(), createdEntity.getRef(), group.occurrences);
                }
                // There is no candidate to link to: reading the first suggestion here would
                // fail with an IndexOutOfBoundsException.
                continue;
            }
            if (suggestions.size() > 1 && !this.linkToAmbiguousEntities) {
                continue;
            }
            this.leService.addOccurrences(coreSession, documentModel.getRef(), suggestions.get(0).withAutomaticallyCreated(true), group.occurrences);
        }
    }

    /**
     * Extracts the occurrence groups described by the text annotations of an RDF model.
     * <p>
     * Only the text annotations that have no {@code dc:relation} of their own, whose
     * {@code dc:type} is a URI known to {@code localTypes} and that carry a selected text start
     * a group. The resources pointing at such an annotation through {@code dc:relation} then
     * contribute either an additional occurrence or, when {@code prefetchSuggestion} is set, an
     * entity suggestion.
     *
     * @param coreSession the session used to build entity suggestions
     * @param model the RDF model to read
     * @return the sorted list of occurrence groups
     * @throws DereferencingException if an entity suggestion cannot be built
     * @throws ClientException if a repository operation fails
     */
    public List<OccurrenceGroup> findStanbolEntityOccurrences(CoreSession coreSession, Model model) throws DereferencingException, ClientException {
        Property rdfType = model.getProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        Property dcType = model.getProperty("http://purl.org/dc/terms/type");
        Property dcRelation = model.getProperty("http://purl.org/dc/terms/relation");
        Resource textAnnotationClass = model.getResource("http://fise.iks-project.eu/ontology/TextAnnotation");

        List<OccurrenceGroup> groups = new ArrayList<OccurrenceGroup>();
        ResIterator annotations = model.listSubjectsWithProperty(rdfType, textAnnotationClass);
        while (annotations.hasNext()) {
            Resource annotation = annotations.nextResource();
            if (model.listObjectsOfProperty(annotation, dcRelation).hasNext()) {
                // annotations related to another one are handled with the one they point at
                continue;
            }
            Statement typeStatement = annotation.getProperty(dcType);
            if (typeStatement == null || !typeStatement.getObject().isURIResource()) {
                continue;
            }
            String localType = localTypes.get(typeStatement.getObject().as(Resource.class).getURI());
            if (localType == null) {
                continue;
            }
            OccurrenceInfo firstOccurrence = getOccurrenceInfo(model, annotation);
            if (firstOccurrence == null) {
                continue;
            }

            OccurrenceGroup group = new OccurrenceGroup(firstOccurrence.mention, localType);
            group.occurrences.add(firstOccurrence);
            ResIterator related = model.listSubjectsWithProperty(dcRelation, annotation);
            while (related.hasNext()) {
                Resource relatedResource = related.nextResource();
                OccurrenceInfo relatedOccurrence = getOccurrenceInfo(model, relatedResource);
                if (relatedOccurrence != null) {
                    group.occurrences.add(relatedOccurrence);
                    continue;
                }
                if (!this.prefetchSuggestion) {
                    continue;
                }
                EntitySuggestion suggestion = getEntitySuggestion(coreSession, model, relatedResource, localType);
                if (suggestion != null) {
                    group.entitySuggestions.add(suggestion.withAutomaticallyCreated(true));
                }
            }
            Collections.sort(group.entitySuggestions);
            Collections.sort(group.occurrences);
            groups.add(group);
        }
        Collections.sort(groups);
        return groups;
    }

    /**
     * Builds an entity suggestion out of an annotation resource of the model.
     * <p>
     * When the referenced entity has at least one {@code rdf:type} statement in the model, a
     * new document model of the given type is filled through the remote entity service.
     * Otherwise the suggestion is made of the entity label, the entity URI and the type.
     *
     * @param coreSession the session used to create the document model
     * @param model the RDF model holding the annotation
     * @param resource the annotation resource
     * @param str the local document type of the suggested entity
     * @return the suggestion scored with the annotation confidence (0 when missing), or null
     *         when the annotation has no entity reference, or has neither type information
     *         nor a label
     * @throws DereferencingException if the entity cannot be dereferenced from the model
     * @throws ClientException if the document model cannot be created
     */
    protected EntitySuggestion getEntitySuggestion(CoreSession coreSession, Model model, Resource resource, String str) throws DereferencingException, ClientException {
        Property entityReference = model.getProperty("http://fise.iks-project.eu/ontology/entity-reference");
        Property confidence = model.getProperty("http://fise.iks-project.eu/ontology/confidence");
        Property entityLabel = model.getProperty("http://fise.iks-project.eu/ontology/entity-label");

        Resource entity = resource.getPropertyResourceValue(entityReference);
        if (entity == null) {
            return null;
        }

        double score = 0.0d;
        Statement confidenceStatement = resource.getProperty(confidence);
        if (confidenceStatement != null) {
            score = confidenceStatement.getObject().asLiteral().getDouble();
        }

        StmtIterator entityTypes = entity.listProperties(model.getProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"));
        String entityUri = entity.getURI();
        if (!entityTypes.hasNext()) {
            // the model does not describe the entity: fall back on its label, if any
            Statement labelStatement = resource.getProperty(entityLabel);
            if (labelStatement == null) {
                return null;
            }
            String label = labelStatement.getObject().asLiteral().getString();
            return new EntitySuggestion(label, entityUri, str).withScore(score);
        }

        DocumentModel entityDocument = coreSession.createDocumentModel(str);
        getRemoteEntityService().dereferenceIntoFromModel(entityDocument, URI.create(entityUri), model, true, true);
        return new EntitySuggestion(entityDocument).withScore(score);
    }

    /**
     * Reads the occurrence described by an annotation resource of the model.
     * <p>
     * The mention is the trimmed selected text. The context is the trimmed selection context
     * when it is a literal that contains the mention and is at most 10000 characters long, and
     * the mention itself otherwise. The order is the start literal parsed as a double, or 0.
     *
     * @param model the RDF model holding the annotation
     * @param resource the annotation resource
     * @return the occurrence, or null when the resource has no literal selected text
     */
    protected OccurrenceInfo getOccurrenceInfo(Model model, Resource resource) {
        Property selectedText = model.getProperty("http://fise.iks-project.eu/ontology/selected-text");
        Property start = model.getProperty("http://fise.iks-project.eu/ontology/start");

        Statement mentionStatement = resource.getProperty(selectedText);
        if (mentionStatement == null || !mentionStatement.getObject().isLiteral()) {
            return null;
        }
        String mention = mentionStatement.getObject().as(Literal.class).getString().trim();

        Statement startStatement = resource.getProperty(start);
        double order = 0.0d;
        if (startStatement != null && startStatement.getObject().isLiteral()) {
            order = Double.parseDouble(startStatement.getObject().as(Literal.class).getString());
        }

        String context = mention;
        Statement contextStatement = resource.getProperty(model.getProperty("http://fise.iks-project.eu/ontology/selection-context"));
        if (contextStatement != null && contextStatement.getObject().isLiteral()) {
            String candidate = contextStatement.getObject().as(Literal.class).getString().trim();
            if (candidate.contains(mention) && candidate.length() <= 10000) {
                context = candidate;
            }
        }
        return new OccurrenceInfo(mention, context).withOrder(order);
    }

    /**
     * Sends a text to the engine (with 2 retries) and extracts the occurrence groups from the
     * RDF payload it returns.
     *
     * @param coreSession the session used to build entity suggestions
     * @param str the text to analyze
     * @return the occurrence groups found in the text
     * @throws IOException if the call to the engine fails
     * @throws ClientException if a repository operation fails
     */
    public List<OccurrenceGroup> analyze(CoreSession coreSession, String str) throws IOException, ClientException {
        Model emptyModel = ModelFactory.createDefaultModel();
        String rdfPayload = callSemanticEngine(str, this.outputFormat, 2);
        Model parsedModel = emptyModel.read(new StringReader(rdfPayload), (String) null);
        return findStanbolEntityOccurrences(coreSession, parsedModel);
    }

    /**
     * Records the "analysis pending" status of a document, then analyzes the text extracted
     * from it unless {@code shouldSkip} rejects the document.
     *
     * @param coreSession the session used to build entity suggestions
     * @param documentModel the document to analyze
     * @return the occurrence groups found, or an empty list for a skipped document
     * @throws IOException if the call to the engine fails
     * @throws ClientException if a repository operation fails
     */
    public List<OccurrenceGroup> analyze(CoreSession coreSession, DocumentModel documentModel) throws IOException, ClientException {
        DocumentLocationImpl location = new DocumentLocationImpl(documentModel.getRepositoryName(), documentModel.getRef());
        this.states.put(location, "status.semantic.analysisPending");
        if (shouldSkip(documentModel)) {
            return Collections.emptyList();
        }
        String text = extractText(documentModel);
        return analyze(coreSession, text);
    }

    /**
     * Posts a text to the semantic engine and returns the body of the response.
     * <p>
     * The endpoint is {@code engineURL} when set. Otherwise it is the value of the
     * {@code STANBOL_URL_PROPERTY} framework property (or {@code DEFAULT_STANBOL_URL} when the
     * property is missing or blank) followed by "engines/".
     * <p>
     * A response whose status is not 200 triggers a new attempt after a pause of one second,
     * as long as retries are left.
     *
     * @param str the text to analyze, sent as UTF-8 encoded "text/plain"
     * @param str2 the value of the Accept header
     * @param i the number of retries left
     * @return the response body decoded as UTF-8
     * @throws IOException if the engine cannot be reached, or still answers with a status other
     *         than 200 once the retries are exhausted
     */
    public String callSemanticEngine(String str, String str2, int i) throws IOException {
        String endpoint = this.engineURL;
        if (endpoint == null) {
            String baseUrl = Framework.getProperty(STANBOL_URL_PROPERTY, DEFAULT_STANBOL_URL);
            if (baseUrl.trim().isEmpty()) {
                baseUrl = DEFAULT_STANBOL_URL;
            }
            if (!baseUrl.endsWith("/")) {
                baseUrl = baseUrl + "/";
            }
            endpoint = baseUrl + "engines/";
        }
        HttpPost request = new HttpPost(endpoint);
        try {
            request.setHeader("Accept", str2);
            request.setHeader("Content-Type", "text/plain");
            request.setEntity(new ByteArrayEntity(str.getBytes("utf-8")));
            HttpResponse response = this.httpClient.execute(request);

            // A response is not required to carry an entity.
            String body = "";
            if (response.getEntity() != null) {
                InputStream content = response.getEntity().getContent();
                try {
                    // The request is sent as UTF-8; decode the answer the same way rather than
                    // with the platform default charset.
                    body = IOUtils.toString(content, "UTF-8");
                } finally {
                    // Always close the stream so that the pooled connection is released.
                    content.close();
                }
            }
            if (response.getStatusLine().getStatusCode() == 200) {
                return body;
            }
            if (i <= 0) {
                throw new IOException(String.format("Unexpected response from '%s': %s\n %s", endpoint, response.getStatusLine().toString(), body));
            }
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException interrupted) {
                // Keep the interruption visible to the code owning the thread.
                Thread.currentThread().interrupt();
            }
            return callSemanticEngine(str, str2, i - 1);
        } catch (ClientProtocolException e) {
            request.abort();
            // Chain the original exception as the cause instead of passing it to String.format.
            throw new ClientProtocolException(String.format("Error connecting to '%s': %s", endpoint, e.getMessage()), e);
        } catch (IOException e) {
            request.abort();
            throw e;
        }
    }

    /**
     * Builds the text to analyze for a document.
     * <p>
     * The text is made of the title, the {@code dc:description}, the {@code note:note} content
     * converted from HTML to plain text, the related texts of documents having the
     * "HasRelatedText" facet and the text of every blob of the document. The parts are
     * separated by blank lines and the characters that are not valid in XML are removed.
     * Missing (null) parts are left out.
     *
     * @param documentModel the document to read
     * @return the extracted text, without any character matched by {@code INVALID_XML_CHARS}
     * @throws ClientException if the document cannot be read or the note cannot be converted
     */
    protected String extractText(DocumentModel documentModel) throws ClientException {
        StringBuilder text = new StringBuilder();
        String title = documentModel.getTitle();
        if (title != null) {
            text.append(title);
        }
        text.append("\n\n");
        Serializable description = documentModel.getPropertyValue("dc:description");
        if (description != null) {
            text.append(description);
            text.append("\n\n");
        }
        try {
            String note = (String) documentModel.getPropertyValue("note:note");
            // The note schema may be present while the note itself was never filled.
            if (note != null) {
                StreamingBlob noteBlob = StreamingBlob.createFromString(this.INVALID_XML_CHARS.matcher(note).replaceAll(""));
                noteBlob.setMimeType("text/html");
                BlobHolder converted = this.conversionService.convert(ANY2TEXT, new SimpleBlobHolder(noteBlob), (Map) null);
                Blob plainText = converted != null ? converted.getBlob() : null;
                if (plainText != null) {
                    text.append(plainText.getString());
                    text.append("\n\n");
                }
            }
        } catch (IOException e) {
            throw new ClientException(e);
        } catch (PropertyException ignored) {
            // The property could not be read (presumably the document has no note schema):
            // there is no note to extract, carry on with the other parts.
        }
        if (documentModel.hasFacet("HasRelatedText")) {
            List<?> relatedResources = (List<?>) documentModel.getProperty("relatedtext:relatedtextresources").getValue(List.class);
            if (relatedResources != null) {
                for (Object relatedResource : relatedResources) {
                    String relatedText = (String) ((Map<?, ?>) relatedResource).get("relatedtext");
                    if (relatedText != null && !relatedText.trim().isEmpty()) {
                        text.append(relatedText);
                        text.append("\n\n");
                    }
                }
            }
        }
        text.append(blobsToText(new BlobsExtractor().getBlobs(documentModel)));
        return this.INVALID_XML_CHARS.matcher(text.toString()).replaceAll("");
    }

    /**
     * Converts blobs to plain text and joins the results with blank lines.
     * <p>
     * Each blob goes through the "any2text" converter and the result is decoded as UTF-8. NUL
     * characters are replaced with spaces. A blob that fails to convert is logged and left out
     * so that it does not prevent the other blobs from being extracted.
     *
     * @param list the blobs to convert
     * @return the texts of the blobs separated by blank lines
     */
    protected String blobsToText(List<Blob> list) {
        List<String> texts = new ArrayList<String>();
        for (Blob source : list) {
            try {
                BlobHolder converted = this.conversionService.convert(ANY2TEXT, new SimpleBlobHolder(source), (Map) null);
                if (converted == null) {
                    continue;
                }
                Blob textBlob = converted.getBlob();
                if (textBlob == null) {
                    continue;
                }
                String text = new String(textBlob.getByteArray(), "UTF-8");
                // Replace the NUL characters themselves with spaces.
                texts.add(text.replace('\0', ' '));
            } catch (Exception e) {
                // best effort: report the failure and go on with the next blob
                log.error(e.getMessage(), e);
            }
        }
        return StringUtils.join(texts, "\n\n");
    }

    /**
     * Returns the processing status recorded for a document.
     * <p>
     * For the two "queued" statuses the matching task queue is scanned to report the queue
     * size and the 1-based position of the first element equal to the document location
     * (0 when there is none). Both numbers are 0 for every other status.
     *
     * @param str the repository name
     * @param documentRef the reference of the document
     * @return the status, or null when nothing is recorded for the document
     */
    public ProgressStatus getProgressStatus(String str, DocumentRef documentRef) {
        DocumentLocationImpl location = new DocumentLocationImpl(str, documentRef);
        String status = this.states.get(location);
        if (status == null) {
            return null;
        }

        BlockingQueue<Runnable> queue;
        if ("status.semantic.analysisQueued".equals(status)) {
            queue = this.analysisTaskQueue;
        } else if ("status.semantic.linkingQueued".equals(status)) {
            queue = this.serializationTaskQueue;
        } else {
            queue = null;
        }

        int position = 0;
        int queueSize = 0;
        if (queue != null) {
            Object[] snapshot = queue.toArray();
            queueSize = snapshot.length;
            // NOTE(review): queued elements are tasks while the probe is a document location;
            // a match relies on the equals() of the task classes - confirm they support it.
            for (int index = 0; index < queueSize; index++) {
                if (snapshot[index].equals(location)) {
                    position = index + 1;
                    break;
                }
            }
        }
        return new ProgressStatus(status, position, queueSize);
    }

    /**
     * Forgets the processing status recorded for a document, if any.
     *
     * @param str the repository name
     * @param documentRef the reference of the document
     */
    public void clearProgressStatus(String str, DocumentRef documentRef) {
        DocumentLocationImpl location = new DocumentLocationImpl(str, documentRef);
        this.states.remove(location);
    }

    /**
     * Tells whether the component is activated.
     *
     * @return true between the end of {@code activate} and the start of {@code deactivate}
     */
    public boolean isActive() {
        return active;
    }

    /**
     * Returns the remote entity service, looking it up through the framework on first use.
     *
     * @return the remote entity service
     * @throws RuntimeException wrapping any exception raised by the lookup
     */
    protected RemoteEntitySource getRemoteEntityService() {
        if (this.reService != null) {
            return this.reService;
        }
        try {
            this.reService = (RemoteEntityService) Framework.getService(RemoteEntityService.class);
        } catch (Exception lookupFailure) {
            throw new RuntimeException(lookupFailure);
        }
        return this.reService;
    }

    static {
        localTypes.put("http://dbpedia.org/ontology/Place", "Place");
        localTypes.put("http://dbpedia.org/ontology/Person", "Person");
        localTypes.put("http://dbpedia.org/ontology/Organisation", "Organization");
    }
}
