package com.geoway.es.service.impl;

import cn.hutool.core.img.ImgUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.ZipUtil;
import cn.hutool.json.JSONUtil;
import cn.hutool.log.StaticLog;
import cn.hutool.system.SystemUtil;
import com.geoway.base.config.BaseConfig;
import com.geoway.base.config.Constant;
import com.geoway.base.database.DbConfigDTO;
import com.geoway.base.database.DbType;
import com.geoway.base.database.postgres.PgUtil;
import com.geoway.base.helper.FileHelper;
import com.geoway.base.util.BaseUtil;
import com.geoway.es.config.FieldDefaultConfig;
import com.geoway.es.constant.IndexConstant;
import com.geoway.es.constant.ObjectType;
import com.geoway.es.dao.DocumentDao;
import com.geoway.es.dto.FieldsMapping;
import com.geoway.es.dto.PdfPage;
import com.geoway.es.dto.PdfParseResult;
import com.geoway.es.entity.DocumentBean;
import com.geoway.es.service.DocumentService;
import com.geoway.es.service.JobService;
import com.geoway.es.util.JdbcUtil;
import com.geoway.es.util.PdfUtil;
import com.geoway.es.util.SmbUtil;
import com.geoway.es.util.TikaUtil;
import com.geoway.ocr.helper.OcrHelper;
import java.awt.image.BufferedImage;
import java.io.File;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import javax.annotation.Resource;
import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import org.jetbrains.annotations.Nullable;
import org.springframework.beans.BeanUtils;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.data.elasticsearch.core.query.IndexQuery;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import org.springframework.web.multipart.MultipartFile;

@Service
/* loaded from: input_file:BOOT-INF/lib/atlas-es-0.0.1-SNAPSHOT.jar:com/geoway/es/service/impl/DocumentServiceImpl.class */
public class DocumentServiceImpl implements DocumentService {
    // Keys of the row map turned into an index source by buildIndexQuery.
    // Only CONTENT is referenced by name in this file; the other keys appear
    // there as inline literals carrying the same values.
    private static final String ID = "id";
    private static final String TYPE = "type";
    private static final String PATH = "path";
    private static final String NAME = "name";
    private static final String CONTENT = "content";

    // Repository used to save, look up and delete DocumentBean instances.
    @Resource
    private DocumentDao documentDao;

    // Used by doBulk for bulk indexing and for refreshing the index afterwards.
    @Resource
    private ElasticsearchRestTemplate restTemplate;

    // Runs the background jobs (saveDocs, bulk), the per-file parsing tasks of
    // collectBeans and the temp-file cleanup of savePdfPage. Package-private.
    @Resource
    ThreadPoolTaskExecutor executor;

    // Issues a job identifier on start() and is told when the job is finished.
    @Resource
    private JobService jobService;

    // Source of the field mapping used by doBulk when the caller supplies none.
    @Resource
    private FieldDefaultConfig fieldDefaultConfig;

    // OCR entry point for images and rendered PDF pages; also supplies the
    // useBaidu flag and the PDF zoom factor.
    @Resource
    private OcrHelper ocrHelper;

    // Provides storePath(), the directory zip uploads are extracted into.
    @Resource
    private BaseConfig baseConfig;

    // Converts uploads (MultipartFile) and SMB files into local File objects.
    @Resource
    private FileHelper fileHelper;

    /**
     * Indexes an uploaded file.
     *
     * <p>A file whose suffix is {@code zip} (case-insensitive) is handed to
     * {@code saveZip}; any other file is parsed with {@link #file2Doc(File)}
     * and saved as a single document.
     *
     * @param multipartFile the uploaded file
     * @return the saved documents (exactly one for a non-zip upload)
     * @throws RuntimeException if conversion, parsing or saving fails; the
     *         message is that of the original failure, which is attached as
     *         the cause
     */
    @Override // com.geoway.es.service.DocumentService
    public List<DocumentBean> save(MultipartFile multipartFile) {
        File file = this.fileHelper.toFile(multipartFile);
        try {
            if ("zip".equalsIgnoreCase(FileUtil.getSuffix(file.getName()))) {
                return saveZip(file);
            }
            return Collections.singletonList(this.documentDao.save(file2Doc(file)));
        } catch (Exception e) {
            // Log through the application logger rather than stderr, and keep
            // the original exception as the cause so its stack trace survives.
            StaticLog.error(e);
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Extracts a zip archive (entry names decoded as GBK) into the configured
     * store path, parses the files found under the extraction directory and
     * saves the resulting documents.
     *
     * <p>The archive itself is deleted afterwards, whether or not processing
     * succeeded, so a failed upload does not leave the zip behind.
     *
     * @param file the uploaded zip archive
     * @return the saved documents; empty when nothing was found
     * @throws RuntimeException wrapping any failure during extraction, parsing
     *         or saving
     */
    private List<DocumentBean> saveZip(File file) {
        File targetDir = new File(this.baseConfig.storePath());
        List<DocumentBean> beans = Collections.synchronizedList(new ArrayList<>());
        try {
            File unzipped = ZipUtil.unzip(file, targetDir, CharsetUtil.CHARSET_GBK);
            // NOTE(review): the directory scanned here is whatever unzip returns;
            // if that is the shared store root, files from earlier uploads would
            // be collected again - confirm against ZipUtil.unzip.
            collectBeans(forFiles(unzipped.getAbsolutePath(), null), beans);
            if (!beans.isEmpty()) {
                this.documentDao.saveAll(beans);
            }
            return beans;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            FileUtil.del(file);
        }
    }

    /**
     * Builds an unsaved document from a file.
     *
     * <p>The content is the list returned by {@link #forContent(File)} joined
     * with the OS line separator; name and path come from the file, the type
     * is {@code ObjectType.DOC.type} and the time is the moment of creation.
     *
     * @param file the file to parse
     * @return a populated, not yet persisted document
     */
    @Override // com.geoway.es.service.DocumentService
    public DocumentBean file2Doc(File file) {
        String lineSeparator = SystemUtil.getOsInfo().getLineSeparator();
        String joinedContent = String.join(lineSeparator, forContent(file));

        DocumentBean doc = new DocumentBean();
        doc.setName(file.getName());
        doc.setPath(file.getAbsolutePath());
        doc.setType(ObjectType.DOC.type);
        doc.setContent(joinedContent);
        doc.setTime(new Date());
        return doc;
    }

    /**
     * Starts a background job that runs
     * {@link #doSaveDocs(String, String, String, String)} on the executor and
     * returns immediately.
     *
     * <p>The job is marked finished exactly once, whether the import succeeds
     * or fails.
     *
     * @param str  path to import; a value starting with two backslashes is
     *             treated as an SMB share by {@code doSaveDocs}
     * @param str2 passed to {@code SmbUtil.getSmbFile} (presumably the user
     *             name - confirm)
     * @param str3 passed to {@code SmbUtil.getSmbFile} (presumably the
     *             password - confirm)
     * @param str4 comma-separated file extensions to include; empty means all
     * @return the job identifier issued by the job service
     */
    @Override // com.geoway.es.service.DocumentService
    public String saveDocs(String str, String str2, String str3, String str4) {
        String start = this.jobService.start();
        this.executor.execute(() -> {
            try {
                doSaveDocs(str, str2, str3, str4);
            } catch (Exception e) {
                // Keep the cause so the executor's error handling sees the full trace.
                throw new RuntimeException(e.getMessage(), e);
            } finally {
                this.jobService.finish(start);
            }
        });
        return start;
    }

    /**
     * Replaces the stored document with the given id by the parsed content of
     * a newly uploaded file.
     *
     * <p>Only the non-null properties of the freshly parsed document are
     * copied onto the stored one before it is saved again.
     *
     * @param str           id of the document to update
     * @param multipartFile the replacement upload
     * @throws IllegalStateException if no document has that id, or if no
     *         content could be extracted from the upload
     */
    @Override // com.geoway.es.service.DocumentService
    public void update(String str, MultipartFile multipartFile) {
        DocumentBean existing = this.documentDao.findById(str)
                .orElseThrow(() -> new IllegalStateException("未找到数据"));

        File uploaded = this.fileHelper.toFile(multipartFile);
        DocumentBean parsed = file2Doc(uploaded);
        if (StrUtil.isEmpty(parsed.getContent())) {
            throw new IllegalStateException("未解析到文档内容");
        }

        BeanUtils.copyProperties(parsed, existing, BaseUtil.getNullPropertyNames(parsed));
        this.documentDao.save(existing);
    }

    /**
     * Deletes the document with the given id through the document repository.
     *
     * @param str id of the document to delete
     */
    @Override // com.geoway.es.service.DocumentService
    public void delete(String str) {
        documentDao.deleteById(str);
    }

    /**
     * Starts a background job that calls
     * {@link #doBulk(String, String, String, String, String)} once for every
     * comma-separated entry of {@code str3}, in order.
     *
     * <p>The job is marked finished even when one of the imports throws, so a
     * failing import cannot leave the job open forever.
     *
     * @param str  database configuration handed to {@code DbConfigDTO.build}
     * @param str2 database type name resolved with {@code DbType.forName}
     * @param str3 comma-separated entries, each passed to
     *             {@code FieldsMapping.sql} (presumably table names - confirm)
     * @param str4 optional field mapping as a JSON string
     * @param str5 optional type value written into every indexed row
     * @return the job identifier issued by the job service
     */
    @Override // com.geoway.es.service.DocumentService
    public String bulk(String str, String str2, String str3, String str4, String str5) {
        String start = this.jobService.start();
        this.executor.execute(() -> {
            try {
                for (String entry : str3.split(",")) {
                    doBulk(str, str2, entry, str4, str5);
                }
            } finally {
                this.jobService.finish(start);
            }
        });
        return start;
    }

    /**
     * Reads rows from a database and indexes them into the documents index in
     * batches of 30, then refreshes the index.
     *
     * @param str  database configuration handed to {@code DbConfigDTO.build}
     * @param str2 database type name resolved with {@code DbType.forName}
     * @param str3 value passed to {@code FieldsMapping.sql} to build the query
     *             (presumably a table name - confirm)
     * @param str4 field mapping as a JSON string; when empty, the non-null
     *             properties of the default field configuration are used
     * @param str5 optional type value written into every indexed row
     */
    @Override // com.geoway.es.service.DocumentService
    public void doBulk(String str, String str2, String str3, String str4, String str5) {
        FieldsMapping fieldsMapping;
        if (StrUtil.isNotEmpty(str4)) {
            fieldsMapping = FieldsMapping.fromJsonString(str4);
        } else {
            fieldsMapping = new FieldsMapping();
            BeanUtils.copyProperties(this.fieldDefaultConfig, fieldsMapping, BaseUtil.getNullPropertyNames(this.fieldDefaultConfig));
        }

        // NOTE(review): str3 flows into the SQL text produced by
        // FieldsMapping.sql; if it can carry untrusted input it needs
        // validating there - confirm.
        List<Map<String, Object>> rows;
        Connection connection = PgUtil.getConnection(DbConfigDTO.build(str, DbType.forName(str2)));
        try {
            rows = JdbcUtil.queryAndParse(connection, fieldsMapping.sql(str3));
        } finally {
            // Release the connection even when the query fails.
            BaseUtil.close(connection);
        }

        final int batchSize = 30;
        List<IndexQuery> batch = new ArrayList<>(batchSize);
        IndexCoordinates index = IndexCoordinates.of(IndexConstant.DOCUMENTS);
        int indexed = 0;
        for (Map<String, Object> row : rows) {
            indexed++;
            batch.add(buildIndexQuery(str5, row));
            if (indexed % batchSize == 0) {
                this.restTemplate.bulkIndex(batch, index);
                batch.clear();
            }
        }
        // Flush the last, partially filled batch.
        if (!batch.isEmpty()) {
            this.restTemplate.bulkIndex(batch, index);
        }
        StaticLog.info("成功索引 {} 条数据", indexed);
        this.restTemplate.indexOps(index).refresh();
    }

    /**
     * Saves a document, filling in its content first when it has a path but
     * no content yet.
     *
     * <p>In that case the file at the path is parsed with
     * {@link #forContent(File)} and the lines are joined with the OS line
     * separator.
     *
     * @param documentBean the document to save; its content may be modified
     */
    @Override // com.geoway.es.service.DocumentService
    public void save(DocumentBean documentBean) {
        String path = documentBean.getPath();
        boolean needsParsing = StrUtil.isNotEmpty(path) && StrUtil.isEmpty(documentBean.getContent());
        if (needsParsing) {
            String lineSeparator = SystemUtil.getOsInfo().getLineSeparator();
            List<String> lines = forContent(new File(path));
            documentBean.setContent(String.join(lineSeparator, lines));
        }
        this.documentDao.save(documentBean);
    }

    /**
     * Looks up a document by id.
     *
     * @param str id of the document
     * @return the document, or {@code null} when none has that id
     */
    @Override // com.geoway.es.service.DocumentService
    public DocumentBean findOne(String str) {
        Optional<DocumentBean> found = this.documentDao.findById(str);
        return found.orElse(null);
    }

    /**
     * Turns one database row into an index request. The row map is modified
     * in place.
     *
     * <ul>
     *   <li>An {@code id} entry is removed from the row and, when non-null,
     *       used as the document id.</li>
     *   <li>A non-empty {@code str} overrides the row's {@code type}.</li>
     *   <li>When the row has a non-null {@code path}, missing {@code name} and
     *       {@code type} entries are filled in from the file name and
     *       {@code ObjectType.DOC.type}, and the parsed file content is stored
     *       under {@code content} if the file exists.</li>
     * </ul>
     *
     * <p>Null {@code id} or {@code path} values (for example SQL NULL columns)
     * are tolerated instead of raising a NullPointerException.
     *
     * @param str optional type override; ignored when empty
     * @param map the row to index
     * @return an index request whose source is the row serialized as JSON
     */
    private IndexQuery buildIndexQuery(String str, Map<String, Object> map) {
        IndexQuery indexQuery = new IndexQuery();

        // The id travels as document id, not as part of the source.
        Object id = map.remove(ID);
        if (id != null) {
            indexQuery.setId(id.toString());
        }

        if (StrUtil.isNotEmpty(str)) {
            map.put(TYPE, str);
        }

        Object path = map.get(PATH);
        if (path != null) {
            File file = new File(path.toString());
            if (!map.containsKey(NAME)) {
                map.put(NAME, file.getName());
            }
            if (!map.containsKey(TYPE)) {
                map.put(TYPE, ObjectType.DOC.type);
            }
            if (file.exists() && file.isFile()) {
                // NOTE(review): content is stored as a list of lines here, while
                // file2Doc stores one joined string - confirm both shapes are intended.
                map.put(CONTENT, forContent(file));
            }
        }

        indexQuery.setSource(JSONUtil.toJsonStr(map));
        indexQuery.setOpType(IndexQuery.OpType.INDEX);
        return indexQuery;
    }

    /**
     * Collects the files under a local path or an SMB share, parses them in
     * parallel and saves the resulting documents.
     *
     * @param str  path to import; a value starting with two backslashes is
     *             read as an SMB share, each SMB file being converted to a
     *             local file first
     * @param str2 passed to the SMB lookup (presumably the user name - confirm)
     * @param str3 passed to the SMB lookup (presumably the password - confirm)
     * @param str4 comma-separated file extensions to include; empty means all
     * @return the number of documents saved
     * @throws IllegalStateException if no document was produced
     */
    @Override // com.geoway.es.service.DocumentService
    public int doSaveDocs(String str, String str2, String str3, String str4) {
        List<DocumentBean> beans = Collections.synchronizedList(new ArrayList<>());

        List<File> files;
        if (str.startsWith("\\\\")) {
            files = forSmbFiles(str, str2, str3, str4).stream()
                    .map(smbFile -> this.fileHelper.toFile(smbFile))
                    .collect(Collectors.toList());
        } else {
            files = forFiles(str, str4);
        }
        collectBeans(files, beans);

        Assert.state(!beans.isEmpty(), "路径 " + str + " 下未找到文档");
        this.documentDao.saveAll(beans);
        return beans.size();
    }

    /**
     * Parses every file on the executor and appends the resulting documents
     * to {@code list2}, blocking until all files have been processed.
     *
     * <p>Results arrive in completion order, so {@code list2} must tolerate
     * concurrent {@code add} calls.
     *
     * <p>NOTE(review): this blocks the calling thread while its tasks run on
     * the same executor; when the caller is itself an executor task this could
     * starve a small pool - confirm the pool sizing.
     *
     * @param list  files to parse
     * @param list2 destination for the parsed documents
     */
    private void collectBeans(List<File> list, List<DocumentBean> list2) {
        CompletableFuture<?>[] tasks = new CompletableFuture<?>[list.size()];
        int next = 0;
        for (File file : list) {
            tasks[next++] = CompletableFuture.runAsync(() -> list2.add(file2Doc(file)), this.executor);
        }
        CompletableFuture.allOf(tasks).join();
    }

    /**
     * Resolves a local path to the files that should be imported.
     *
     * @param str  path of a file or directory
     * @param str2 comma-separated extension filter applied to directory
     *             contents; a single file is returned regardless of it
     * @return the file itself, the matching files below the directory, or an
     *         empty list (after logging an error) when the path does not exist
     */
    private List<File> forFiles(String str, String str2) {
        File target = new File(str);
        if (!target.exists()) {
            StaticLog.error(str + "不存在 ! 跳过", new Object[0]);
            return Collections.emptyList();
        }
        if (target.isFile()) {
            return Collections.singletonList(target);
        }
        return fromDir(target, str2);
    }

    /**
     * Resolves an SMB path to the files that should be imported.
     *
     * <p>Any failure while reaching the share is logged together with its
     * stack trace and results in an empty list rather than an exception.
     *
     * @param str  SMB path of a file or directory
     * @param str2 passed to {@code SmbUtil.getSmbFile} (presumably the user
     *             name - confirm)
     * @param str3 passed to {@code SmbUtil.getSmbFile} (presumably the
     *             password - confirm)
     * @param str4 comma-separated extension filter applied to directory
     *             contents; a single file is returned regardless of it
     * @return the file itself, the matching files below the directory, or an
     *         empty list when the path does not exist or cannot be reached
     */
    private List<SmbFile> forSmbFiles(String str, String str2, String str3, String str4) {
        try {
            SmbFile root = SmbUtil.getSmbFile(str, str2, str3);
            if (!root.exists()) {
                StaticLog.error(str + "不存在 ! 跳过", new Object[0]);
                return Collections.emptyList();
            }
            return root.isFile() ? Collections.singletonList(root) : fromDir(root, str4);
        } catch (Exception e) {
            // Pass the message as an argument so braces inside it cannot be
            // mistaken for placeholders of the format template.
            StaticLog.error(e, "连接 {} 失败 : {}", str, e.getMessage());
            return Collections.emptyList();
        }
    }

    /**
     * Recursively collects the files below a directory whose names pass the
     * extension filter, in directory-listing order (depth first).
     *
     * @param file directory to scan
     * @param str  comma-separated extension filter; empty means all files
     * @return the matching files; empty when the directory cannot be listed
     */
    private List<File> fromDir(File file, String str) {
        List<File> matched = new ArrayList<>();
        File[] children = file.listFiles();
        if (children == null) {
            return matched;
        }
        for (File child : children) {
            if (child.isFile()) {
                if (match(str, child.getName())) {
                    matched.add(child);
                }
            } else {
                matched.addAll(fromDir(child, str));
            }
        }
        return matched;
    }

    /**
     * Recursively collects the files below an SMB directory whose names pass
     * the extension filter, in directory-listing order (depth first).
     *
     * <p>Scanning is best effort: when the share reports an error the
     * directory is abandoned, the error is logged, and whatever was collected
     * up to that point is still returned.
     *
     * @param smbFile directory to scan
     * @param str     comma-separated extension filter; empty means all files
     * @return the matching files found before any error occurred
     */
    private List<SmbFile> fromDir(SmbFile smbFile, String str) {
        List<SmbFile> matched = new ArrayList<>();
        try {
            for (SmbFile child : smbFile.listFiles()) {
                if (!child.isFile()) {
                    matched.addAll(fromDir(child, str));
                } else if (match(str, child.getName())) {
                    matched.add(child);
                }
            }
        } catch (SmbException e) {
            // Keep going with partial results, but leave a trace of the failure.
            StaticLog.warn(e, "读取共享目录 {} 失败 : {}", smbFile.getPath(), e.getMessage());
        }
        return matched;
    }

    /**
     * Tells whether a file name passes the extension filter.
     *
     * <p>The comparison ignores case and whitespace around the filter
     * entries, so a filter of {@code "pdf, doc"} accepts {@code report.PDF}.
     * This matches the case-insensitive suffix checks used elsewhere in this
     * class.
     *
     * @param str  comma-separated extensions without the dot; empty or
     *             {@code null} accepts every name
     * @param str2 file name to test
     * @return {@code true} when the filter is empty or the text after the last
     *         dot of the name equals one of the filter entries; {@code false}
     *         for names without an extension
     */
    private boolean match(String str, String str2) {
        if (StrUtil.isEmpty(str)) {
            return true;
        }
        String extension = StrUtil.subAfter((CharSequence) str2, (CharSequence) ".", true);
        if (StrUtil.isEmpty(extension)) {
            return false;
        }
        for (String allowed : str.split(",")) {
            if (allowed.trim().equalsIgnoreCase(extension)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts the text lines of a file, using the OCR helper's
     * {@code useBaidu} setting for images.
     *
     * @param file the file to parse
     * @return the extracted lines
     */
    @Override // com.geoway.es.service.DocumentService
    public List<String> forContent(File file) {
        boolean useBaidu = this.ocrHelper.useBaidu;
        return forContent(file, useBaidu);
    }

    /**
     * Extracts the pages of a PDF, by text extraction when possible and by
     * OCR otherwise.
     *
     * <p>Unless {@code z} is set, text extraction is tried first and its
     * result returned when it yields at least one page. Otherwise the
     * requested pages are rendered to images at the OCR helper's PDF zoom and
     * recognized one by one; the pages are sorted before being returned.
     *
     * @param file the PDF file
     * @param i    first argument of the page range, used when {@code str} is
     *             empty; OCR page numbering starts at {@code max(i, 1)}
     * @param i2   second argument of the page range (presumably the last
     *             page - confirm against {@code PdfUtil})
     * @param str  optional comma-separated page numbers; when non-empty it
     *             takes precedence over the range
     * @param z    {@code true} to skip text extraction and go straight to OCR
     * @return the parse result, flagged as coming from OCR or not
     */
    @Override // com.geoway.es.service.DocumentService
    public PdfParseResult forPdf(File file, int i, int i2, String str, boolean z) {
        if (!z) {
            PdfParseResult textResult = parse(file, i, i2, str);
            if (textResult != null) {
                return textResult;
            }
            StaticLog.warn(file.getName() + " 未解析到数据, 尝试ocr识别", new Object[0]);
        }

        List<PdfPage> pages = new ArrayList<>();
        if (StrUtil.isNotEmpty(str)) {
            // Explicit page list: image k belongs to the k-th requested page number.
            int[] pageNumbers = StrUtil.splitToInt(str, ",");
            List<BufferedImage> images = PdfUtil.pdf2Pic(file.getAbsolutePath(), this.ocrHelper.getPdfZoom(), pageNumbers);
            for (int k = 0; k < pageNumbers.length; k++) {
                savePdfPage(pages, images.get(k), pageNumbers[k]);
            }
        } else {
            // Page range: number the rendered images consecutively.
            List<BufferedImage> images = PdfUtil.pdf2Pic(file.getAbsolutePath(), this.ocrHelper.getPdfZoom(), i, i2);
            int pageNumber = Math.max(i, 1);
            for (BufferedImage image : images) {
                savePdfPage(pages, image, pageNumber);
                pageNumber++;
            }
        }
        Collections.sort(pages);

        PdfParseResult ocrResult = new PdfParseResult();
        ocrResult.setPdfPages(pages);
        ocrResult.setFromOcr(true);
        return ocrResult;
    }

    /**
     * Extracts PDF pages by text extraction only (no OCR).
     *
     * @param file the PDF file
     * @param i    first argument of the page range, used when {@code str} is empty
     * @param i2   second argument of the page range, used when {@code str} is empty
     * @param str  optional comma-separated page numbers; takes precedence over
     *             the range when non-empty
     * @return the parse result, or {@code null} when no page was extracted
     */
    @Nullable
    private PdfParseResult parse(File file, int i, int i2, String str) {
        List<PdfPage> pages;
        if (StrUtil.isNotEmpty(str)) {
            pages = PdfUtil.parse(file, StrUtil.splitToInt(str, ","));
        } else {
            pages = PdfUtil.parse(file, i, i2);
        }
        if (pages.isEmpty()) {
            return null;
        }
        PdfParseResult result = new PdfParseResult();
        result.setFromOcr(false);
        result.setPdfPages(pages);
        return result;
    }

    /**
     * Runs OCR on one rendered PDF page and appends the result to the list.
     *
     * <p>A missing image yields a page built with a {@code null} second
     * constructor argument. Otherwise the image is written to a temporary PNG
     * and recognized. The temporary file is scheduled for deletion on the
     * executor even when writing or OCR fails, so it is not leaked.
     *
     * @param list          destination for the page
     * @param bufferedImage rendered page image, may be {@code null}
     * @param i             page number to record
     */
    private void savePdfPage(List<PdfPage> list, BufferedImage bufferedImage, int i) {
        if (bufferedImage == null) {
            list.add(new PdfPage(Integer.valueOf(i), null));
            return;
        }
        File tempImage = FileUtil.createTempFile(".png", true);
        try {
            ImgUtil.write(bufferedImage, tempImage);
            List<String> recognized = this.ocrHelper.forImgResult(tempImage, this.ocrHelper.useBaidu);
            PdfPage pdfPage = new PdfPage();
            pdfPage.setPage(Integer.valueOf(i));
            pdfPage.setContent(recognized);
            list.add(pdfPage);
        } finally {
            // Deletion stays asynchronous, as before; the log line records that
            // it was scheduled, not that it has completed.
            this.executor.execute(() -> {
                FileUtil.del(tempImage);
            });
            StaticLog.info("删除临时文件 : " + tempImage.getAbsolutePath(), new Object[0]);
        }
    }

    /**
     * Extracts the text lines of a file and logs how long it took.
     *
     * <p>Files recognized as images by {@code BaseUtil.isImg} go through OCR;
     * everything else is parsed as a document.
     *
     * @param file the file to parse
     * @param z    forwarded to the OCR helper for images (the caller passes
     *             the helper's {@code useBaidu} setting)
     * @return the extracted lines
     */
    @Override // com.geoway.es.service.DocumentService
    public List<String> forContent(File file, boolean z) {
        long startedAt = System.currentTimeMillis();
        String name = file.getName();

        List<String> lines;
        if (BaseUtil.isImg(name)) {
            lines = this.ocrHelper.forImgResult(file, z);
        } else {
            lines = forDocContent(file);
        }

        long elapsed = System.currentTimeMillis() - startedAt;
        StaticLog.info("解析 {} 耗时 {} 毫秒", name, elapsed);
        return lines;
    }

    /**
     * Extracts the text lines of a non-image file.
     *
     * <p>Files without a PDF suffix are parsed with Tika first; when that
     * yields non-blank text, its non-blank lines are returned. PDFs, and files
     * for which Tika fails or returns nothing, fall through to
     * {@link #forPdf(File, int, int, String, boolean)} over the whole
     * document, whose pages are flattened into one list of lines.
     *
     * <p>Pages without content are skipped; such pages are produced when a
     * page image could not be rendered.
     *
     * @param file the file to parse
     * @return the extracted lines
     */
    private List<String> forDocContent(File file) {
        try {
            if (!Constant.PDF.equalsIgnoreCase(FileUtil.getSuffix(file.getName()))) {
                String text = TikaUtil.get().parseToString(file);
                if (StrUtil.isNotBlank(text)) {
                    return Arrays.stream(text.split("\r\n|\n"))
                            .filter(CharSequenceUtil::isNotBlank)
                            .collect(Collectors.toList());
                }
            }
        } catch (Exception e) {
            StaticLog.warn("tika解析文件异常 : " + e.getMessage(), new Object[0]);
        }
        return forPdf(file, -1, 999999, null, false).getPdfPages().stream()
                .map(PdfPage::getContent)
                // Guard against pages recorded without content.
                .filter(content -> content != null)
                .flatMap(List::stream)
                .collect(Collectors.toList());
    }
}
