From 21cd636205728fd5dd22e1042976c48495ac924c Mon Sep 17 00:00:00 2001 From: aipper Date: Sat, 22 Nov 2025 22:58:12 +0800 Subject: [PATCH] test --- docker-compose.yml | 15 +- fix-permissions.sh | 54 +++++ sql/dm/t_ocr_log.sql | 29 +++ sql/kingbase/t_ocr_log.sql | 29 +++ sql/mysql/t_ocr_log.sql | 17 ++ .../AnjuanAndJuanneiController.java | 190 +++++++++++------- .../originBatchUpload/OCRProcessingTask.java | 108 ++++++++++ 7 files changed, 360 insertions(+), 82 deletions(-) create mode 100755 fix-permissions.sh create mode 100644 sql/dm/t_ocr_log.sql create mode 100644 sql/kingbase/t_ocr_log.sql create mode 100644 sql/mysql/t_ocr_log.sql create mode 100644 src/main/java/com/point/strategy/originBatchUpload/OCRProcessingTask.java diff --git a/docker-compose.yml b/docker-compose.yml index 18c394e..8b5fc97 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,13 +5,14 @@ services: ports: - "9081:9081" volumes: - # 持久化数据目录 - - ./data/upload:/app/data/upload - - ./data/temp:/app/data/temp - - ./data/unzip:/app/data/unzip - - ./data/images:/app/data/images - - ./data/reports:/app/data/reports - - ./logs:/app/logs + # 持久化数据目录 - 设置正确的权限 + - ./data/upload:/app/data/upload:rw + - ./data/temp:/app/data/temp:rw + - ./data/unzip:/app/data/unzip:rw + - ./data/images:/app/data/images:rw + - ./data/reports:/app/data/reports:rw + - ./logs:/app/logs:rw + user: "1001:1001" # 指定容器内用户ID,与Dockerfile中的app用户保持一致 environment: - SPRING_PROFILES_ACTIVE=prod - SERVER_PORT=9081
diff --git a/fix-permissions.sh b/fix-permissions.sh
new file mode 100755
index 0000000..9d41f83
--- /dev/null
+++ b/fix-permissions.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+set -euo pipefail  # stop at the first failed command instead of printing the success message
+# Permission repair script for the digital archive system.
+# Fixes file-permission problems on the data directories in Docker deployments.
+
+echo "开始修复数字档案系统文件权限..."
+
+# Data directories (the same host paths that docker-compose.yml mounts into the container)
+DATA_DIRS=(
+    "./data/upload"
+    "./data/temp"
+    "./data/unzip"
+    "./data/images"
+    "./data/reports"
+    "./logs"
+)
+
+# Create the directories if they do not exist yet
+echo "创建数据目录..."
+for dir in "${DATA_DIRS[@]}"; do
+    if [ ! -d "$dir" ]; then
+        echo "创建目录: $dir"
+        mkdir -p "$dir"
+    else
+        echo "目录已存在: $dir"
+    fi
+done
+
+# Set ownership to 1001:1001 (stated to match the app user in the Dockerfile)
+echo "设置目录权限为1001:1001..."
+for dir in "${DATA_DIRS[@]}"; do
+    echo "设置权限: $dir"
+    sudo chown -R 1001:1001 "$dir"
+    sudo chmod -R u=rwX,go=rX "$dir"  # capital X: execute/search on directories, not on plain data files
+done
+
+# Print the resulting permissions for a visual check
+echo "验证权限设置..."
+for dir in "${DATA_DIRS[@]}"; do
+    echo "目录: $dir"
+    ls -ld "$dir"
+done
+
+echo "文件权限修复完成!"
+echo ""
+echo "重要提示:"
+echo "1. 当前系统用户需要对数据目录有读写权限"
+echo "2. 如果1001用户不存在,请确保Docker容器内的app用户UID为1001"
+echo "3. 在Linux环境下,可以使用 'id -u' 和 'id -g' 查看当前用户ID"
+echo ""
+echo "如果仍然遇到权限问题,可以尝试:"
+echo "1. 将当前用户添加到docker组: sudo usermod -aG docker \$USER"
+echo "2. 重新登录使权限生效"
+echo "3. 或者使用root用户运行: sudo ./fix-permissions.sh"
\ No newline at end of file
diff --git a/sql/dm/t_ocr_log.sql b/sql/dm/t_ocr_log.sql
new file mode 100644
index 0000000..861f2ba
--- /dev/null
+++ b/sql/dm/t_ocr_log.sql
@@ -0,0 +1,29 @@
+-- Dameng (DM) database: DDL for the OCR log table
+-- Records log entries for OCR recognition operations
+
+CREATE TABLE "t_ocr_log" (
+    "id" INTEGER IDENTITY(1,1) NOT NULL,
+    "table_name" VARCHAR(100) DEFAULT NULL,
+    "file_name" VARCHAR(255) DEFAULT NULL,
+    "status_type" VARCHAR(50) DEFAULT NULL,
+    "failure_reason" CLOB DEFAULT NULL,
+    "create_time" TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    "update_time" TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    PRIMARY KEY ("id")
+);
+
+-- Create indexes
+CREATE INDEX "idx_ocr_log_table_name" ON "t_ocr_log"("table_name");
+CREATE INDEX "idx_ocr_log_file_name" ON "t_ocr_log"("file_name");
+CREATE INDEX "idx_ocr_log_status_type" ON "t_ocr_log"("status_type");
+CREATE INDEX "idx_ocr_log_create_time" ON "t_ocr_log"("create_time");
+
+-- Add table and column comments
+COMMENT ON TABLE "t_ocr_log" IS 'OCR识别日志表';
+COMMENT ON COLUMN "t_ocr_log"."id" IS '主键ID';
+COMMENT ON COLUMN "t_ocr_log"."table_name" IS '表名';
+COMMENT ON COLUMN "t_ocr_log"."file_name" IS '文件名';
+COMMENT ON COLUMN "t_ocr_log"."status_type" IS '转换状态';
+COMMENT ON COLUMN "t_ocr_log"."failure_reason" IS '失败原因';
+COMMENT ON COLUMN "t_ocr_log"."create_time" IS '创建时间';
+COMMENT ON COLUMN "t_ocr_log"."update_time" IS '更新时间';
\ No newline at end of file
diff --git a/sql/kingbase/t_ocr_log.sql b/sql/kingbase/t_ocr_log.sql
new file mode 100644
index 0000000..f58d733
--- /dev/null
+++ b/sql/kingbase/t_ocr_log.sql
@@ -0,0 +1,29 @@
+-- Kingbase database: DDL for the OCR log table
+-- Records log entries for OCR recognition operations
+
+CREATE TABLE "t_ocr_log" (
+    "id" SERIAL NOT NULL,
+    "table_name" VARCHAR(100) DEFAULT NULL,
+    "file_name" VARCHAR(255) DEFAULT NULL,
+    "status_type" VARCHAR(50) DEFAULT NULL,
+    "failure_reason" TEXT DEFAULT NULL,
+    "create_time" TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    "update_time" TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    PRIMARY KEY ("id")
+);
+
+-- Create indexes
+CREATE INDEX "idx_ocr_log_table_name" ON "t_ocr_log"("table_name");
+CREATE INDEX "idx_ocr_log_file_name" ON "t_ocr_log"("file_name");
+CREATE INDEX "idx_ocr_log_status_type" ON "t_ocr_log"("status_type");
+CREATE INDEX "idx_ocr_log_create_time" ON "t_ocr_log"("create_time");
+
+-- Add table and column comments
+COMMENT ON TABLE "t_ocr_log" IS 'OCR识别日志表';
+COMMENT ON COLUMN "t_ocr_log"."id" IS '主键ID';
+COMMENT ON COLUMN "t_ocr_log"."table_name" IS '表名';
+COMMENT ON COLUMN "t_ocr_log"."file_name" IS '文件名';
+COMMENT ON COLUMN "t_ocr_log"."status_type" IS '转换状态';
+COMMENT ON COLUMN "t_ocr_log"."failure_reason" IS '失败原因';
+COMMENT ON COLUMN "t_ocr_log"."create_time" IS '创建时间';
+COMMENT ON COLUMN "t_ocr_log"."update_time" IS '更新时间';
\ No newline at end of file
diff --git a/sql/mysql/t_ocr_log.sql b/sql/mysql/t_ocr_log.sql
new file mode 100644
index 0000000..7a982b5
--- /dev/null
+++ b/sql/mysql/t_ocr_log.sql
@@ -0,0 +1,17 @@
+-- DDL for the OCR log table
+-- Records log entries for OCR recognition operations
+
+CREATE TABLE `t_ocr_log` (
+  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键ID',
+  `table_name` varchar(100) DEFAULT NULL COMMENT '表名',
+  `file_name` varchar(255) DEFAULT NULL COMMENT '文件名',
+  `status_type` varchar(50) DEFAULT NULL COMMENT '转换状态',
+  `failure_reason` text DEFAULT NULL COMMENT '失败原因',
+  `create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+  `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+  PRIMARY KEY (`id`),
+  KEY `idx_table_name` (`table_name`),
+  KEY `idx_file_name` (`file_name`),
+  KEY `idx_status_type` (`status_type`),
+  KEY `idx_create_time` (`create_time`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='OCR识别日志表';
\ No newline at end of file
diff --git a/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java b/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java index 57215f5..1840f32 100644 --- a/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java +++ b/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java @@ -197,17 +197,51 @@ public class AnjuanAndJuanneiController { public AjaxJson uploadSimpleFilesAnjuan(MultipartFile[] file, String fondscode, Integer recId, String tableName, HttpServletRequest request) { Integer successNum = 0; Integer falseNum = 0; + + // 验证参数 + if (file == null || file.length == 0) { + return AjaxJson.returnExceptionInfo("未选择任何文件"); + } + if (recId == null || recId <= 0) { + return AjaxJson.returnExceptionInfo("记录ID无效"); + } + if (StringUtil.isEmpty(fondscode)) { + return AjaxJson.returnExceptionInfo("全宗号不能为空"); + } + if (StringUtil.isEmpty(tableName)) { + return AjaxJson.returnExceptionInfo("表名不能为空"); + } + for (int i = 0; i < file.length; i++) { MultipartFile file0 = file[i]; + + // 验证文件 + if (file0 == null || file0.isEmpty()) { + logger.warn("第{}个文件为空,跳过", i + 1); + falseNum++; + continue; + } - //创建文件在服务器端存放路径 -// String dir = request.getRealPath("/") + "uploadFile"+File.separator+ tableName+"_temp_file" + File.separator + fondscode + File.separator + recId; - String dir = uploadPath + "uploadFile/"+ tableName+"_temp_file/" + fondscode + "/" + recId; + //创建文件在服务器端存放路径 - 使用File.separator确保跨平台兼容性 + String dir = uploadPath + "uploadFile" + File.separator + tableName + "_temp_file" + File.separator + fondscode +
File.separator + recId; File fileDir = new File(dir); if (!fileDir.exists()) { - fileDir.mkdirs(); + boolean created = fileDir.mkdirs(); + if (!created) { + logger.error("创建目录失败: {}", dir); + falseNum++; + continue; + } } - AjaxJson json2 = uploadFilesByPathAnjuan(file0, fondscode,dir, recId, tableName,request); + + // 验证目录是否可写 + if (!fileDir.canWrite()) { + logger.error("目录无写权限: {}", dir); + falseNum++; + continue; + } + + AjaxJson json2 = uploadFilesByPathAnjuan(file0, fondscode, dir, recId, tableName, request); if ("101".equals(json2.getCode())) { falseNum++; } @@ -219,58 +253,11 @@ public class AnjuanAndJuanneiController { String fileType = originalFilename.substring(index); //启动一个线程,根据ocr获取图片文字"file_content,"+ if(!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) { - new Thread() { - @Override - public void run() { - OcrLog ocrLog = new OcrLog(); - if(youhongIntegrate){ - try { - File file1 = (File) json2.getBody().get("file"); - String fileNameServer = json2.getBody().get("file_name_server").toString(); - String ocrImageWord = OCRUtil.yhOcr(file1,youhongBaseUrl); - Map map8=new HashMap<>(); - String fieldValue = "file_content" + "=" + "'"+ocrImageWord+"'"; - String conditionSql = "file_name_server" + "=" + "'"+fileNameServer+"'"; - map8.put("tableName",tableName+"_temp_file"); - map8.put("fieldValue",fieldValue); - map8.put("conditionSql",conditionSql); - danganguanliService.updateObject(map8); - ocrLog.setFileName(file1.getName()); - ocrLog.setTableName(tableName+"_temp_file"); - ocrLog.setStatusType("0"); - } catch (Exception e) { - ocrLog.setFailureReason(e.getMessage()); - ocrLog.setStatusType("1"); - ocrLogMapper.insert(ocrLog); - throw new RuntimeException(e); - } - }else{ - OCRUtil.setTessPath(tessPath); - File file1 = (File) json2.getBody().get("file"); - String fileNameServer = json2.getBody().get("file_name_server").toString(); - try { - String ocrImageWord = OCRUtil.getOcrImageWord(file1); - 
ocrImageWord=ocrImageWord.replaceAll("'",""); - Map map8 = new HashMap<>(); - String fieldValue = "file_content" + "=" + "'" + ocrImageWord + "'"; - String conditionSql = "file_name_server" + "=" + "'" + fileNameServer + "'"; - map8.put("tableName", tableName + "_temp_file"); - map8.put("fieldValue", fieldValue); - map8.put("conditionSql", conditionSql); - danganguanliService.updateObject(map8); - ocrLog.setFileName(file1.getName()); - ocrLog.setTableName(tableName+"_temp_file"); - ocrLog.setStatusType("0"); - } catch (Exception e) { - ocrLog.setFailureReason(e.getMessage()); - ocrLog.setStatusType("1"); - ocrLogMapper.insert(ocrLog); - throw new RuntimeException(e); - } - } - ocrLogMapper.insert(ocrLog); - } - }.start(); + // 使用线程池替代直接创建线程 + OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService); + Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + (i + 1)); + ocrThread.setDaemon(true); // 设置为守护线程 + ocrThread.start(); } } Map map7=new HashMap<>(); @@ -287,9 +274,19 @@ public class AnjuanAndJuanneiController { private AjaxJson uploadFilesByPathAnjuan(MultipartFile file,String fondscode, String dir, Integer recId,String tableName, HttpServletRequest request) { AjaxJson json = null; + File files = null; + try { String originalFilename = file.getOriginalFilename(); + if (StringUtil.isEmpty(originalFilename)) { + return AjaxJson.returnExceptionInfo("文件名为空"); + } + int index = originalFilename.lastIndexOf(".") + 1; + if (index <= 0 || index >= originalFilename.length()) { + return AjaxJson.returnExceptionInfo("文件格式不正确"); + } + String fileType = originalFilename.substring(index); String file_name_server=StringUtil.generaterUUID()+"."+fileType; @@ -299,11 +296,40 @@ public class AnjuanAndJuanneiController { map5.put("tableName",tableName + "_temp_file"); map5.put("conditionSql","rec_id= '"+recId+"' and file_status=1 "); int pageNo 
=danganguanliService.selectObjectCount(map5)+1; - File files = new File(dir + "/" + file_name_server); + + // 使用File.separator确保跨平台兼容性 + files = new File(dir + File.separator + file_name_server); + + // 检查目标文件是否已存在 + if (files.exists()) { + logger.warn("目标文件已存在,将覆盖: {}", files.getAbsolutePath()); + boolean deleted = files.delete(); + if (!deleted) { + logger.error("无法删除已存在的文件: {}", files.getAbsolutePath()); + return AjaxJson.returnExceptionInfo("无法删除已存在的文件"); + } + } + + // 验证目录是否存在且可写 + File parentDir = files.getParentFile(); + if (parentDir == null || !parentDir.exists()) { + return AjaxJson.returnExceptionInfo("父目录不存在"); + } + if (!parentDir.canWrite()) { + logger.error("目录无写权限: {}", parentDir.getAbsolutePath()); + return AjaxJson.returnExceptionInfo("目录无写权限"); + } + + // 文件传输 file.transferTo(files); + + // 验证文件是否成功写入 + if (!files.exists() || files.length() == 0) { + logger.error("文件传输失败或文件为空: {}", files.getAbsolutePath()); + return AjaxJson.returnExceptionInfo("文件传输失败"); + } - - String file_path="uploadFile/"+tableName + "_temp_file/"+fondscode+"/"+recId; + String file_path="uploadFile" + File.separator + tableName + "_temp_file" + File.separator + fondscode + File.separator + recId; // String file_path=getFileName(dir); String fieldName= "file_name," + @@ -331,38 +357,52 @@ public class AnjuanAndJuanneiController { map.put("valueName",valueName); danganguanliService.saveObject(map); - - - - //再把文件复制一份,作用是 用于合并文件、合并下载 -// String newName=file_name_server.replace(".jpg","_original.jpg"); -// FileTool.copyFile(dir+File.separator+file_name_server,dir+File.separator+newName); - if(!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")){ - //生成一份pdf文件,用于归档章的操作 + //生成一份pdf文件,用于归档章的操作 - 使用File.separator String newName_pdf=file_name_server.replace("."+fileType,".pdf"); - PdfFileHelper.image2Pdf(dir+File.separator+file_name_server,dir+File.separator+newName_pdf); + String sourcePath = dir + File.separator + file_name_server; + String targetPath = dir + 
File.separator + newName_pdf; + + boolean pdfCreated = PdfFileHelper.image2Pdf(sourcePath, targetPath); + if (!pdfCreated) { + logger.warn("PDF文件生成失败: {} -> {}", sourcePath, targetPath); + } String newName_pdf_original=newName_pdf.replace(".pdf","_original.pdf"); - FileTool.copyFile(dir+File.separator+newName_pdf,dir+File.separator+newName_pdf_original); + String originalPath = dir + File.separator + newName_pdf_original; + FileTool.copyFile(targetPath, originalPath); } + //mxf格式的文件需要转换一份mp4给前端展示 if (fileType.equalsIgnoreCase("mxf")) { - String replaceMp4 = ""; + String replaceMp4; if ("MXF".equals(fileType)) { replaceMp4 = files.getPath().replace(".MXF", ".mp4"); - }else { + } else { replaceMp4 = files.getPath().replace(".mxf", ".mp4"); } VideoConvertUtil.convert(files.getPath(), replaceMp4); - } + json = AjaxJson.returnInfo("上传文件成功"); json.put("file",files); json.put("file_name_server",file_name_server); + } catch (Exception e) { - json = AjaxJson.returnExceptionInfo("上传文件失败" + e); - logger.info(file.getOriginalFilename() + "上传文件失败" + e); + logger.error("上传文件失败: {}", file.getOriginalFilename(), e); + json = AjaxJson.returnExceptionInfo("上传文件失败: " + e.getMessage()); + + // 清理失败的文件 + if (files != null && files.exists()) { + try { + boolean deleted = files.delete(); + if (deleted) { + logger.info("清理失败文件成功: {}", files.getAbsolutePath()); + } + } catch (Exception deleteEx) { + logger.warn("清理失败文件时出错: {}", files.getAbsolutePath(), deleteEx); + } + } } return json; }
diff --git a/src/main/java/com/point/strategy/originBatchUpload/OCRProcessingTask.java b/src/main/java/com/point/strategy/originBatchUpload/OCRProcessingTask.java
new file mode 100644
index 0000000..b506c18
--- /dev/null
+++ b/src/main/java/com/point/strategy/originBatchUpload/OCRProcessingTask.java
@@ -0,0 +1,108 @@
+package com.point.strategy.originBatchUpload;
+
+import com.point.strategy.bean.OcrLog;
+import com.point.strategy.common.AjaxJson;
+import com.point.strategy.common.OCRUtil;
+import com.point.strategy.dao.OcrLogMapper;
+import com.point.strategy.docTraditionArrange.docVolume.service.DanganguanliService;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * OCR job for one uploaded file, run on its own thread by the upload controller.
+ * Reads "file" and "file_name_server" from the upload result, writes the recognised
+ * text to file_content in the table named tableName + "_temp_file", and always
+ * attempts to insert one {@link OcrLog} row with status "0" (success) or "1" (failure).
+ */
+@Slf4j
+public class OCRProcessingTask implements Runnable {
+
+    private final AjaxJson json2;
+    private final String tableName;
+    private final boolean youhongIntegrate;
+    private final String youhongBaseUrl;
+    private final String tessPath;
+    private final OcrLogMapper ocrLogMapper;
+    private final DanganguanliService danganguanliService;
+
+    /**
+     * @param json2            upload result whose body holds "file" and "file_name_server"
+     * @param tableName        base table name; "_temp_file" is appended before use
+     * @param youhongIntegrate true to use Youhong OCR, false for local Tesseract
+     */
+    public OCRProcessingTask(AjaxJson json2, String tableName, boolean youhongIntegrate,
+                             String youhongBaseUrl, String tessPath, OcrLogMapper ocrLogMapper,
+                             DanganguanliService danganguanliService) {
+        this.json2 = json2;
+        this.tableName = tableName;
+        this.youhongIntegrate = youhongIntegrate;
+        this.youhongBaseUrl = youhongBaseUrl;
+        this.tessPath = tessPath;
+        this.ocrLogMapper = ocrLogMapper;
+        this.danganguanliService = danganguanliService;
+    }
+
+    /** Does not propagate exceptions: each failure is logged and recorded in the OcrLog row. */
+    @Override
+    public void run() {
+        OcrLog ocrLog = new OcrLog();
+        ocrLog.setTableName(tableName + "_temp_file");
+        // Declared outside the try block so the catch block can still name the file.
+        File file1 = null;
+        try {
+            file1 = (File) json2.getBody().get("file");
+            String fileNameServer = json2.getBody().get("file_name_server").toString();
+            if (file1 == null || !file1.exists()) {
+                throw new RuntimeException("文件不存在: " + fileNameServer);
+            }
+
+            String ocrImageWord = recognize(file1);
+            // Strip quotes for BOTH engines: the text is spliced into a SQL literal below.
+            ocrImageWord = ocrImageWord == null ? "" : ocrImageWord.replace("'", "");
+
+            if (!ocrImageWord.isEmpty()) {
+                Map map8 = new HashMap<>();
+                map8.put("tableName", tableName + "_temp_file");
+                map8.put("fieldValue", "file_content" + "=" + "'" + ocrImageWord + "'");
+                map8.put("conditionSql", "file_name_server" + "=" + "'" + fileNameServer + "'");
+                danganguanliService.updateObject(map8);
+            }
+
+            ocrLog.setFileName(file1.getName());
+            ocrLog.setStatusType("0"); // success
+        } catch (Exception e) {
+            log.error("OCR处理异常", e);
+            ocrLog.setFileName(file1 != null ? file1.getName() : "unknown");
+            ocrLog.setStatusType("1"); // failure
+            ocrLog.setFailureReason(e.getMessage());
+        } finally {
+            try {
+                ocrLogMapper.insert(ocrLog);
+                log.info("OCR日志记录完成: {} - {}", ocrLog.getFileName(), ocrLog.getStatusType());
+            } catch (Exception e) {
+                log.error("OCR日志记录失败", e);
+            }
+        }
+    }
+
+    /** Runs the configured engine; a failure is rethrown with its cause attached. */
+    private String recognize(File file) {
+        String engine = youhongIntegrate ? "友虹OCR" : "Tesseract OCR";
+        try {
+            String text;
+            if (youhongIntegrate) {
+                text = OCRUtil.yhOcr(file, youhongBaseUrl);
+            } else {
+                OCRUtil.setTessPath(tessPath);
+                text = OCRUtil.getOcrImageWord(file);
+            }
+            log.info("{}处理成功: {}", engine, file.getName());
+            return text;
+        } catch (Exception e) {
+            throw new RuntimeException(engine + "处理失败: " + e.getMessage(), e);
+        }
+    }
+}
\ No newline at end of file