diff --git a/operations-log.md b/operations-log.md index 11174a4..adaf6fe 100644 --- a/operations-log.md +++ b/operations-log.md @@ -101,3 +101,44 @@ - 动作:查询当前系统时间 - 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z') - 输出:2025-11-20 09:44:48 CST + +- 时间:2025-11-24 14:47(UTC+8) +- 动作:确认 .claude 目录是否存在 +- 工具:shell(ls .claude) +- 输出:目录不存在,命令返回 “No such file or directory” + +- 时间:2025-11-24 14:48(UTC+8) +- 动作:阅读 AnjuanAndJuanneiController.java 源码 +- 工具:shell(sed -n '1,960p' src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java) +- 输出:获取 uploadSimpleFilesAnjuan 及相关上传实现用于 OOM 分析 + +- 时间:2025-11-24 14:49(UTC+8) +- 动作:查阅 PdfFileHelper.image2Pdf 实现 +- 工具:shell(sed -n '520,620p' src/main/java/com/point/strategy/common/PdfFileHelper.java) +- 输出:确认图片转 PDF 逻辑使用 ImageIO.read + iText Image 双重加载 + +- 时间:2025-11-24 14:50(UTC+8) +- 动作:查询当前系统时间 +- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z') +- 输出:2025-11-24 14:50:02 CST + +- 时间:2025-11-24 14:55(UTC+8) +- 动作:为传统案卷上传OCR处理增加线程池限流 +- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java +- 变更:新增受控线程池替代每文件新建线程,限制并发与队列,避免多文件同时触发大量OCR线程造成内存峰值。 +- 工具:apply_patch +- 结果:代码已更新 + +- 时间:2025-11-24 14:56(UTC+8) +- 动作:优化图片转PDF内存占用 +- 位置:src/main/java/com/point/strategy/common/PdfFileHelper.java +- 变更:移除 ImageIO 双重加载大图,直接用 iText Image 尺寸设置页面,减少一次内存占用。 +- 工具:apply_patch +- 结果:代码已更新 + +- 时间:2025-11-24 14:59(UTC+8) +- 动作:对上传接口增加并发限流(每次最多3个文件) +- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java +- 变更:新增 Semaphore,保证全局并行处理文件数不超过3;同时将 OCR 线程池调为 1~3 线程并缩短队列,匹配限流策略。 +- 工具:apply_patch +- 结果:代码已更新 diff --git a/src/main/java/com/point/strategy/common/PdfFileHelper.java b/src/main/java/com/point/strategy/common/PdfFileHelper.java index 9dad7dc..0e8c0e2 100644 --- a/src/main/java/com/point/strategy/common/PdfFileHelper.java +++ b/src/main/java/com/point/strategy/common/PdfFileHelper.java @@ -13,12 +13,12 @@ import lombok.extern.slf4j.Slf4j; import 
org.springframework.util.ResourceUtils; import com.itextpdf.text.pdf.PdfCopy; import com.itextpdf.text.pdf.PdfReader; + import javax.imageio.ImageIO; import javax.imageio.ImageReader; import javax.imageio.stream.FileImageInputStream; import javax.swing.*; import java.awt.*; -import java.awt.image.BufferedImage; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; @@ -41,13 +41,13 @@ public class PdfFileHelper { private static class FontFile { public File file; public boolean isTempFile = false; - + public FontFile(File file, boolean isTempFile) { this.file = file; this.isTempFile = isTempFile; } } - + /** * 获取SIMYOU字体文件,兼容JAR包和文件系统 * @return FontFile对象 @@ -61,7 +61,7 @@ public class PdfFileHelper { "/app/fonts/SIMYOU.TTF", "/app/data/fonts/SIMYOU.TTF" }; - + for (String fontPath : dockerFontPaths) { File fontFile = new File(fontPath); if (fontFile.exists() && fontFile.canRead()) { @@ -69,7 +69,7 @@ public class PdfFileHelper { return new FontFile(fontFile, false); } } - + // 2. 从classpath中读取字体文件(适用于JAR包环境) try { InputStream fontStream = PdfFileHelper.class.getClassLoader().getResourceAsStream("SIMYOU.TTF"); @@ -77,7 +77,7 @@ public class PdfFileHelper { // 创建临时文件 File tempFontFile = File.createTempFile("SIMYOU", ".TTF"); tempFontFile.deleteOnExit(); // JVM退出时删除临时文件 - + // 将资源流写入临时文件 try (InputStream input = fontStream; FileOutputStream output = new FileOutputStream(tempFontFile)) { @@ -87,14 +87,14 @@ public class PdfFileHelper { output.write(buffer, 0, bytesRead); } } - + log.info("从classpath创建临时字体文件: {}", tempFontFile.getAbsolutePath()); return new FontFile(tempFontFile, true); } } catch (Exception e) { log.warn("从classpath读取字体文件失败: {}", e.getMessage()); } - + // 3. 
尝试使用ResourceUtils(开发环境) try { File fontFile = ResourceUtils.getFile("classpath:SIMYOU.TTF"); @@ -105,10 +105,10 @@ public class PdfFileHelper { } catch (Exception e) { log.warn("使用ResourceUtils读取字体文件失败: {}", e.getMessage()); } - + throw new IOException("无法找到SIMYOU.TTF字体文件,请确保字体文件存在于文件系统或classpath中"); } - + /** * 获取BaseFont对象,自动处理字体文件路径 * @return BaseFont对象 @@ -542,26 +542,15 @@ public class PdfFileHelper { **/ public static boolean image2Pdf(String source, String target) { try { - BufferedImage img = ImageIO.read(new File(source)); PdfReader.unethicalreading = true; Image png1 = Image.getInstance(source); //通过文件路径获取image -// float heigth = png1.getHeight(); -// float width = png1.getWidth(); // 新增:读取图片的EXIF方向信息 int orientation = getExifOrientation(source); - //new一个pdf文档 + // new一个pdf文档,直接使用图片宽高设置页面,避免重复加载 Document doc = new Document(null, 0, 0, 0, 0); - if (img == null) { - doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight())); - } else { - doc.setPageSize(new Rectangle(img.getWidth(), img.getHeight())); - } + doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight())); PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入 doc.open();//打开文档 -// doc.newPage(); //在pdf创建一页 -// int percent = getPercent2(heigth, width); -// png1.setAlignment(Image.MIDDLE); -// png1.scalePercent(percent+3);// 表示是原来图像的比例; // 新增:根据EXIF方向调整图片 adjustImageOrientation(png1, orientation); doc.add(png1); diff --git a/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java b/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java index 12c61ea..548eb30 100644 --- a/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java +++ b/src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java @@ -38,6 +38,12 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.nio.file.StandardOpenOption; +import 
java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Semaphore; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -76,6 +82,29 @@ public class AnjuanAndJuanneiController { @Autowired private OcrLogMapper ocrLogMapper; + /** + * Bounded OCR pool replacing per-file threads: up to 3 daemon workers and a 30-task queue; submissions beyond that are rejected (AbortPolicy). Core size equals max size because ThreadPoolExecutor only grows past corePoolSize once the bounded queue is full, so a core size of 1 would keep OCR single-threaded until 30 tasks were queued. + */ + private final ExecutorService ocrExecutor = new ThreadPoolExecutor( + 3, + 3, + 60L, + TimeUnit.SECONDS, + new LinkedBlockingQueue<>(30), + runnable -> { + Thread t = new Thread(runnable); + t.setName("OCR-Processing"); + t.setDaemon(true); + return t; + }, + new ThreadPoolExecutor.AbortPolicy() + ); + + /** + * Upload concurrency limit: one permit is held per file being processed, so at most 3 files are handled in parallel across all requests; further files block until a permit is released. + */ + private static final Semaphore uploadSemaphore = new Semaphore(3); + @RequestMapping(value = "/getWord", method = RequestMethod.POST) @ApiOperation(value = "获取图片文字") public AjaxJson getWord(String tableName,Integer id){ @@ -246,48 +275,63 @@ public class AnjuanAndJuanneiController { int fileIndex = 0; while (iterator.hasNext()) { - fileIndex++; - String name = iterator.next(); - MultipartFile file0 = multipartRequest.getFile(name); - - if (file0 == null || file0.isEmpty()) { - logger.warn("第{}个文件为空,跳过", fileIndex); - falseNum++; - continue; - } - - // 使用零拷贝方式处理单个文件,最大程度优化内存使用 - AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request); - - if ("101".equals(json2.getCode())) { - falseNum++; - } - if ("100".equals(json2.getCode())) { - successNum++; - } - - // 异步处理OCR,避免阻塞 - String originalFilename = file0.getOriginalFilename(); - if (originalFilename != null) { - int index = originalFilename.lastIndexOf(".") + 1; - if (index > 0 && index < originalFilename.length()) { - String fileType = originalFilename.substring(index); - if (!fileType.equalsIgnoreCase("mp3") &&
!fileType.equalsIgnoreCase("mp4")) { - OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService); - Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + fileIndex); - ocrThread.setDaemon(true); - ocrThread.start(); + boolean acquired = false; + try { + uploadSemaphore.acquire(); // 最多并行处理3个文件 + acquired = true; + fileIndex++; + String name = iterator.next(); + MultipartFile file0 = multipartRequest.getFile(name); + + if (file0 == null || file0.isEmpty()) { + logger.warn("第{}个文件为空,跳过", fileIndex); + falseNum++; + continue; + } + + // 使用零拷贝方式处理单个文件,最大程度优化内存使用 + AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request); + + if ("101".equals(json2.getCode())) { + falseNum++; + } + if ("100".equals(json2.getCode())) { + successNum++; + } + + // 异步处理OCR,避免阻塞 + String originalFilename = file0.getOriginalFilename(); + if (originalFilename != null) { + int index = originalFilename.lastIndexOf(".") + 1; + if (index > 0 && index < originalFilename.length()) { + String fileType = originalFilename.substring(index); + if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) { + OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService); + try { + ocrExecutor.execute(ocrTask); + } catch (RejectedExecutionException ex) { + logger.warn("OCR队列已满,跳过文件: {}", originalFilename); + } + } } } - } - - // 显式释放资源 - try { - if (file0.getInputStream() != null) { - file0.getInputStream().close(); + + // 显式释放资源 + try { + if (file0.getInputStream() != null) { + file0.getInputStream().close(); + } + } catch (IOException e) { + logger.warn("关闭文件流时出错: {}", e.getMessage()); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + logger.warn("获取上传许可被中断,终止处理"); + return AjaxJson.returnExceptionInfo("上传处理中断"); + } finally { + if (acquired) 
{ + uploadSemaphore.release(); } - } catch (IOException e) { - logger.warn("关闭文件流时出错: {}", e.getMessage()); } } } else {