This commit is contained in:
2025-11-24 15:01:03 +08:00
parent b75f05bac9
commit e604bacfa6
3 changed files with 136 additions and 62 deletions

View File

@@ -101,3 +101,44 @@
- 动作:查询当前系统时间 - 动作:查询当前系统时间
- 工具shelldate '+%Y-%m-%d %H:%M:%S %Z' - 工具shelldate '+%Y-%m-%d %H:%M:%S %Z'
- 输出2025-11-20 09:44:48 CST - 输出2025-11-20 09:44:48 CST
- 时间:2025-11-24 14:47(UTC+8)
- 动作:确认 .claude 目录是否存在
- 工具:shell(ls .claude)
- 输出:目录不存在,命令返回 “No such file or directory”
- 时间:2025-11-24 14:48(UTC+8)
- 动作:阅读 AnjuanAndJuanneiController.java 源码
- 工具:shell(sed -n '1,960p' src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java)
- 输出:获取 uploadSimpleFilesAnjuan 及相关上传实现,用于 OOM 分析
- 时间:2025-11-24 14:49(UTC+8)
- 动作:查阅 PdfFileHelper.image2Pdf 实现
- 工具:shell(sed -n '520,620p' src/main/java/com/point/strategy/common/PdfFileHelper.java)
- 输出:确认图片转 PDF 逻辑使用 ImageIO.read + iText Image 双重加载
- 时间:2025-11-24 14:50(UTC+8)
- 动作:查询当前系统时间
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
- 输出:2025-11-24 14:50:02 CST
- 时间:2025-11-24 14:55(UTC+8)
- 动作:为传统案卷上传 OCR 处理增加线程池限流
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
- 变更:新增受控线程池替代每文件新建线程,限制并发与队列,避免多文件同时触发大量 OCR 线程造成内存峰值。
- 工具:apply_patch
- 结果:代码已更新
- 时间:2025-11-24 14:56(UTC+8)
- 动作:优化图片转 PDF 内存占用
- 位置:src/main/java/com/point/strategy/common/PdfFileHelper.java
- 变更:移除 ImageIO 双重加载大图,直接用 iText Image 尺寸设置页面,减少一次内存占用。
- 工具:apply_patch
- 结果:代码已更新
- 时间:2025-11-24 14:59(UTC+8)
- 动作:对上传接口增加并发限流,每次最多 3 个文件
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
- 变更:新增 Semaphore,保证全局并行处理文件数不超过 3;同时将 OCR 线程池调为 1~3 线程并缩短队列,匹配限流策略。
- 工具:apply_patch
- 结果:代码已更新

View File

@@ -13,12 +13,12 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.util.ResourceUtils; import org.springframework.util.ResourceUtils;
import com.itextpdf.text.pdf.PdfCopy; import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfReader;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import javax.imageio.ImageReader; import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream; import javax.imageio.stream.FileImageInputStream;
import javax.swing.*; import javax.swing.*;
import java.awt.*; import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*; import java.io.*;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@@ -542,26 +542,15 @@ public class PdfFileHelper {
**/ **/
public static boolean image2Pdf(String source, String target) { public static boolean image2Pdf(String source, String target) {
try { try {
BufferedImage img = ImageIO.read(new File(source));
PdfReader.unethicalreading = true; PdfReader.unethicalreading = true;
Image png1 = Image.getInstance(source); //通过文件路径获取image Image png1 = Image.getInstance(source); //通过文件路径获取image
// float heigth = png1.getHeight();
// float width = png1.getWidth();
// 新增读取图片的EXIF方向信息 // 新增读取图片的EXIF方向信息
int orientation = getExifOrientation(source); int orientation = getExifOrientation(source);
//new一个pdf文档 // new一个pdf文档,直接使用图片宽高设置页面,避免重复加载
Document doc = new Document(null, 0, 0, 0, 0); Document doc = new Document(null, 0, 0, 0, 0);
if (img == null) {
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight())); doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
} else {
doc.setPageSize(new Rectangle(img.getWidth(), img.getHeight()));
}
PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入 PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入
doc.open();//打开文档 doc.open();//打开文档
// doc.newPage(); //在pdf创建一页
// int percent = getPercent2(heigth, width);
// png1.setAlignment(Image.MIDDLE);
// png1.scalePercent(percent+3);// 表示是原来图像的比例;
// 新增根据EXIF方向调整图片 // 新增根据EXIF方向调整图片
adjustImageOrientation(png1, orientation); adjustImageOrientation(png1, orientation);
doc.add(png1); doc.add(png1);

View File

@@ -38,6 +38,12 @@ import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardCopyOption; import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption; import java.nio.file.StandardOpenOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Semaphore;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@@ -76,6 +82,29 @@ public class AnjuanAndJuanneiController {
@Autowired @Autowired
private OcrLogMapper ocrLogMapper; private OcrLogMapper ocrLogMapper;
/**
* OCR thread pool. Bounds both the number of worker threads and the length of the
* task queue, so that a burst of uploads cannot create a large number of threads at
* once and cause a memory peak.
* <p>
* Worker threads are daemon threads that all share the name "OCR-Processing".
* With {@link ThreadPoolExecutor.AbortPolicy}, a task that cannot be accepted makes
* {@code execute} throw {@link RejectedExecutionException}.
* <p>
* NOTE(review): per the ThreadPoolExecutor contract, threads beyond the core size are
* only started once the bounded queue is full, so this pool runs a single worker
* until 30 tasks are waiting. Confirm that this matches the intended "1~3 threads"
* behaviour.
*/
private final ExecutorService ocrExecutor = new ThreadPoolExecutor(
1, // corePoolSize
3, // maximumPoolSize
60L, // keepAliveTime, in the unit given on the next line
TimeUnit.SECONDS,
new LinkedBlockingQueue<>(30), // bounded work queue: at most 30 pending tasks
runnable -> {
// Thread factory: fixed thread name, daemon flag set.
Thread t = new Thread(runnable);
t.setName("OCR-Processing");
t.setDaemon(true);
return t;
},
new ThreadPoolExecutor.AbortPolicy() // reject, rather than block or run in the caller
);
/**
* Upload throttle: a semaphore with 3 permits, so at most 3 files are processed in
* parallel; further callers wait for a permit.
* <p>
* The field is static, so the limit is shared by every instance of this class.
*/
private static final Semaphore uploadSemaphore = new Semaphore(3);
@RequestMapping(value = "/getWord", method = RequestMethod.POST) @RequestMapping(value = "/getWord", method = RequestMethod.POST)
@ApiOperation(value = "获取图片文字") @ApiOperation(value = "获取图片文字")
public AjaxJson getWord(String tableName,Integer id){ public AjaxJson getWord(String tableName,Integer id){
@@ -246,6 +275,10 @@ public class AnjuanAndJuanneiController {
int fileIndex = 0; int fileIndex = 0;
while (iterator.hasNext()) { while (iterator.hasNext()) {
boolean acquired = false;
try {
uploadSemaphore.acquire(); // 最多并行处理3个文件
acquired = true;
fileIndex++; fileIndex++;
String name = iterator.next(); String name = iterator.next();
MultipartFile file0 = multipartRequest.getFile(name); MultipartFile file0 = multipartRequest.getFile(name);
@@ -274,9 +307,11 @@ public class AnjuanAndJuanneiController {
String fileType = originalFilename.substring(index); String fileType = originalFilename.substring(index);
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) { if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService); OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + fileIndex); try {
ocrThread.setDaemon(true); ocrExecutor.execute(ocrTask);
ocrThread.start(); } catch (RejectedExecutionException ex) {
logger.warn("OCR队列已满跳过文件: {}", originalFilename);
}
} }
} }
} }
@@ -289,6 +324,15 @@ public class AnjuanAndJuanneiController {
} catch (IOException e) { } catch (IOException e) {
logger.warn("关闭文件流时出错: {}", e.getMessage()); logger.warn("关闭文件流时出错: {}", e.getMessage());
} }
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
logger.warn("获取上传许可被中断,终止处理");
return AjaxJson.returnExceptionInfo("上传处理中断");
} finally {
if (acquired) {
uploadSemaphore.release();
}
}
} }
} else { } else {
return AjaxJson.returnExceptionInfo("请求类型不支持"); return AjaxJson.returnExceptionInfo("请求类型不支持");