This commit is contained in:
2025-11-24 15:01:03 +08:00
parent b75f05bac9
commit e604bacfa6
3 changed files with 136 additions and 62 deletions

View File

@@ -101,3 +101,44 @@
- 动作:查询当前系统时间 - 动作:查询当前系统时间
- 工具shelldate '+%Y-%m-%d %H:%M:%S %Z' - 工具shelldate '+%Y-%m-%d %H:%M:%S %Z'
- 输出2025-11-20 09:44:48 CST - 输出2025-11-20 09:44:48 CST
- 时间:2025-11-24 14:47(UTC+8)
- 动作:确认 .claude 目录是否存在
- 工具:shell(ls .claude)
- 输出:目录不存在,命令返回 “No such file or directory”
- 时间:2025-11-24 14:48(UTC+8)
- 动作:阅读 AnjuanAndJuanneiController.java 源码
- 工具:shell(sed -n '1,960p' src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java)
- 输出:获取 uploadSimpleFilesAnjuan 及相关上传实现,用于 OOM 分析
- 时间:2025-11-24 14:49(UTC+8)
- 动作:查阅 PdfFileHelper.image2Pdf 实现
- 工具:shell(sed -n '520,620p' src/main/java/com/point/strategy/common/PdfFileHelper.java)
- 输出:确认图片转 PDF 逻辑使用 ImageIO.read + iText Image 双重加载
- 时间:2025-11-24 14:50(UTC+8)
- 动作:查询当前系统时间
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
- 输出:2025-11-24 14:50:02 CST
- 时间:2025-11-24 14:55(UTC+8)
- 动作:为传统案卷上传OCR处理增加线程池限流
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
- 变更:新增受控线程池替代每文件新建线程,限制并发与队列,避免多文件同时触发大量OCR线程造成内存峰值。
- 工具:apply_patch
- 结果:代码已更新
- 时间:2025-11-24 14:56(UTC+8)
- 动作:优化图片转PDF内存占用
- 位置:src/main/java/com/point/strategy/common/PdfFileHelper.java
- 变更:移除 ImageIO 双重加载大图,直接用 iText Image 尺寸设置页面,减少一次内存占用。
- 工具:apply_patch
- 结果:代码已更新
- 时间:2025-11-24 14:59(UTC+8)
- 动作:对上传接口增加并发限流(每次最多3个文件)
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
- 变更:新增 Semaphore,保证全局并行处理文件数不超过3,同时将 OCR 线程池调为 1~3 线程并缩短队列,匹配限流策略。
- 工具:apply_patch
- 结果:代码已更新

View File

@@ -13,12 +13,12 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.util.ResourceUtils; import org.springframework.util.ResourceUtils;
import com.itextpdf.text.pdf.PdfCopy; import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfReader;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import javax.imageio.ImageReader; import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream; import javax.imageio.stream.FileImageInputStream;
import javax.swing.*; import javax.swing.*;
import java.awt.*; import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*; import java.io.*;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@@ -41,13 +41,13 @@ public class PdfFileHelper {
private static class FontFile { private static class FontFile {
public File file; public File file;
public boolean isTempFile = false; public boolean isTempFile = false;
public FontFile(File file, boolean isTempFile) { public FontFile(File file, boolean isTempFile) {
this.file = file; this.file = file;
this.isTempFile = isTempFile; this.isTempFile = isTempFile;
} }
} }
/** /**
* 获取SIMYOU字体文件兼容JAR包和文件系统 * 获取SIMYOU字体文件兼容JAR包和文件系统
* @return FontFile对象 * @return FontFile对象
@@ -61,7 +61,7 @@ public class PdfFileHelper {
"/app/fonts/SIMYOU.TTF", "/app/fonts/SIMYOU.TTF",
"/app/data/fonts/SIMYOU.TTF" "/app/data/fonts/SIMYOU.TTF"
}; };
for (String fontPath : dockerFontPaths) { for (String fontPath : dockerFontPaths) {
File fontFile = new File(fontPath); File fontFile = new File(fontPath);
if (fontFile.exists() && fontFile.canRead()) { if (fontFile.exists() && fontFile.canRead()) {
@@ -69,7 +69,7 @@ public class PdfFileHelper {
return new FontFile(fontFile, false); return new FontFile(fontFile, false);
} }
} }
// 2. 从classpath中读取字体文件适用于JAR包环境 // 2. 从classpath中读取字体文件适用于JAR包环境
try { try {
InputStream fontStream = PdfFileHelper.class.getClassLoader().getResourceAsStream("SIMYOU.TTF"); InputStream fontStream = PdfFileHelper.class.getClassLoader().getResourceAsStream("SIMYOU.TTF");
@@ -77,7 +77,7 @@ public class PdfFileHelper {
// 创建临时文件 // 创建临时文件
File tempFontFile = File.createTempFile("SIMYOU", ".TTF"); File tempFontFile = File.createTempFile("SIMYOU", ".TTF");
tempFontFile.deleteOnExit(); // JVM退出时删除临时文件 tempFontFile.deleteOnExit(); // JVM退出时删除临时文件
// 将资源流写入临时文件 // 将资源流写入临时文件
try (InputStream input = fontStream; try (InputStream input = fontStream;
FileOutputStream output = new FileOutputStream(tempFontFile)) { FileOutputStream output = new FileOutputStream(tempFontFile)) {
@@ -87,14 +87,14 @@ public class PdfFileHelper {
output.write(buffer, 0, bytesRead); output.write(buffer, 0, bytesRead);
} }
} }
log.info("从classpath创建临时字体文件: {}", tempFontFile.getAbsolutePath()); log.info("从classpath创建临时字体文件: {}", tempFontFile.getAbsolutePath());
return new FontFile(tempFontFile, true); return new FontFile(tempFontFile, true);
} }
} catch (Exception e) { } catch (Exception e) {
log.warn("从classpath读取字体文件失败: {}", e.getMessage()); log.warn("从classpath读取字体文件失败: {}", e.getMessage());
} }
// 3. 尝试使用ResourceUtils开发环境 // 3. 尝试使用ResourceUtils开发环境
try { try {
File fontFile = ResourceUtils.getFile("classpath:SIMYOU.TTF"); File fontFile = ResourceUtils.getFile("classpath:SIMYOU.TTF");
@@ -105,10 +105,10 @@ public class PdfFileHelper {
} catch (Exception e) { } catch (Exception e) {
log.warn("使用ResourceUtils读取字体文件失败: {}", e.getMessage()); log.warn("使用ResourceUtils读取字体文件失败: {}", e.getMessage());
} }
throw new IOException("无法找到SIMYOU.TTF字体文件请确保字体文件存在于文件系统或classpath中"); throw new IOException("无法找到SIMYOU.TTF字体文件请确保字体文件存在于文件系统或classpath中");
} }
/** /**
* 获取BaseFont对象自动处理字体文件路径 * 获取BaseFont对象自动处理字体文件路径
* @return BaseFont对象 * @return BaseFont对象
@@ -542,26 +542,15 @@ public class PdfFileHelper {
**/ **/
public static boolean image2Pdf(String source, String target) { public static boolean image2Pdf(String source, String target) {
try { try {
BufferedImage img = ImageIO.read(new File(source));
PdfReader.unethicalreading = true; PdfReader.unethicalreading = true;
Image png1 = Image.getInstance(source); //通过文件路径获取image Image png1 = Image.getInstance(source); //通过文件路径获取image
// float heigth = png1.getHeight();
// float width = png1.getWidth();
// 新增读取图片的EXIF方向信息 // 新增读取图片的EXIF方向信息
int orientation = getExifOrientation(source); int orientation = getExifOrientation(source);
//new一个pdf文档 // new一个pdf文档,直接使用图片宽高设置页面,避免重复加载
Document doc = new Document(null, 0, 0, 0, 0); Document doc = new Document(null, 0, 0, 0, 0);
if (img == null) { doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
} else {
doc.setPageSize(new Rectangle(img.getWidth(), img.getHeight()));
}
PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入 PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入
doc.open();//打开文档 doc.open();//打开文档
// doc.newPage(); //在pdf创建一页
// int percent = getPercent2(heigth, width);
// png1.setAlignment(Image.MIDDLE);
// png1.scalePercent(percent+3);// 表示是原来图像的比例;
// 新增根据EXIF方向调整图片 // 新增根据EXIF方向调整图片
adjustImageOrientation(png1, orientation); adjustImageOrientation(png1, orientation);
doc.add(png1); doc.add(png1);

View File

@@ -38,6 +38,12 @@ import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardCopyOption; import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption; import java.nio.file.StandardOpenOption;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Semaphore;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@@ -76,6 +82,29 @@ public class AnjuanAndJuanneiController {
@Autowired @Autowired
private OcrLogMapper ocrLogMapper; private OcrLogMapper ocrLogMapper;
/**
 * Bounded thread pool that runs the OCR tasks submitted by the upload loop
 * via {@code ocrExecutor.execute(...)}. It caps both the number of OCR
 * worker threads and the number of pending tasks so that a multi-file
 * upload cannot spawn an unbounded number of OCR threads and cause a
 * memory spike.
 *
 * <p>When the pool cannot accept a task, {@code AbortPolicy} throws
 * {@link RejectedExecutionException}; the upload loop catches it, logs a
 * warning and skips OCR for that file.
 *
 * <p>NOTE(review): with a bounded work queue, {@code ThreadPoolExecutor}
 * only starts threads beyond the core size once the queue is full, so this
 * pool runs a single worker until 30 tasks are waiting - confirm that this
 * is the intended "1 to 3 threads" behaviour.
 *
 * <p>NOTE(review): no shutdown of this executor is visible in this part of
 * the file; the workers are daemon threads, so they do not block JVM exit.
 */
private final ExecutorService ocrExecutor = new ThreadPoolExecutor(
1, // core pool size
3, // maximum pool size
60L, // keep-alive time for idle threads above the core size
TimeUnit.SECONDS,
new LinkedBlockingQueue<>(30), // at most 30 queued OCR tasks
runnable -> {
Thread t = new Thread(runnable);
// every worker gets the same name, so threads are not distinguishable by name
t.setName("OCR-Processing");
t.setDaemon(true);
return t;
},
new ThreadPoolExecutor.AbortPolicy() // reject instead of blocking or running in the caller
);
/**
 * Upload throttle: holds 3 permits, so at most 3 files are processed at the
 * same time; further callers block in {@code acquire()} until a permit is
 * released. The field is static, so the limit is shared by all instances of
 * this controller. The upload loop acquires one permit per file and releases
 * it in a {@code finally} block.
 */
private static final Semaphore uploadSemaphore = new Semaphore(3);
@RequestMapping(value = "/getWord", method = RequestMethod.POST) @RequestMapping(value = "/getWord", method = RequestMethod.POST)
@ApiOperation(value = "获取图片文字") @ApiOperation(value = "获取图片文字")
public AjaxJson getWord(String tableName,Integer id){ public AjaxJson getWord(String tableName,Integer id){
@@ -246,48 +275,63 @@ public class AnjuanAndJuanneiController {
int fileIndex = 0; int fileIndex = 0;
while (iterator.hasNext()) { while (iterator.hasNext()) {
fileIndex++; boolean acquired = false;
String name = iterator.next(); try {
MultipartFile file0 = multipartRequest.getFile(name); uploadSemaphore.acquire(); // 最多并行处理3个文件
acquired = true;
if (file0 == null || file0.isEmpty()) { fileIndex++;
logger.warn("第{}个文件为空,跳过", fileIndex); String name = iterator.next();
falseNum++; MultipartFile file0 = multipartRequest.getFile(name);
continue;
} if (file0 == null || file0.isEmpty()) {
logger.warn("第{}个文件为空,跳过", fileIndex);
// 使用零拷贝方式处理单个文件,最大程度优化内存使用 falseNum++;
AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request); continue;
}
if ("101".equals(json2.getCode())) {
falseNum++; // 使用零拷贝方式处理单个文件,最大程度优化内存使用
} AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request);
if ("100".equals(json2.getCode())) {
successNum++; if ("101".equals(json2.getCode())) {
} falseNum++;
}
// 异步处理OCR避免阻塞 if ("100".equals(json2.getCode())) {
String originalFilename = file0.getOriginalFilename(); successNum++;
if (originalFilename != null) { }
int index = originalFilename.lastIndexOf(".") + 1;
if (index > 0 && index < originalFilename.length()) { // 异步处理OCR避免阻塞
String fileType = originalFilename.substring(index); String originalFilename = file0.getOriginalFilename();
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) { if (originalFilename != null) {
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService); int index = originalFilename.lastIndexOf(".") + 1;
Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + fileIndex); if (index > 0 && index < originalFilename.length()) {
ocrThread.setDaemon(true); String fileType = originalFilename.substring(index);
ocrThread.start(); if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
try {
ocrExecutor.execute(ocrTask);
} catch (RejectedExecutionException ex) {
logger.warn("OCR队列已满跳过文件: {}", originalFilename);
}
}
} }
} }
}
// 显式释放资源
// 显式释放资源 try {
try { if (file0.getInputStream() != null) {
if (file0.getInputStream() != null) { file0.getInputStream().close();
file0.getInputStream().close(); }
} catch (IOException e) {
logger.warn("关闭文件流时出错: {}", e.getMessage());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
logger.warn("获取上传许可被中断,终止处理");
return AjaxJson.returnExceptionInfo("上传处理中断");
} finally {
if (acquired) {
uploadSemaphore.release();
} }
} catch (IOException e) {
logger.warn("关闭文件流时出错: {}", e.getMessage());
} }
} }
} else { } else {