test
This commit is contained in:
@@ -101,3 +101,44 @@
|
||||
- 动作:查询当前系统时间
|
||||
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
|
||||
- 输出:2025-11-20 09:44:48 CST
|
||||
|
||||
- 时间:2025-11-24 14:47(UTC+8)
|
||||
- 动作:确认 .claude 目录是否存在
|
||||
- 工具:shell(ls .claude)
|
||||
- 输出:目录不存在,命令返回 “No such file or directory”
|
||||
|
||||
- 时间:2025-11-24 14:48(UTC+8)
|
||||
- 动作:阅读 AnjuanAndJuanneiController.java 源码
|
||||
- 工具:shell(sed -n '1,960p' src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java)
|
||||
- 输出:获取 uploadSimpleFilesAnjuan 及相关上传实现用于 OOM 分析
|
||||
|
||||
- 时间:2025-11-24 14:49(UTC+8)
|
||||
- 动作:查阅 PdfFileHelper.image2Pdf 实现
|
||||
- 工具:shell(sed -n '520,620p' src/main/java/com/point/strategy/common/PdfFileHelper.java)
|
||||
- 输出:确认图片转 PDF 逻辑使用 ImageIO.read + iText Image 双重加载
|
||||
|
||||
- 时间:2025-11-24 14:50(UTC+8)
|
||||
- 动作:查询当前系统时间
|
||||
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
|
||||
- 输出:2025-11-24 14:50:02 CST
|
||||
|
||||
- 时间:2025-11-24 14:55(UTC+8)
|
||||
- 动作:为传统案卷上传OCR处理增加线程池限流
|
||||
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
|
||||
- 变更:新增受控线程池替代每文件新建线程,限制并发与队列,避免多文件同时触发大量OCR线程造成内存峰值。
|
||||
- 工具:apply_patch
|
||||
- 结果:代码已更新
|
||||
|
||||
- 时间:2025-11-24 14:56(UTC+8)
|
||||
- 动作:优化图片转PDF内存占用
|
||||
- 位置:src/main/java/com/point/strategy/common/PdfFileHelper.java
|
||||
- 变更:移除 ImageIO 双重加载大图,直接用 iText Image 尺寸设置页面,减少一次内存占用。
|
||||
- 工具:apply_patch
|
||||
- 结果:代码已更新
|
||||
|
||||
- 时间:2025-11-24 14:59(UTC+8)
|
||||
- 动作:对上传接口增加并发限流(全局同时最多处理3个文件)
|
||||
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
|
||||
- 变更:新增 Semaphore,保证全局并行处理文件数不超过3;同时将 OCR 线程池调为 1~3 线程(核心1、最大3)并缩短队列(容量30),匹配限流策略。注意:ThreadPoolExecutor 仅在队列已满时才会创建超过核心数的线程,因此队列未满时实际只有 1 个 OCR 线程在运行。
|
||||
- 工具:apply_patch
|
||||
- 结果:代码已更新
|
||||
|
||||
@@ -13,12 +13,12 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
import com.itextpdf.text.pdf.PdfCopy;
|
||||
import com.itextpdf.text.pdf.PdfReader;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.imageio.ImageReader;
|
||||
import javax.imageio.stream.FileImageInputStream;
|
||||
import javax.swing.*;
|
||||
import java.awt.*;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
@@ -41,13 +41,13 @@ public class PdfFileHelper {
|
||||
private static class FontFile {
|
||||
public File file;
|
||||
public boolean isTempFile = false;
|
||||
|
||||
|
||||
public FontFile(File file, boolean isTempFile) {
|
||||
this.file = file;
|
||||
this.isTempFile = isTempFile;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取SIMYOU字体文件,兼容JAR包和文件系统
|
||||
* @return FontFile对象
|
||||
@@ -61,7 +61,7 @@ public class PdfFileHelper {
|
||||
"/app/fonts/SIMYOU.TTF",
|
||||
"/app/data/fonts/SIMYOU.TTF"
|
||||
};
|
||||
|
||||
|
||||
for (String fontPath : dockerFontPaths) {
|
||||
File fontFile = new File(fontPath);
|
||||
if (fontFile.exists() && fontFile.canRead()) {
|
||||
@@ -69,7 +69,7 @@ public class PdfFileHelper {
|
||||
return new FontFile(fontFile, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// 2. 从classpath中读取字体文件(适用于JAR包环境)
|
||||
try {
|
||||
InputStream fontStream = PdfFileHelper.class.getClassLoader().getResourceAsStream("SIMYOU.TTF");
|
||||
@@ -77,7 +77,7 @@ public class PdfFileHelper {
|
||||
// 创建临时文件
|
||||
File tempFontFile = File.createTempFile("SIMYOU", ".TTF");
|
||||
tempFontFile.deleteOnExit(); // JVM退出时删除临时文件
|
||||
|
||||
|
||||
// 将资源流写入临时文件
|
||||
try (InputStream input = fontStream;
|
||||
FileOutputStream output = new FileOutputStream(tempFontFile)) {
|
||||
@@ -87,14 +87,14 @@ public class PdfFileHelper {
|
||||
output.write(buffer, 0, bytesRead);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
log.info("从classpath创建临时字体文件: {}", tempFontFile.getAbsolutePath());
|
||||
return new FontFile(tempFontFile, true);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("从classpath读取字体文件失败: {}", e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
// 3. 尝试使用ResourceUtils(开发环境)
|
||||
try {
|
||||
File fontFile = ResourceUtils.getFile("classpath:SIMYOU.TTF");
|
||||
@@ -105,10 +105,10 @@ public class PdfFileHelper {
|
||||
} catch (Exception e) {
|
||||
log.warn("使用ResourceUtils读取字体文件失败: {}", e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
throw new IOException("无法找到SIMYOU.TTF字体文件,请确保字体文件存在于文件系统或classpath中");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取BaseFont对象,自动处理字体文件路径
|
||||
* @return BaseFont对象
|
||||
@@ -542,26 +542,15 @@ public class PdfFileHelper {
|
||||
**/
|
||||
public static boolean image2Pdf(String source, String target) {
|
||||
try {
|
||||
BufferedImage img = ImageIO.read(new File(source));
|
||||
PdfReader.unethicalreading = true;
|
||||
Image png1 = Image.getInstance(source); //通过文件路径获取image
|
||||
// float heigth = png1.getHeight();
|
||||
// float width = png1.getWidth();
|
||||
// 新增:读取图片的EXIF方向信息
|
||||
int orientation = getExifOrientation(source);
|
||||
//new一个pdf文档
|
||||
// new一个pdf文档,直接使用图片宽高设置页面,避免重复加载
|
||||
Document doc = new Document(null, 0, 0, 0, 0);
|
||||
if (img == null) {
|
||||
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
|
||||
} else {
|
||||
doc.setPageSize(new Rectangle(img.getWidth(), img.getHeight()));
|
||||
}
|
||||
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
|
||||
PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入
|
||||
doc.open();//打开文档
|
||||
// doc.newPage(); //在pdf创建一页
|
||||
// int percent = getPercent2(heigth, width);
|
||||
// png1.setAlignment(Image.MIDDLE);
|
||||
// png1.scalePercent(percent+3);// 表示是原来图像的比例;
|
||||
// 新增:根据EXIF方向调整图片
|
||||
adjustImageOrientation(png1, orientation);
|
||||
doc.add(png1);
|
||||
|
||||
@@ -38,6 +38,12 @@ import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@@ -76,6 +82,29 @@ public class AnjuanAndJuanneiController {
|
||||
@Autowired
|
||||
private OcrLogMapper ocrLogMapper;
|
||||
|
||||
/**
|
||||
* OCR线程池,限制并发与队列长度,防止瞬时创建大量线程导致内存峰值
|
||||
*/
|
||||
private final ExecutorService ocrExecutor = new ThreadPoolExecutor(
|
||||
1,
|
||||
3,
|
||||
60L,
|
||||
TimeUnit.SECONDS,
|
||||
new LinkedBlockingQueue<>(30),
|
||||
runnable -> {
|
||||
Thread t = new Thread(runnable);
|
||||
t.setName("OCR-Processing");
|
||||
t.setDaemon(true);
|
||||
return t;
|
||||
},
|
||||
new ThreadPoolExecutor.AbortPolicy()
|
||||
);
|
||||
|
||||
/**
|
||||
* 上传并发限流:全局最多同时处理3个文件,超出时阻塞等待许可
|
||||
*/
|
||||
private static final Semaphore uploadSemaphore = new Semaphore(3);
|
||||
|
||||
@RequestMapping(value = "/getWord", method = RequestMethod.POST)
|
||||
@ApiOperation(value = "获取图片文字")
|
||||
public AjaxJson getWord(String tableName,Integer id){
|
||||
@@ -246,48 +275,63 @@ public class AnjuanAndJuanneiController {
|
||||
int fileIndex = 0;
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
fileIndex++;
|
||||
String name = iterator.next();
|
||||
MultipartFile file0 = multipartRequest.getFile(name);
|
||||
|
||||
if (file0 == null || file0.isEmpty()) {
|
||||
logger.warn("第{}个文件为空,跳过", fileIndex);
|
||||
falseNum++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 使用零拷贝方式处理单个文件,最大程度优化内存使用
|
||||
AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request);
|
||||
|
||||
if ("101".equals(json2.getCode())) {
|
||||
falseNum++;
|
||||
}
|
||||
if ("100".equals(json2.getCode())) {
|
||||
successNum++;
|
||||
}
|
||||
|
||||
// 异步处理OCR,避免阻塞
|
||||
String originalFilename = file0.getOriginalFilename();
|
||||
if (originalFilename != null) {
|
||||
int index = originalFilename.lastIndexOf(".") + 1;
|
||||
if (index > 0 && index < originalFilename.length()) {
|
||||
String fileType = originalFilename.substring(index);
|
||||
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
|
||||
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
|
||||
Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + fileIndex);
|
||||
ocrThread.setDaemon(true);
|
||||
ocrThread.start();
|
||||
boolean acquired = false;
|
||||
try {
|
||||
uploadSemaphore.acquire(); // 最多并行处理3个文件
|
||||
acquired = true;
|
||||
fileIndex++;
|
||||
String name = iterator.next();
|
||||
MultipartFile file0 = multipartRequest.getFile(name);
|
||||
|
||||
if (file0 == null || file0.isEmpty()) {
|
||||
logger.warn("第{}个文件为空,跳过", fileIndex);
|
||||
falseNum++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 使用零拷贝方式处理单个文件,最大程度优化内存使用
|
||||
AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request);
|
||||
|
||||
if ("101".equals(json2.getCode())) {
|
||||
falseNum++;
|
||||
}
|
||||
if ("100".equals(json2.getCode())) {
|
||||
successNum++;
|
||||
}
|
||||
|
||||
// 异步处理OCR,避免阻塞
|
||||
String originalFilename = file0.getOriginalFilename();
|
||||
if (originalFilename != null) {
|
||||
int index = originalFilename.lastIndexOf(".") + 1;
|
||||
if (index > 0 && index < originalFilename.length()) {
|
||||
String fileType = originalFilename.substring(index);
|
||||
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
|
||||
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
|
||||
try {
|
||||
ocrExecutor.execute(ocrTask);
|
||||
} catch (RejectedExecutionException ex) {
|
||||
logger.warn("OCR队列已满,跳过文件: {}", originalFilename);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 显式释放资源
|
||||
try {
|
||||
if (file0.getInputStream() != null) {
|
||||
file0.getInputStream().close();
|
||||
|
||||
// 显式释放资源
|
||||
try {
|
||||
if (file0.getInputStream() != null) {
|
||||
file0.getInputStream().close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.warn("关闭文件流时出错: {}", e.getMessage());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.warn("获取上传许可被中断,终止处理");
|
||||
return AjaxJson.returnExceptionInfo("上传处理中断");
|
||||
} finally {
|
||||
if (acquired) {
|
||||
uploadSemaphore.release();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.warn("关闭文件流时出错: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user