test
This commit is contained in:
@@ -101,3 +101,44 @@
|
|||||||
- 动作:查询当前系统时间
|
- 动作:查询当前系统时间
|
||||||
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
|
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
|
||||||
- 输出:2025-11-20 09:44:48 CST
|
- 输出:2025-11-20 09:44:48 CST
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:47(UTC+8)
|
||||||
|
- 动作:确认 .claude 目录是否存在
|
||||||
|
- 工具:shell(ls .claude)
|
||||||
|
- 输出:目录不存在,命令返回 “No such file or directory”
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:48(UTC+8)
|
||||||
|
- 动作:阅读 AnjuanAndJuanneiController.java 源码
|
||||||
|
- 工具:shell(sed -n '1,960p' src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java)
|
||||||
|
- 输出:获取 uploadSimpleFilesAnjuan 及相关上传实现用于 OOM 分析
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:49(UTC+8)
|
||||||
|
- 动作:查阅 PdfFileHelper.image2Pdf 实现
|
||||||
|
- 工具:shell(sed -n '520,620p' src/main/java/com/point/strategy/common/PdfFileHelper.java)
|
||||||
|
- 输出:确认图片转 PDF 逻辑使用 ImageIO.read + iText Image 双重加载
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:50(UTC+8)
|
||||||
|
- 动作:查询当前系统时间
|
||||||
|
- 工具:shell(date '+%Y-%m-%d %H:%M:%S %Z')
|
||||||
|
- 输出:2025-11-24 14:50:02 CST
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:55(UTC+8)
|
||||||
|
- 动作:为传统案卷上传OCR处理增加线程池限流
|
||||||
|
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
|
||||||
|
- 变更:新增受控线程池替代每文件新建线程,限制并发与队列,避免多文件同时触发大量OCR线程造成内存峰值。
|
||||||
|
- 工具:apply_patch
|
||||||
|
- 结果:代码已更新
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:56(UTC+8)
|
||||||
|
- 动作:优化图片转PDF内存占用
|
||||||
|
- 位置:src/main/java/com/point/strategy/common/PdfFileHelper.java
|
||||||
|
- 变更:移除 ImageIO 双重加载大图,直接用 iText Image 尺寸设置页面,减少一次内存占用。
|
||||||
|
- 工具:apply_patch
|
||||||
|
- 结果:代码已更新
|
||||||
|
|
||||||
|
- 时间:2025-11-24 14:59(UTC+8)
|
||||||
|
- 动作:对上传接口增加并发限流(全局同时最多处理3个文件)
|
||||||
|
- 位置:src/main/java/com/point/strategy/originBatchUpload/AnjuanAndJuanneiController.java
|
||||||
|
- 变更:新增 Semaphore,保证全局并行处理文件数不超过3;同时将 OCR 线程池调为 1~3 线程并缩短队列,匹配限流策略。
|
||||||
|
- 工具:apply_patch
|
||||||
|
- 结果:代码已更新
|
||||||
|
|||||||
@@ -13,12 +13,12 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
import org.springframework.util.ResourceUtils;
|
import org.springframework.util.ResourceUtils;
|
||||||
import com.itextpdf.text.pdf.PdfCopy;
|
import com.itextpdf.text.pdf.PdfCopy;
|
||||||
import com.itextpdf.text.pdf.PdfReader;
|
import com.itextpdf.text.pdf.PdfReader;
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
import javax.imageio.ImageReader;
|
import javax.imageio.ImageReader;
|
||||||
import javax.imageio.stream.FileImageInputStream;
|
import javax.imageio.stream.FileImageInputStream;
|
||||||
import javax.swing.*;
|
import javax.swing.*;
|
||||||
import java.awt.*;
|
import java.awt.*;
|
||||||
import java.awt.image.BufferedImage;
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
@@ -542,26 +542,15 @@ public class PdfFileHelper {
|
|||||||
**/
|
**/
|
||||||
public static boolean image2Pdf(String source, String target) {
|
public static boolean image2Pdf(String source, String target) {
|
||||||
try {
|
try {
|
||||||
BufferedImage img = ImageIO.read(new File(source));
|
|
||||||
PdfReader.unethicalreading = true;
|
PdfReader.unethicalreading = true;
|
||||||
Image png1 = Image.getInstance(source); //通过文件路径获取image
|
Image png1 = Image.getInstance(source); //通过文件路径获取image
|
||||||
// float heigth = png1.getHeight();
|
|
||||||
// float width = png1.getWidth();
|
|
||||||
// 新增:读取图片的EXIF方向信息
|
// 新增:读取图片的EXIF方向信息
|
||||||
int orientation = getExifOrientation(source);
|
int orientation = getExifOrientation(source);
|
||||||
//new一个pdf文档
|
// new一个pdf文档,直接使用图片宽高设置页面,避免重复加载
|
||||||
Document doc = new Document(null, 0, 0, 0, 0);
|
Document doc = new Document(null, 0, 0, 0, 0);
|
||||||
if (img == null) {
|
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
|
||||||
doc.setPageSize(new Rectangle(png1.getWidth(), png1.getHeight()));
|
|
||||||
} else {
|
|
||||||
doc.setPageSize(new Rectangle(img.getWidth(), img.getHeight()));
|
|
||||||
}
|
|
||||||
PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入
|
PdfWriter.getInstance(doc, new FileOutputStream(target)); //pdf写入
|
||||||
doc.open();//打开文档
|
doc.open();//打开文档
|
||||||
// doc.newPage(); //在pdf创建一页
|
|
||||||
// int percent = getPercent2(heigth, width);
|
|
||||||
// png1.setAlignment(Image.MIDDLE);
|
|
||||||
// png1.scalePercent(percent+3);// 表示是原来图像的比例;
|
|
||||||
// 新增:根据EXIF方向调整图片
|
// 新增:根据EXIF方向调整图片
|
||||||
adjustImageOrientation(png1, orientation);
|
adjustImageOrientation(png1, orientation);
|
||||||
doc.add(png1);
|
doc.add(png1);
|
||||||
|
|||||||
@@ -38,6 +38,12 @@ import java.nio.file.Path;
|
|||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import java.util.concurrent.RejectedExecutionException;
|
||||||
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.Semaphore;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -76,6 +82,29 @@ public class AnjuanAndJuanneiController {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private OcrLogMapper ocrLogMapper;
|
private OcrLogMapper ocrLogMapper;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OCR线程池,限制并发与队列长度,防止瞬时创建大量线程导致内存峰值
|
||||||
|
*/
|
||||||
|
private final ExecutorService ocrExecutor = new ThreadPoolExecutor(
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
60L,
|
||||||
|
TimeUnit.SECONDS,
|
||||||
|
new LinkedBlockingQueue<>(30),
|
||||||
|
runnable -> {
|
||||||
|
Thread t = new Thread(runnable);
|
||||||
|
t.setName("OCR-Processing");
|
||||||
|
t.setDaemon(true);
|
||||||
|
return t;
|
||||||
|
},
|
||||||
|
new ThreadPoolExecutor.AbortPolicy()
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 上传并发限流:全局最多同时处理3个文件,超出的请求阻塞等待许可
|
||||||
|
*/
|
||||||
|
private static final Semaphore uploadSemaphore = new Semaphore(3);
|
||||||
|
|
||||||
@RequestMapping(value = "/getWord", method = RequestMethod.POST)
|
@RequestMapping(value = "/getWord", method = RequestMethod.POST)
|
||||||
@ApiOperation(value = "获取图片文字")
|
@ApiOperation(value = "获取图片文字")
|
||||||
public AjaxJson getWord(String tableName,Integer id){
|
public AjaxJson getWord(String tableName,Integer id){
|
||||||
@@ -246,48 +275,63 @@ public class AnjuanAndJuanneiController {
|
|||||||
int fileIndex = 0;
|
int fileIndex = 0;
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
fileIndex++;
|
boolean acquired = false;
|
||||||
String name = iterator.next();
|
try {
|
||||||
MultipartFile file0 = multipartRequest.getFile(name);
|
uploadSemaphore.acquire(); // 最多并行处理3个文件
|
||||||
|
acquired = true;
|
||||||
|
fileIndex++;
|
||||||
|
String name = iterator.next();
|
||||||
|
MultipartFile file0 = multipartRequest.getFile(name);
|
||||||
|
|
||||||
if (file0 == null || file0.isEmpty()) {
|
if (file0 == null || file0.isEmpty()) {
|
||||||
logger.warn("第{}个文件为空,跳过", fileIndex);
|
logger.warn("第{}个文件为空,跳过", fileIndex);
|
||||||
falseNum++;
|
falseNum++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 使用零拷贝方式处理单个文件,最大程度优化内存使用
|
// 使用零拷贝方式处理单个文件,最大程度优化内存使用
|
||||||
AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request);
|
AjaxJson json2 = uploadFilesByPathAnjuanZeroCopy(file0, fondscode, dir, recId, tableName, request);
|
||||||
|
|
||||||
if ("101".equals(json2.getCode())) {
|
if ("101".equals(json2.getCode())) {
|
||||||
falseNum++;
|
falseNum++;
|
||||||
}
|
}
|
||||||
if ("100".equals(json2.getCode())) {
|
if ("100".equals(json2.getCode())) {
|
||||||
successNum++;
|
successNum++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 异步处理OCR,避免阻塞
|
// 异步处理OCR,避免阻塞
|
||||||
String originalFilename = file0.getOriginalFilename();
|
String originalFilename = file0.getOriginalFilename();
|
||||||
if (originalFilename != null) {
|
if (originalFilename != null) {
|
||||||
int index = originalFilename.lastIndexOf(".") + 1;
|
int index = originalFilename.lastIndexOf(".") + 1;
|
||||||
if (index > 0 && index < originalFilename.length()) {
|
if (index > 0 && index < originalFilename.length()) {
|
||||||
String fileType = originalFilename.substring(index);
|
String fileType = originalFilename.substring(index);
|
||||||
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
|
if (!fileType.equalsIgnoreCase("mp3") && !fileType.equalsIgnoreCase("mp4")) {
|
||||||
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
|
OCRProcessingTask ocrTask = new OCRProcessingTask(json2, tableName, youhongIntegrate, youhongBaseUrl, tessPath, ocrLogMapper, danganguanliService);
|
||||||
Thread ocrThread = new Thread(ocrTask, "OCR-Processing-" + fileIndex);
|
try {
|
||||||
ocrThread.setDaemon(true);
|
ocrExecutor.execute(ocrTask);
|
||||||
ocrThread.start();
|
} catch (RejectedExecutionException ex) {
|
||||||
|
logger.warn("OCR队列已满,跳过文件: {}", originalFilename);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 显式释放资源
|
// 显式释放资源
|
||||||
try {
|
try {
|
||||||
if (file0.getInputStream() != null) {
|
if (file0.getInputStream() != null) {
|
||||||
file0.getInputStream().close();
|
file0.getInputStream().close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.warn("关闭文件流时出错: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
logger.warn("获取上传许可被中断,终止处理");
|
||||||
|
return AjaxJson.returnExceptionInfo("上传处理中断");
|
||||||
|
} finally {
|
||||||
|
if (acquired) {
|
||||||
|
uploadSemaphore.release();
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("关闭文件流时出错: {}", e.getMessage());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user