// Source listing metadata: 128 lines, 4.0 KiB, C#.
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
|
using OpenCvSharp;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
using OpenCvSharp.Extensions;
|
|
using Spire.OCR;
|
|
using Spire.Pdf;
|
|
using System.Drawing.Imaging;
|
|
using System.Drawing;
|
|
|
|
namespace UnitTestProject3
{
    /// <summary>
    /// Exploratory tests for reading a scanned PDF: OCR text extraction through
    /// Spire.PDF / Spire.OCR and QR-code decoding through OpenCvSharp.
    /// </summary>
    /// <remarks>
    /// The class is public because MSTest only discovers public test classes by default.
    /// </remarks>
    [TestClass]
    public class PDFSacnQRCodeTest1
    {
        /// <summary>
        /// Runs OCR over a sample scanned PDF and verifies that text was extracted.
        /// </summary>
        /// <remarks>
        /// The sample lives at a machine-specific path, so the test is reported as
        /// inconclusive when the file is absent instead of silently passing. Any
        /// exception raised while processing an existing file fails the test.
        /// </remarks>
        [TestMethod]
        public void TestMethod1()
        {
            string dataDir = @"C:\Users\Fareoh\Desktop\工作清单\派诺\发货签收单\Test\";
            string fileName = @"P2401001-XNZ01-POORD007585-珠海兴诺能源技术有限公司-500.pdf";
            string pdfPath = Path.Combine(dataDir, fileName);

            if (!File.Exists(pdfPath))
            {
                Assert.Inconclusive("Sample PDF not found: " + pdfPath);
            }

            List<string> data = ExtractTextFromScannedPDF(pdfPath);

            Assert.IsNotNull(data);
            Assert.IsTrue(data.Count > 0, "No page text was extracted from the sample PDF.");

            // TODO: render the page to a Mat and call DecodeQRCodes; the detection
            // region sketched for this sample was x=2000, y=400, width=500, height=500.
        }

        /// <summary>
        /// Detects and decodes the QR codes found inside one region of an image.
        /// </summary>
        /// <param name="src">
        /// Source image. The region is converted with <c>BGR2GRAY</c>, so a BGR image is expected.
        /// </param>
        /// <param name="region">Rectangle (x, y, width, height) of <paramref name="src"/> to search.</param>
        /// <param name="qrCodeTexts">
        /// Receives the decoded texts; an empty array when no QR code is detected.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
        private static void DecodeQRCodes(Mat src, Rect region, out string[] qrCodeTexts)
        {
            if (src == null)
            {
                throw new ArgumentNullException(nameof(src));
            }

            qrCodeTexts = new string[0];

            // Mat wraps native memory, so every intermediate image is disposed deterministically.
            using (Mat roi = new Mat(src, region))      // crop the requested region
            using (Mat gray = new Mat())
            using (Mat binary = new Mat())
            using (QRCodeDetector detector = new QRCodeDetector())
            {
                // Convert to grayscale, then binarize with an Otsu-selected threshold.
                Cv2.CvtColor(roi, gray, ColorConversionCodes.BGR2GRAY);
                Cv2.Threshold(gray, binary, 0, 255, ThresholdTypes.Binary | ThresholdTypes.Otsu);

                Point2f[] points;
                if (detector.DetectMulti(binary, out points))
                {
                    string[] decoded;
                    detector.DecodeMulti(binary, points, out decoded);
                    if (decoded != null)
                    {
                        qrCodeTexts = decoded;
                    }
                }
            }
        }

        /// <summary>
        /// Extracts text from a scanned PDF by rendering each page to an image and running OCR on it.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file to load.</param>
        /// <returns>One OCR text entry per page, in page order.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="pdfFilePath"/> is null, empty, or whitespace.
        /// </exception>
        public static List<string> ExtractTextFromScannedPDF(string pdfFilePath)
        {
            if (string.IsNullOrWhiteSpace(pdfFilePath))
            {
                throw new ArgumentException("A PDF file path is required.", nameof(pdfFilePath));
            }

            List<string> extractedText = new List<string>();

            using (PdfDocument document = new PdfDocument())
            {
                document.LoadFromFile(pdfFilePath);

                for (int pageIndex = 0; pageIndex < document.Pages.Count; pageIndex++)
                {
                    // Render the page at 300 x 300 DPI and hand it to the OCR step as an in-memory PNG.
                    using (Image image = document.SaveAsImage(pageIndex, 300, 300))
                    using (MemoryStream stream = new MemoryStream())
                    {
                        image.Save(stream, ImageFormat.Png);
                        stream.Position = 0; // rewind so the scanner reads from the start

                        extractedText.Add(ScanTextFromImageStream(stream));
                    }
                }
            }

            return extractedText;
        }

        /// <summary>
        /// Runs OCR on an image supplied as a stream.
        /// </summary>
        /// <param name="stream">Stream holding the image; it is scanned as PNG data.</param>
        /// <returns>The recognized text, or an empty string when the scanner exposes no text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static string ScanTextFromImageStream(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            using (OcrScanner ocrScanner = new OcrScanner())
            {
                ocrScanner.Scan(stream, OCRImageFormat.Png);

                IOCRText text = ocrScanner.Text;
                return text == null ? string.Empty : text.ToString();
            }
        }
    }
}
|