最近研究C#相關的OCR技術,圖像識別一般C和C++這種底層語言做的比較多,C#主要是依托一些封裝好的組件進行調用,這里介紹三種身份證識別的方法。
一:調用大公司API接口,百度、云脈、文通科技都有相關的API介紹。
二:調用圖像處理類庫，EmguCV是OpenCV的一個跨平臺的.Net封裝，該封裝也可以被編譯到Mono平臺和允許在Windows、Mac OS、Android、iPhone、iPad等多個平臺上運行。
三:調用Office 2007組件。
<h2>一、證件識別API接口</h2>
以聚合數據中的API接口為例，因為官方API沒有提供C#的調用方式,網址如下:證件識別接口
<pre>/// <summary>
/// 上傳圖片
/// </summary>
/// <returns></returns>
public static string CardUpload()
{
    // Uploads the first file of the current HTTP request to the card-recognition
    // endpoint and returns a JSON string with two members, "name" and "card".
    // Any exception raised along the way is returned to the caller as its text.
    try
    {
        // Placeholder value: replace with the appkey you applied for on the API site.
        string apiKey = "網(wǎng)站自己申請的key";
        HttpPostedFile uploaded = HttpContext.Current.Request.Files[0];
        string endpoint = "http://api2.juheapi.com/cardrecon/upload";

        // Plain form fields that accompany the image part.
        var formFields = new Dictionary<string, string>
        {
            { "key", apiKey },
            { "cardType", "2" }
        };

        string responseText = HttpPostData(endpoint, 60000, "pic", uploaded.InputStream, formFields);

        // The "result" member of the response is rendered to text and parsed
        // again as a JSON object of its own.
        JObject envelope = JObject.Parse(responseText);
        string resultText = envelope["result"].ToString();
        JObject recognised = JObject.Parse(resultText);

        // NOTE(review): the two lookup keys must match the field names in the
        // API response exactly - confirm against a live response.
        var summary = new
        {
            name = recognised["姓名"],
            card = recognised["公民身份號碼"]
        };
        return summary.ToJson();
    }
    catch (Exception error)
    {
        return error.ToString();
    }
}
/// <summary>
/// Sends a multipart/form-data POST made of one file part followed by plain string
/// fields, and returns the response body decoded as UTF-8 text.
/// </summary>
/// <param name="url">Address of the API endpoint.</param>
/// <param name="timeOut">Value assigned to the request's Timeout property (milliseconds).</param>
/// <param name="fileKeyName">Form field name under which the file part is sent.</param>
/// <param name="file">Stream copied, from its current position, into the file part.</param>
/// <param name="stringDict">Extra form fields, one part per entry; null is treated as empty.</param>
/// <returns>The response body as text.</returns>
private static string HttpPostData(string url, int timeOut, string fileKeyName,
    Stream file, Dictionary<string, string> stringDict)
{
    // Inner quotes must be escaped; without the backslashes the literals do not compile.
    const string filePartHeader =
        "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" +
        "Content-Type: application/octet-stream\r\n\r\n";
    const string fieldPartTemplate =
        "Content-Disposition: form-data; name=\"{0}\"\r\n\r\n{1}";

    // Boundary string derived from the current tick count.
    var boundary = "---------------" + DateTime.Now.Ticks.ToString("x");

    // The CRLF in front of a delimiter belongs to the delimiter, not to the part
    // before it. Every delimiter after the first therefore starts with CRLF, and
    // part bodies are written without a trailing CRLF of their own.
    var firstDelimiter = Encoding.ASCII.GetBytes("--" + boundary + "\r\n");
    var nextDelimiter = Encoding.ASCII.GetBytes("\r\n--" + boundary + "\r\n");
    var closingDelimiter = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");

    var webRequest = (HttpWebRequest)WebRequest.Create(url);
    webRequest.Method = "POST";
    webRequest.Timeout = timeOut;
    webRequest.ContentType = "multipart/form-data; boundary=" + boundary;

    // The body is assembled in memory first so ContentLength can be set before sending.
    using (var memStream = new MemoryStream())
    {
        // File part; the file name sent to the server is always "card.jpg".
        memStream.Write(firstDelimiter, 0, firstDelimiter.Length);
        var headerBytes = Encoding.UTF8.GetBytes(
            string.Format(filePartHeader, fileKeyName, "card.jpg"));
        memStream.Write(headerBytes, 0, headerBytes.Length);
        file.CopyTo(memStream);

        // One part per string field.
        if (stringDict != null)
        {
            foreach (KeyValuePair<string, string> field in stringDict)
            {
                memStream.Write(nextDelimiter, 0, nextDelimiter.Length);
                var fieldBytes = Encoding.UTF8.GetBytes(
                    string.Format(fieldPartTemplate, field.Key, field.Value));
                memStream.Write(fieldBytes, 0, fieldBytes.Length);
            }
        }

        memStream.Write(closingDelimiter, 0, closingDelimiter.Length);
        webRequest.ContentLength = memStream.Length;

        // WriteTo sends the whole buffer without an intermediate byte[] copy.
        using (var requestStream = webRequest.GetRequestStream())
        {
            memStream.WriteTo(requestStream);
        }
    }

    // using blocks release the response and reader even when reading throws.
    using (var httpWebResponse = (HttpWebResponse)webRequest.GetResponse())
    using (var httpStreamReader = new StreamReader(httpWebResponse.GetResponseStream(),
        Encoding.UTF8))
    {
        return httpStreamReader.ReadToEnd();
    }
}</pre>
<h2>二、EmguCV類庫調用</h2>
<h3>環境搭建</h3>
下載地址:EmguCV官網
在File類別下下載這個EXE,進行安裝，安裝后在目錄下能找到相應組件，還有些應用的案例。
C#進行識別，需進行圖片二值化處理和OCR調用，相關DLL可在我整理的地址下載:360云盤 提取碼:89f4
dll文件夾中的dll引用到C#項目中，x64、x86、tessdata對應OCR識別的類庫和語言庫，我tessdata中已添加中文語言包,將這三個文件夾放入程序執行文件夾中。
<h3>Demo</h3>
自己做的小Demo如圖:身份證圖片是百度上下載的
相關代碼如下:
<pre>using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Emgu.CV;
using Emgu.CV.OCR;
using Emgu.CV.Structure;
using System.IO;
namespace ImageManage
{
/// <summary>
/// Demo form. Clicking pictureBox1 loads an image, deskews it and binarizes it;
/// clicking btn_convert runs Tesseract OCR on the binarized image and shows the
/// recognised text in textBox1.
/// </summary>
public partial class Form1 : Form
{
    // Binarized image produced by pictureBox1_Click; null until an image has been loaded.
    Image<Gray, Byte> imageThreshold;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Runs OCR on the current binarized image and displays the text.
    /// </summary>
    private void btn_convert_Click(object sender, EventArgs e)
    {
        // Nothing to recognise until an image has been chosen; prompt instead of crashing.
        if (imageThreshold == null)
        {
            MessageBox.Show("請選擇圖片");
            return;
        }

        // The first argument is the path of the language-data folder; when left
        // empty it defaults to the executable's folder.
        using (Tesseract ocr = new Tesseract("", "chi_sim", OcrEngineMode.TesseractOnly))
        {
            ocr.Recognize(imageThreshold);
            this.textBox1.Text = ocr.GetText();
        }
    }

    /// <summary>
    /// Lets the user pick an image, then shows the original in pictureBox1 and the
    /// deskewed, binarized version in pictureBox2.
    /// </summary>
    private void pictureBox1_Click(object sender, EventArgs e)
    {
        using (OpenFileDialog of = new OpenFileDialog())
        {
            of.Title = "請選擇圖片";

            // Cancelled: keep the current state instead of processing a possibly null image.
            if (of.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            pictureBox1.Image = Image.FromFile(of.FileName);
        }

        Bitmap bitmap = (Bitmap)this.pictureBox1.Image;
        Image<Gray, Byte> imageGrayscale;
        using (Image<Bgr, Byte> imageSource = new Image<Bgr, byte>(bitmap))
        {
            imageGrayscale = imageSource.Convert<Gray, Byte>();
        }

        imageGrayscale = randon(imageGrayscale);

        // Fixed threshold of 100; pixels above it become 255.
        imageThreshold = imageGrayscale.ThresholdBinary(new Gray(100), new Gray(255));
        this.pictureBox2.Image = imageThreshold.ToBitmap();
    }

    /// <summary>
    /// Skew correction by projection: rotates the input through a range of angles
    /// and keeps the rotation whose row sums differ most between adjacent rows.
    /// </summary>
    /// <param name="imageInput">Grayscale image to correct; it is not modified or disposed.</param>
    /// <returns>
    /// The best-scoring rotated image, or <paramref name="imageInput"/> itself when
    /// no rotation scores above zero.
    /// </returns>
    private Image<Gray, Byte> randon(Image<Gray, Byte> imageInput)
    {
        int nwidth = imageInput.Width;
        int nheight = imageInput.Height;
        int[] sumhang = new int[nheight];

        // long: the sum of row differences can exceed int.MaxValue on large images.
        long bestScore = 0;
        Image<Gray, Byte> resultImage = imageInput;

        // Candidate angles are -20..19 degrees in 1-degree steps.
        // NOTE(review): +20 is excluded by the strict '<' - confirm whether that is intended.
        for (int ang = -20; ang < 20; ang = ang + 1)
        {
            Image<Gray, Byte> candidate = imageInput.Rotate(ang, new Gray(1));

            // Fetch the pixel array once instead of once per pixel.
            Byte[,,] pixels = candidate.Data;

            // Horizontal projection: sum of pixel values in each row.
            for (int i = 0; i < nheight; i++)
            {
                int sum = 0;
                for (int j = 0; j < nwidth; j++)
                {
                    sum += pixels[i, j, 0];
                }
                sumhang[i] = sum;
            }

            // Score: total absolute difference between adjacent row sums.
            long score = 0;
            for (int k = 0; k < nheight - 1; k++)
            {
                score += Math.Abs(sumhang[k] - sumhang[k + 1]);
            }

            if (score > bestScore)
            {
                // Release the previous best, but never the caller's input image.
                if (!ReferenceEquals(resultImage, imageInput))
                {
                    resultImage.Dispose();
                }
                resultImage = candidate;
                bestScore = score;
            }
            else
            {
                // Discarded candidates are released right away instead of waiting for the GC.
                candidate.Dispose();
            }
        }
        return resultImage;
    }
}
}</pre>
<h2>三、Office 2007組件</h2>
該組件免費而且識別度比較高。
<h3>環境搭建</h3>
Office 2007組件MODI，需要安裝Office 2007,且由于兼容性需要安裝補丁，SP1或者SP2都行,補丁下載地址如下:
SP1下載地址 SP2下載地址
安裝后控制面板-->卸載或更新程序-->選擇Office2007-->選擇更改-->選擇添加或修復功能-->彈出下面界面,運行相應組件。
將Office工具-->Microsoft Office Document Imaging 下的工具運行
在C#項目中引用Com組件即可:
如果Office組件應用不是在本地程序而需要部署在IIS上，還需將應用程序的應用池的權限設置為如下圖所示:程序應用池-->高級設置-->標識
<h3>Demo</h3>
<pre>StringBuilder sb = new StringBuilder();
// Runs MODI OCR over every image (page) of the file and appends the text to sb.
MODI.Document doc = new MODI.Document();
MODI.Image image;
MODI.Layout layout;
try
{
    doc.Create(fullFileName);
    try
    {
        // Recognition language: Simplified Chinese.
        doc.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);
        for (int i = 0; i < doc.Images.Count; i++)
        {
            image = (MODI.Image)doc.Images[i];
            layout = image.Layout;
            sb.Append(layout.Text);
        }
    }
    finally
    {
        // Close without saving, even when OCR throws, so the image file is not left locked.
        doc.Close(false);
    }
}
finally
{
    // Release the COM wrapper explicitly rather than waiting for finalization.
    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
}</pre>
以上即一些C#進行身份證識別的方法，可根據自己項目的不同需求進行選用。