最近研究C#相关的OCR技术,图像识别一般用C和C++这种底层语言做的比较多,C#主要是依托一些封装好的组件进行调用,这里介绍三种身份证识别的方法。
一:调用大公司api接口,百度、云脉,文通科技都有相关的api介绍。
二:调用图像处理类库,EmguCV是OpenCV的一个跨平台的.NET封装,该封装也可以在Mono平台上编译,并可在Windows、Mac OS、Android、iPhone、iPad等多个平台上运行
三:调用office2007 组件
一、证件识别api接口
以聚合数据中的API接口为例(网址如下:证件识别接口)。由于官方API没有提供C#的调用方式,这里给出C#的调用代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
// NOTE(review): the published listing was lowercased during extraction. Framework and
// Newtonsoft identifiers are restored to their real casing below; names owned by this
// project (cardupload, httppostdata, tojson) are kept exactly as the listing spells
// them - confirm their casing against the real project before compiling.

/// <summary>
/// Uploads the first file of the current HTTP request to the Juhe card-recognition
/// endpoint and returns the recognised name and ID number.
/// </summary>
/// <returns>
/// The result of <c>tojson()</c> on an object with <c>name</c> and <c>card</c> members;
/// if anything throws, the text of the caught exception instead.
/// </returns>
public static string cardupload()
{
    try
    {
        string appkey = "网站自己申请的key"; // replace with the appkey issued for your account
        HttpPostedFile file = HttpContext.Current.Request.Files[0];
        string url = "http://api2.juheapi.com/cardrecon/upload";

        var parameters = new Dictionary<string, string>();
        parameters.Add("key", appkey);
        parameters.Add("cardtype", "2");

        string result = httppostdata(url, 60000, "pic", file.InputStream, parameters);

        // The "result" member of the response is re-parsed as a JSON object of its own.
        JObject info = JObject.Parse(JObject.Parse(result)["result"].ToString());
        var cardinfo = new
        {
            name = info["姓名"],
            card = info["公民身份号码"]
        };

        // tojson() is a helper defined outside this listing.
        return cardinfo.tojson();
    }
    catch (Exception ex)
    {
        // NOTE(review): the full exception text (including stack trace) is handed back to
        // the caller. Kept because callers may rely on it; consider logging it instead.
        return ex.ToString();
    }
}

/// <summary>
/// Sends a multipart/form-data POST containing one file part followed by plain text
/// fields, and returns the response body.
/// </summary>
/// <param name="url">Address of the API endpoint.</param>
/// <param name="timeout">Request timeout, assigned to <c>HttpWebRequest.Timeout</c>.</param>
/// <param name="filekeyname">Form field name used for the file part.</param>
/// <param name="file">Stream whose remaining content is sent as the file part.</param>
/// <param name="stringdict">Additional text fields, written in enumeration order.</param>
/// <returns>The response body decoded as UTF-8.</returns>
private static string httppostdata(string url, int timeout, string filekeyname, Stream file, Dictionary<string, string> stringdict)
{
    var boundary = "---------------" + DateTime.Now.Ticks.ToString("x");

    var request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "POST"; // HTTP method tokens are case-sensitive
    request.Timeout = timeout;
    request.ContentType = "multipart/form-data; boundary=" + boundary;

    // The body is buffered first so that ContentLength is known before sending.
    using (var body = new MemoryStream())
    {
        // Opening boundary and the file part.
        byte[] openingBoundary = Encoding.ASCII.GetBytes("--" + boundary + "\r\n");
        body.Write(openingBoundary, 0, openingBoundary.Length);

        string fileHeader = string.Format(
            "content-disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" +
            "content-type: application/octet-stream\r\n\r\n",
            filekeyname,
            "card.jpg");
        byte[] fileHeaderBytes = Encoding.UTF8.GetBytes(fileHeader);
        body.Write(fileHeaderBytes, 0, fileHeaderBytes.Length);
        file.CopyTo(body);

        // Each text field starts with CRLF + boundary, which terminates the previous part.
        // Nothing is written after the value: a trailing CRLF here would be followed by the
        // next delimiter's own CRLF and therefore become part of the field value.
        foreach (KeyValuePair<string, string> field in stringdict)
        {
            string part = "\r\n--" + boundary
                + "\r\ncontent-disposition: form-data; name=\"" + field.Key + "\""
                + "\r\n\r\n" + field.Value;
            byte[] partBytes = Encoding.UTF8.GetBytes(part);
            body.Write(partBytes, 0, partBytes.Length);
        }

        // Closing boundary; its leading CRLF terminates the last part (file or field).
        byte[] closingBoundary = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");
        body.Write(closingBoundary, 0, closingBoundary.Length);

        request.ContentLength = body.Length;
        using (Stream requestStream = request.GetRequestStream())
        {
            // WriteTo sends the whole buffer regardless of Position, without a second copy.
            body.WriteTo(requestStream);
        }
    }

    using (var response = (HttpWebResponse)request.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
    {
        return reader.ReadToEnd();
    }
}
二、emgucv类库调用
环境搭建
下载地址:emgucv官网
在file类别下下载这个exe,进行安装,安装后在目录下能找相应组件,还有些应用的案例。
用C#进行识别,需要先对图片进行二值化处理,再调用OCR。相关dll可在我整理的地址下载:360云盘 提取码:89f4
将dll文件夹中的dll引用到C#项目中;x64、x86、tessdata分别对应OCR识别的类库和语言库,我的tessdata中已添加中文语言包,将这三个文件夹放入程序执行文件夹中。
demo
自己做的小demo如图:身份证图片是百度上下载的
相关代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Emgu.CV;
using Emgu.CV.OCR;
using Emgu.CV.Structure;
using System.IO;

// NOTE(review): the published listing was lowercased during extraction. .NET and Emgu CV
// identifiers are restored to their real casing; names owned by this project (namespace,
// class, handlers, initializecomponent and the designer fields textbox1 / picturebox1 /
// picturebox2) are kept as the listing spells them and must match the designer file.
namespace imagemanage
{
    /// <summary>
    /// Form that loads a picture, deskews and binarises it, and runs Tesseract OCR on the
    /// binarised result.
    /// </summary>
    public partial class form1 : Form
    {
        /// <summary>Binarised image produced by the last successful load; null until then.</summary>
        Image<Gray, byte> imagethreshold;

        public form1()
        {
            initializecomponent();
        }

        /// <summary>
        /// Runs OCR with the "chi_sim" language data on the binarised image and shows the
        /// recognised text in textbox1.
        /// </summary>
        private void btn_convert_click(object sender, EventArgs e)
        {
            if (imagethreshold == null)
            {
                // No picture has been loaded yet, so there is nothing to recognise.
                return;
            }

            // Per the original author's note: the first argument is the language-data folder;
            // an empty string means it is looked up next to the executable.
            using (Tesseract ocr = new Tesseract("", "chi_sim", OcrEngineMode.TesseractOnly))
            {
                ocr.Recognize(imagethreshold);
                this.textbox1.Text = ocr.GetText();
            }
        }

        /// <summary>
        /// Lets the user pick a picture, shows it in picturebox1, then converts it to
        /// grayscale, deskews it, thresholds it and shows the result in picturebox2.
        /// </summary>
        private void picturebox1_click(object sender, EventArgs e)
        {
            string file;
            using (OpenFileDialog of = new OpenFileDialog())
            {
                of.Title = "请选择图片";
                if (of.ShowDialog() != DialogResult.OK)
                {
                    // Cancelled: leave the current picture and OCR input untouched instead of
                    // re-processing the old image (or a null one on first use).
                    return;
                }
                file = of.FileName;
            }

            Image img = Image.FromFile(file);
            picturebox1.Image = img;

            Bitmap bitmap = (Bitmap)this.picturebox1.Image;
            Image<Bgr, byte> imagesource = new Image<Bgr, byte>(bitmap);
            Image<Gray, byte> imagegrayscale = imagesource.Convert<Gray, byte>();
            imagegrayscale = randon(imagegrayscale);

            // Pixels above 100 become 255, the rest 0.
            imagethreshold = imagegrayscale.ThresholdBinary(new Gray(100), new Gray(255));
            this.picturebox2.Image = imagethreshold.ToBitmap();
        }

        /// <summary>
        /// Skew correction by projection: tries whole-degree rotations and keeps the one whose
        /// row sums differ most between adjacent rows.
        /// </summary>
        /// <param name="imageinput">Grayscale image to deskew; it is not modified.</param>
        /// <returns>
        /// The best-scoring rotated copy, or <paramref name="imageinput"/> itself when no
        /// rotation scores above zero.
        /// </returns>
        private Image<Gray, byte> randon(Image<Gray, byte> imageinput)
        {
            // Search range in degrees, inclusive on both ends (the original loop stopped at +19).
            const int maxSkewDegrees = 20;

            int width = imageinput.Width;
            int height = imageinput.Height;

            Image<Gray, byte> best = imageinput;
            long bestScore = 0;

            for (int angle = -maxSkewDegrees; angle <= maxSkewDegrees; angle++)
            {
                // Assumes Rotate returns an image with the same dimensions as the input, as the
                // original indexing did - TODO confirm for the Emgu CV version in use.
                Image<Gray, byte> candidate = imageinput.Rotate(angle, new Gray(1));
                byte[,,] pixels = candidate.Data;

                // Score = sum of |rowSum[k] - rowSum[k + 1]| over adjacent rows; long so that
                // large images cannot overflow the accumulator.
                long score = 0;
                int previousRowSum = 0;
                for (int row = 0; row < height; row++)
                {
                    int rowSum = 0;
                    for (int col = 0; col < width; col++)
                    {
                        rowSum += pixels[row, col, 0];
                    }

                    if (row > 0)
                    {
                        score += Math.Abs(previousRowSum - rowSum);
                    }
                    previousRowSum = rowSum;
                }

                if (score > bestScore)
                {
                    // Release the previous winner unless it is the caller's own image.
                    if (!ReferenceEquals(best, imageinput))
                    {
                        best.Dispose();
                    }
                    best = candidate;
                    bestScore = score;
                }
                else
                {
                    candidate.Dispose();
                }
            }

            return best;
        }
    }
}
三、office 2007组件
该组件免费而且识别度比较高。
环境搭建
Office 2007组件MODI,需要安装Office 2007,且由于兼容性问题需要安装补丁,SP1或者SP2都行,补丁下载地址如下:
sp1下载地址 sp2下载地址
安装后控制面板-->卸载或更新程序-->选择office2007-->选择更改-->选择添加或修复功能-->弹出下面界面,运行相应组件。
将office工具-->microsoft office document imaging 下的工具运行
在c#项目中引用com组件即可:
如果office组件应用不是在本地程序而需要部署在iis上,还需将应用程序的应用池的权限设置为如下图所示:程序应用池-->高级设置-->标识
demo
1
2
3
4
5
6
7
8
9
10
11
12
|
// Runs MODI OCR over the image file named by fullfilename (defined outside this snippet)
// and collects the recognised text of every page into sb.
// NOTE(review): the published listing was lowercased; MODI/.NET identifiers are restored
// to their real casing, while fullfilename is kept as the listing spells it.
StringBuilder sb = new StringBuilder();
MODI.Document doc = new MODI.Document();
MODI.Image image;
MODI.Layout layout;
try
{
    doc.Create(fullfilename);

    // First argument selects the recognition language (Simplified Chinese); the two
    // booleans are MODI's auto-orient and straighten options.
    doc.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);

    for (int i = 0; i < doc.Images.Count; i++)
    {
        image = (MODI.Image)doc.Images[i];
        layout = image.Layout;
        sb.Append(layout.Text);
    }
}
finally
{
    // Close without saving and release the COM object so cleanup also happens when
    // Create or OCR throws.
    doc.Close(false);
    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
}
以上即一些c#进行身份证识别的方法,可根据自己项目的不同需求进行选用。
原文链接:http://www.cnblogs.com/kaoleba/p/5662575.html