python简单验证码识别的实现方法_Python

利用SVM（支持向量机）进行4位数字验证码识别

主要是思路和步骤如下：

一，素材收集

检查环境是否包含有相应的库：

1.在cmd中，通过 pip list命令查看安装的库

2.再使用 pip install requests 安装requests库

3.再次使用pip list 命令

python简单验证码识别的实现方法

4.利用python获取验证码资源

编写代码：_downloadpic.py

				?

									#!/usr/bin/env python3

									#利用python从站点下载验证码图片

									import requests

									## 1.在 http://www.xxx.com

									# 获取验证码url

									def downloads_pic(strpath, strname):
									    """Download one captcha image and save it as ``<strpath><strname>.png``.

									    Args:
									        strpath: Destination directory prefix. It is concatenated directly
									            with the file name, so it must end with a path separator.
									        strname: File name without the ``.png`` extension.

									    Raises:
									        requests.HTTPError: If the server answers with an error status.
									        requests.Timeout: If the server does not respond in time.
									    """
									    # Placeholder address of the captcha endpoint.
									    url = 'http://www.xxx.com'
									    # stream=True defers the body download so it can be written to disk
									    # chunk by chunk; the context manager releases the connection.
									    with requests.get(url, stream=True, timeout=30) as rreq:
									        # Fail loudly instead of saving an error page as a .png file.
									        rreq.raise_for_status()
									        with open(strpath + strname + '.png', 'wb') as fppic:
									            for bychunk in rreq.iter_content(chunk_size=1024):
									                # Skip empty chunks.
									                if bychunk:
									                    fppic.write(bychunk)

									# Fetch ten sample captchas, saved as 001.png ... 010.png under d:/1/.
									for i in range(1, 11):
									    downloads_pic('d:/1/', "%03d" % i)

二，素材处理

1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白

2.去除噪点

3.切割图片

python简单验证码识别的实现方法

编写代码：_picdealwith.py

				?

									#!/usr/bin/env python3

									import os

									import os.path

									from PIL import Image, ImageEnhance, ImageFilter

									import random

									#二值化处理

									#strimgpath 图片路径

									def binaryzationimg(strimgpath):
									    """Load an image, enhance it and convert it to 1-bit black and white.

									    The image is passed through contrast, sharpness and brightness
									    enhancement, converted to greyscale with a detail filter, and finally
									    converted to mode ``'1'``.

									    Args:
									        strimgpath: Path of the image file to open.

									    Returns:
									        The processed Pillow image in mode ``'1'``.
									    """
									    # Imported locally so this function does not depend on how the
									    # module-level import spells these case-sensitive names.
									    from PIL import Image, ImageEnhance, ImageFilter

									    imgoriimg = Image.open(strimgpath)
									    # Contrast factor 2.55, then sharpness and brightness factor 2.0.
									    imgoriimg = ImageEnhance.Contrast(imgoriimg).enhance(2.55)
									    imgoriimg = ImageEnhance.Sharpness(imgoriimg).enhance(2.0)
									    imgoriimg = ImageEnhance.Brightness(imgoriimg).enhance(2.0)
									    # Mode names are case-sensitive: 'L' is 8-bit greyscale.
									    imggryimg = imgoriimg.convert('L').filter(ImageFilter.DETAIL)
									    # Mode '1' is 1-bit black and white.
									    return imggryimg.convert('1')

									#去除噪点

									def clearnoise(imgbinimg):
									    """Remove isolated black pixels from a binarized image, in place.

									    A black pixel (value 0) whose eight neighbours are all white (value
									    255) is treated as noise and repainted white. The outermost one-pixel
									    border is never examined, so every inspected pixel has all eight
									    neighbours inside the image.

									    Args:
									        imgbinimg: Image object exposing ``size``, ``getpixel((x, y))`` and
									            ``putpixel((x, y), value)``.

									    Returns:
									        The same image object that was passed in.
									    """
									    width, height = imgbinimg.size[0], imgbinimg.size[1]
									    # Offsets of the eight surrounding pixels.
									    neighbours = [
									        (dx, dy)
									        for dx in (-1, 0, 1)
									        for dy in (-1, 0, 1)
									        if (dx, dy) != (0, 0)
									    ]
									    for x in range(1, width - 1):
									        for y in range(1, height - 1):
									            if imgbinimg.getpixel((x, y)) != 0:
									                continue
									            if all(
									                imgbinimg.getpixel((x + dx, y + dy)) == 255
									                for dx, dy in neighbours
									            ):
									                # Pass the coordinate as a tuple, matching the
									                # getpixel calls above.
									                imgbinimg.putpixel((x, y), 255)
									    return imgbinimg

									#切割图片

									def getcropimgs(imgclrimg):
									    """Cut a captcha image into four side-by-side tiles.

									    Each tile is 13 pixels wide and 15 pixels tall; the first one starts at
									    x = 6, y = 3 and each following tile starts 13 pixels further right.

									    Args:
									        imgclrimg: Image object exposing ``crop((left, top, right, bottom))``.

									    Returns:
									        A list with the four crop results in left-to-right order.
									    """
									    top, tile_w, tile_h = 3, 13, 15
									    lefts = [6 + idx * tile_w for idx in range(4)]
									    return [
									        imgclrimg.crop((left, top, left + tile_w, top + tile_h))
									        for left in lefts
									    ]

									#调用部分

									def main():
									    """Binarize, denoise and slice every image under step1 into step2.

									    Walks ``d:/1/step1/`` recursively, processes each file found and saves
									    the resulting tiles as PNG files in ``d:/1/step2/``.
									    """
									    src_dir = 'd:/1/step1/'
									    dst_dir = 'd:/1/step2/'
									    saved = 0
									    for folder, _subdirs, names in os.walk(src_dir):
									        for name in names:
									            source_path = os.path.join(folder, name)
									            cleaned = clearnoise(binaryzationimg(source_path))
									            for tile in getcropimgs(cleaned):
									                # Running counter followed by a random 4-digit suffix.
									                tile_name = "%04d%04d.png" % (saved, random.randint(0, 9999))
									                tile.save(os.path.join(dst_dir, tile_name))
									                saved += 1
									    print("ok！")

									# Run the pipeline only when this file is executed as a script; the
									# guard must compare against '__main__' or main() is never called.
									if __name__ == '__main__':
									    main()

三，手工分类

将第二步切割好的图片进行分类，体力活

python简单验证码识别的实现方法

四，利用SVM（支持向量机）建立模型

1.安装svm库

下载libsvm库，并解压

将库中的windows目录的路径添加到path环境变量中

将libsvm下的python文件夹中的svm.py和svmutil.py文件拷贝到你的python的路径中lib文件夹中

				?

									from svmutil import *

2.生成模型文件

2.1.将分好类的图片信息进行提取，生成特征值

2.2.输出向量数据

python简单验证码识别的实现方法

2.3.根据数据输出svm模型文件

python简单验证码识别的实现方法

编写代码：_svmdemo.py

				?

									#!/usr/bin/env python3

									#svm,验证码识别

									import os

									import sys

									import random

									import os.path

									from PIL import Image, ImageEnhance, ImageFilter

									from svmutil import *

									##记录像素点的值，描述特征，采用遍历每个像素点统计黑色点的数量

									def getfeature(imgcropimg, nimgheight, nimgwidth):
									    """Build a black-pixel projection feature vector for an image.

									    Args:
									        imgcropimg: Image object exposing ``getpixel((x, y))``.
									        nimgheight: Number of rows to scan.
									        nimgwidth: Number of columns to scan.

									    Returns:
									        A list holding the count of pixels equal to 0 in each row (top to
									        bottom), followed by the count in each column (left to right).
									    """
									    def is_black(x, y):
									        return imgcropimg.getpixel((x, y)) == 0

									    row_counts = [
									        sum(1 for x in range(nimgwidth) if is_black(x, y))
									        for y in range(nimgheight)
									    ]
									    col_counts = [
									        sum(1 for y in range(nimgheight) if is_black(x, y))
									        for x in range(nimgwidth)
									    ]
									    return row_counts + col_counts

									##输出向量数据

									def outputvectordata(strid, strmaterialdir, stroutpath):
									    """Append one feature line per image found under a directory.

									    Every line has the form ``<strid> 1:<v1> 2:<v2> ... `` where the values
									    come from ``getfeature(image, 15, 13)``; this is the file that
									    ``svm_read_problem`` is later given.

									    Args:
									        strid: Label written at the start of every line.
									        strmaterialdir: Directory walked recursively for image files.
									        stroutpath: Text file the lines are appended to.
									    """
									    # Imported locally so this function does not depend on how the
									    # module-level import spells this case-sensitive name.
									    from PIL import Image

									    for parentpath, _dirnames, filenames in os.walk(strmaterialdir):
									        with open(stroutpath, 'a') as fpfea:
									            for fp in filenames:
									                strfullpath = os.path.join(parentpath, fp)
									                # Close each image as soon as its features are read.
									                with Image.open(strfullpath) as imgoriimg:
									                    featurelist = getfeature(imgoriimg, 15, 13)
									                # Feature indices are 1-based.
									                pairs = ''.join(
									                    '%d:%d ' % (index, value)
									                    for index, value in enumerate(featurelist, 1)
									                )
									                fpfea.write('%s %s\n' % (strid, pairs))

									#训练svm模型

									def trainsvmmodel(strproblempath, strmodelpath):
									    """Train a model from a problem file and save it.

									    Args:
									        strproblempath: Path passed to ``svm_read_problem``.
									        strmodelpath: Path the trained model is saved to.
									    """
									    labels, samples = svm_read_problem(strproblempath)
									    trained = svm_train(labels, samples)
									    svm_save_model(strmodelpath, trained)

									#svm模型测试

									def svmmodeltest(strproblempath, strmodelpath):
									    """Run a saved model against a problem file.

									    Args:
									        strproblempath: Path passed to ``svm_read_problem``.
									        strmodelpath: Path passed to ``svm_load_model``.

									    Returns:
									        The first element of the ``svm_predict`` result (the labels).
									    """
									    labels, samples = svm_read_problem(strproblempath)
									    loaded = svm_load_model(strmodelpath)
									    # svm_predict returns three items; only the first is passed on.
									    predicted, _accuracy, _values = svm_predict(labels, samples, loaded)
									    return predicted

									##输出测试向量数据

									def outputtestvectordata(strid, strdir, stroutpath):
									    """Append one feature line per image located directly in ``strdir``.

									    Every line has the form ``<strid> 1:<v1> 2:<v2> ... `` where the values
									    come from ``getfeature(image, 15, 13)``.

									    Args:
									        strid: Label written at the start of every line.
									        strdir: Directory whose files are read; subdirectories are ignored.
									        stroutpath: Text file the lines are appended to.
									    """
									    # Imported locally so this function does not depend on how the
									    # module-level import spells this case-sensitive name.
									    from PIL import Image

									    # Take the file list of the top-level directory only, without
									    # rebinding the strdir parameter.
									    _top, _subdirs, filelist = next(os.walk(strdir), (strdir, [], []))
									    outkey = os.path.normcase(os.path.abspath(stroutpath))
									    with open(stroutpath, 'a') as fpfea:
									        # Sorted so the lines follow file-name order rather than the
									        # unspecified directory listing order.
									        for fp in sorted(filelist):
									            strfullpath = os.path.join(strdir, fp)
									            # The output file may sit in the scanned directory; it is
									            # not an image, so skip it.
									            if os.path.normcase(os.path.abspath(strfullpath)) == outkey:
									                continue
									            with Image.open(strfullpath) as imgoriimg:
									                featurelist = getfeature(imgoriimg, 15, 13)
									            # Feature indices are 1-based.
									            pairs = ''.join(
									                '%d:%d ' % (index, value)
									                for index, value in enumerate(featurelist, 1)
									            )
									            fpfea.write('%s %s\n' % (strid, pairs))

									def main():
									    """Build the training vectors, train the model and print predictions."""
									    vector_path = 'd:/1/step4/vector.txt'
									    model_path = 'd:/1/step5/model.txt'

									    # 1. Append the vectors of every digit folder 0..9 to one file.
									    for digit in range(10):
									        label = '%d' % digit
									        outputvectordata(label, 'd:/1/step3/' + label, vector_path)

									    # 2. Train the model from that vector file.
									    trainsvmmodel(vector_path, model_path)

									    # 3. Predict the test vectors and print one label per line.
									    for predicted in svmmodeltest('d:/1/step6/vector.txt', model_path):
									        print('%d' % predicted)


									if __name__ == '__main__':
									    main()

五，测试

1.利用模型文件和向量文件进行测试验证码识别

##1.获取一张验证码图片

##2.对图片进行处理

## 2.1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白，

## 2.2.去除噪点

## 2.3.切割图片

##3.生成向量文件

##4.再利用之前的模型文件进行识别测试

编写代码：_svmtest.py

				?

									#!/usr/bin/env python3

									#对一张验证码图片进行识别测试

									##1.获取一张验证码图片

									##2.对图片进行处理

									## 2.1.二值化处理，增加对比度，锐化，增加亮度，滤镜，转为黑白，

									## 2.2.去除噪点

									## 2.3.切割图片

									##3.生成向量文件

									##4.再利用之前的模型文件进行识别测试

									################

									import _picdealwith

									import os

									import random

									import _svmdemo

									##测试

									# Counter used as the leading part of each saved tile name.
									g_count = 0
									strdirpath = 'd:/1/test/'
									strfilename = '001.png'

									# 1. Full path of the captcha image.
									strfullpath = os.path.join(strdirpath, strfilename)

									# 2. Binarize, remove noise and cut the image into tiles.
									imgbinimg = _picdealwith.binaryzationimg(strfullpath)
									imgclrimg = _picdealwith.clearnoise(imgbinimg)
									imglist = _picdealwith.getcropimgs(imgclrimg)

									# Save every tile into the same directory as the captcha.
									for img in imglist:
									    strimgname = "%04d%04d.png" % (g_count, random.randint(0, 9999))
									    img.save(os.path.join(strdirpath, strimgname))
									    g_count += 1
									print("ok")

									# Remove the source captcha before the directory is scanned below
									# (presumably so that only the tiles are turned into vectors).
									os.remove(strfullpath)

									# 3. Write the vector file for the tiles.
									_svmdemo.outputtestvectordata('0', 'd:/1/test/', 'd:/1/test/vector.txt')

									# 4. Predict with the previously trained model and print the labels
									# on one line without separators.
									plabel = _svmdemo.svmmodeltest('d:/1/test/vector.txt', 'd:/1/step5/model.txt')
									print(''.join('%d' % label for label in plabel), end='')