在我们写爬虫的时候,经常会遇到验证码,新浪微博的验证码是四宫格形式。
可以采用模板验证码的破解方式,也就是把所有验证码的情况全部列出来,然后拿验证码的图片和这所有情况中的图片进行对比,然后获取验证码,再通过selenium自动拖拽点击,进行破解。
我们将验证码四个点标注为1234,那么所有的情况就是以下24种情况。
每组数字表示箭头依次经过四个点的顺序(也就是拖动顺序):
1234 | 2134 | 3124 | 4321 |
1243 | 2143 | 3142 | 4312 |
1342 | 2314 | 3214 | 4123 |
1324 | 2341 | 3241 | 4132 |
1423 | 2413 | 3412 | 4213 |
1432 | 2431 | 3421 | 4231 |
所有的情况就是以上24种。我们将这24种验证码的图片放在一个文件夹(templates)内,每张图片以对应的拖动顺序命名(例如 1234.png)。登录时用获取到的验证码截图去和所有模板一一对比,找到相似度超过阈值的那一张,再按文件名中的顺序依次拖动即可。代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
import time
from PIL import Image
from io import BytesIO
from os import listdir

# Fill in your own Weibo account before running.
USERNAME = ''
PASSWORD = ''


class CrackWeiboSlide():
    """Log in to Weibo and solve the four-dot pattern captcha by template matching.

    The captcha screenshot is compared pixel by pixel against the images in
    ``templates/``. Each template file is named after its drag order
    (e.g. ``1234.png``), so the matching file name gives the order in which
    the four dots must be connected.
    """

    def __init__(self):
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD

    def __del__(self):
        # ``browser`` is missing when webdriver.Chrome() failed in __init__.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window and can leave the driver process running.
            browser.quit()

    def open(self):
        """Open the login page, type the credentials and submit the form.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

    def get_position(self, retries=1):
        """Locate the captcha element on the page.

        If the captcha does not show up within the wait timeout, the login
        page is opened again and the lookup is retried.

        :param retries: how many times to re-open the login page and retry
        :return: tuple ``(top, bottom, left, right)`` in page pixels
        :raises TimeoutException: if the captcha never appears
        """
        retries = max(retries, 0)
        for attempt in range(retries + 1):
            try:
                img = self.wait.until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'patt-shadow')))
            except TimeoutException:
                print('未出现验证码')
                if attempt >= retries:
                    # Nothing to measure; fail loudly rather than touch an
                    # unbound ``img`` below.
                    raise
                self.open()
            else:
                break
        # Give the captcha a moment to finish rendering before measuring it.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the current page.

        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        # Wrap the PNG bytes in a file-like object so PIL can read them.
        return Image.open(BytesIO(screenshot))

    def get_image(self, name):
        """Crop the captcha out of a page screenshot and save it.

        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes a (left, upper, right, lower) box.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def detect_image(self, image):
        """Find the template that matches the captcha.

        :param image: captcha image to recognise
        :return: drag order as a list of ints, or None if no template matches
        """
        for template_name in sorted(listdir('templates/')):
            print('正在匹配', template_name)
            # Close each template file as soon as it has been compared.
            with Image.open('templates/' + template_name) as template:
                matched = self.same_img(image, template)
            if matched:
                # The file name stem encodes the order, e.g. "3142" -> [3, 1, 4, 2].
                numbers = [int(number) for number in template_name.split('.')[0]]
                print('拖动顺序', numbers)
                return numbers
        return None

    @staticmethod
    def _pixels_close(pixel1, pixel2, threshold=60):
        """Return True if the first three channels each differ by less than threshold."""
        return all(abs(a - b) < threshold for a, b in zip(pixel1[:3], pixel2[:3]))

    def is_pixel_equal(self, image1, image2, x, y):
        """Check whether two images have a similar pixel at one position.

        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True if every colour channel differs by less than 60
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._pixels_close(pixel1, pixel2)

    def same_img(self, image, template):
        """Decide whether the captcha matches a template.

        :param image: captcha image to recognise
        :param template: template image
        :return: True if more than 99% of the pixels are similar
        """
        # Fraction of similar pixels required for a match.
        threshold = 0.99
        width, height = image.width, image.height
        if width == 0 or height == 0:
            return False
        # A template smaller than the captcha cannot cover every pixel;
        # treat it as a non-match instead of indexing out of range.
        if template.width < width or template.height < height:
            return False
        # Load pixel access once instead of once per pixel, and normalise
        # both images to RGB so each pixel is a 3-tuple.
        image_px = image.convert('RGB').load()
        template_px = template.convert('RGB').load()
        count = 0
        for x in range(width):
            for y in range(height):
                if self._pixels_close(image_px[x, y], template_px[x, y]):
                    count += 1
        result = float(count) / (width * height)
        if result > threshold:
            print('成功匹配')
            return True
        return False

    @staticmethod
    def _split_offset(total, steps):
        """Split an offset into ``steps`` integer increments that sum to round(total)."""
        increments = []
        moved = 0
        for i in range(1, steps + 1):
            target = int(round(total * i / steps))
            increments.append(target - moved)
            moved = target
        return increments

    def move(self, numbers):
        """Drag the mouse through the captcha dots in the given order.

        :param numbers: drag order, 1-based indices of the dots
        :return: None
        """
        # The dots of the pattern captcha.
        circles = self.browser.find_elements(By.CSS_SELECTOR, '.patt-wrap .patt-circ')
        print('-----------------', circles)
        dx = dy = 0
        last = len(numbers) - 1
        for index, number in enumerate(numbers):
            circle = circles[number - 1]
            if index == 0:
                # Press the mouse button on the centre of the first dot.
                ActionChains(self.browser).move_to_element(circle).click_and_hold().perform()
            else:
                # Move in small steps so the drag looks gradual. Pointer
                # moves take whole pixels, so use integer steps that add up
                # to the full offset instead of truncated fractions.
                times = 30
                steps = zip(self._split_offset(dx, times), self._split_offset(dy, times))
                for step_x, step_y in steps:
                    ActionChains(self.browser).move_by_offset(step_x, step_y).perform()
                    time.sleep(1 / times)
            if index == last:
                # Release the mouse on the last dot.
                ActionChains(self.browser).release().perform()
            else:
                # Offset from the current dot to the next one.
                next_circle = circles[numbers[index + 1] - 1]
                dx = next_circle.location['x'] - circle.location['x']
                dy = next_circle.location['y'] - circle.location['y']

    def crack(self):
        """Entry point: log in, recognise the captcha and drag the pattern.

        :return: None
        """
        self.open()
        # Grab the captcha image.
        image = self.get_image('captcha.png')
        numbers = self.detect_image(image)
        if not numbers:
            # No template matched; there is nothing to drag.
            print('未匹配到验证码模板')
            return
        self.move(numbers)
        time.sleep(10)
        print('识别结束')


if __name__ == '__main__':
    crack = CrackWeiboSlide()
    crack.crack()
设置自己的账号密码即可实现。
有时候会匹配不上,即图片相似度达不到0.99的阈值。这时可能是我们收集的验证码模板图片过时了,重新运行图片收集程序收集一遍即可(收集到的图片需要去重,并按拖动顺序重命名后放入 templates 文件夹)。
收集图片程序代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import time
from PIL import Image
from io import BytesIO
from os import listdir

# NOTE(review): credentials are hard-coded here; replace them with your own
# account and avoid committing real credentials to source control.
USERNAME = '18239831004'
PASSWORD = 'qweqweqwe'


class CrackWeiboSlide():
    """Collect Weibo pattern-captcha screenshots.

    Logs in repeatedly and saves each captcha that appears as ``0.png``,
    ``1.png``, ... in the working directory.
    """

    def __init__(self):
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD

    def __del__(self):
        # ``browser`` is missing when webdriver.Chrome() failed in __init__.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window and can leave the driver process running.
            browser.quit()

    def open(self):
        """Open the login page, type the credentials and submit the form.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

    def get_position(self, retries=1):
        """Locate the captcha element on the page.

        If the captcha does not show up within the wait timeout, the login
        page is opened again and the lookup is retried.

        :param retries: how many times to re-open the login page and retry
        :return: tuple ``(top, bottom, left, right)`` in page pixels
        :raises TimeoutException: if the captcha never appears
        """
        retries = max(retries, 0)
        for attempt in range(retries + 1):
            try:
                img = self.wait.until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'patt-shadow')))
            except TimeoutException:
                print('未出现验证码')
                if attempt >= retries:
                    # Nothing to measure; fail loudly rather than touch an
                    # unbound ``img`` below.
                    raise
                self.open()
            else:
                break
        # Give the captcha a moment to finish rendering before measuring it.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the current page.

        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        # Wrap the PNG bytes in a file-like object so PIL can read them.
        return Image.open(BytesIO(screenshot))

    def get_image(self, name):
        """Crop the captcha out of a page screenshot and save it.

        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes a (left, upper, right, lower) box.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def main(self, max_images=None):
        """Collect captcha images by logging in over and over.

        :param max_images: stop after saving this many images; None (the
            default) keeps collecting until the process is interrupted
        :return: None
        """
        count = 0
        while max_images is None or count < max_images:
            name = str(count) + '.png'
            self.open()
            self.get_image(name)
            count += 1


if __name__ == '__main__':
    crack = CrackWeiboSlide()
    crack.main()
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作具有一定的参考学习价值,谢谢大家对服务器之家的支持。如果你想了解更多相关内容请查看下面相关链接
原文链接:https://blog.csdn.net/qq_39138295/article/details/82888722