环境准备
下面的两个第三方模块都可以直接通过pip快速安装,这里使用py36作为运行环境。
思路
- 遍历目录
- 拉取数据集合
- 遍历集合取得exif
- exif信息整理,并获取实体地址
- 拷贝文件到结果样本目录
- 生成json报告文件
基础知识
下面是现今相片中可能存在的与GPS相关的关键字,大牛亦可一笔带过~ [参考]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
{ "GPSVersionID" : "GPS版本" , "GPSLatitudeRef" : "南北纬" , "GPSLatitude" : "纬度" , "GPSLongitudeRef" : "东西经" , "GPSLongitude" : "经度" , "GPSAltitudeRef" : "海拔参照值" , "GPSAltitude" : "海拔" , "GPSTimeStamp" : "GPS时间戳" , "GPSSatellites" : "测量的卫星" , "GPSStatus" : "接收器状态" , "GPSMeasureMode" : "测量模式" , "GPSDOP" : "测量精度" , "GPSSpeedRef" : "速度单位" , "GPSSpeed" : "GPS接收器速度" , "GPSTrackRef" : "移动方位参照" , "GPSTrack" : "移动方位" , "GPSImgDirectionRef" : "图像方位参照" , "GPSImgDirection" : "图像方位" , "GPSMapDatum" : "地理测量资料" , "GPSDestLatitudeRef" : "目标纬度参照" , "GPSDestLatitude" : "目标纬度" , "GPSDestLongitudeRef" : "目标经度参照" , "GPSDestLongitude" : "目标经度" , "GPSDestBearingRef" : "目标方位参照" , "GPSDestBearing" : "目标方位" , "GPSDestDistanceRef" : "目标距离参照" , "GPSDestDistance" : "目标距离" , "GPSProcessingMethod" : "GPS处理方法名" , "GPSAreaInformation" : "GPS区功能变数名" , "GPSDateStamp" : "GPS日期" , "GPSDifferential" : "GPS修正" } |
初始化
考虑到exifread模块中有大量的logging输出,这里将它的level级别调到最高。然后下边的KEY是某站在调用高德地图API的时候遗留下来的,我也很尴尬。。就当福利了
1
2
3
4
5
6
7
8
9
|
import json
import logging
import os
import random
import time

import exifread
import requests

# exifread emits a lot of log output while parsing; raise the root logger
# threshold so that only CRITICAL messages are shown.
logging.basicConfig(level=logging.CRITICAL)

# AMap (Gaode) Web API key used for reverse geocoding. Set the AMAP_KEY
# environment variable to use your own key; the value that shipped with the
# script is kept as the fallback so existing usage keeps working.
KEY = os.environ.get("AMAP_KEY", "169d2dd7829fe45690fabec812d05bc3")
主逻辑函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
def _unique_name(filename, used):
    """Return *filename*, or a numbered variant if it was already handed out.

    *used* is a set of lower-cased names chosen so far; it is updated in place.
    Names are compared case-insensitively so the result is also safe on
    case-insensitive file systems.
    """
    stem, ext = os.path.splitext(filename)
    candidate = filename
    counter = 1
    while candidate.lower() in used:
        candidate = "%s_%d%s" % (stem, counter, ext)
        counter += 1
    used.add(candidate.lower())
    return candidate


def main():
    """Scan a directory tree for geotagged photos and export the findings.

    Prompts for a result-folder name and a search path. Every image found by
    ``jpgwalk`` is passed to ``getEXIF``; the non-empty results are written to
    ``$<name>/$<name>.json`` (UTF-8, 4-space indent) and the matching images
    are copied into the ``$<name>`` folder, relative to the current working
    directory. Nothing is written when no image yields a result.
    """
    import shutil  # local import: only needed for the copy step below

    # Preset list of file extensions to scan
    types = ["bmp", "jpg", "tiff", "gif", "png"]
    # Result folder; the "$" prefix is what jpgwalk uses to skip result
    # folders on later scans, so they never show up in the search results.
    saves = "$" + input("| SavePath: ").strip()
    # Walk the search path and collect every candidate image path
    pools = jpgwalk(input("| FindPath: "), types)

    # Keep only the files for which getEXIF returned something
    picex = [info for info in map(getEXIF, pools) if info]
    print("| Result %s" % len(picex))
    if not picex:
        return

    os.makedirs(saves, exist_ok=True)

    # JSON report with 4-space indentation, written as UTF-8 bytes
    with open("%s/%s.json" % (saves, saves), "wb") as report:
        report.write(
            json.dumps(picex, ensure_ascii=False, indent=4).encode("utf8"))

    # Copy the matching images next to the report. Photos from different
    # folders may share a file name; give each copy a distinct name instead
    # of letting a later one overwrite an earlier one.
    used_names = set()
    for item in picex:
        source_path = item["Filename"]
        target_name = _unique_name(os.path.basename(source_path), used_names)
        shutil.copyfile(source_path, "%s/%s" % (saves, target_name))
遍历方法
遍历指定目录及其所有下级目录,并返回全部图片的路径集合。这里要注意的是每次扫描后的拷贝行为都会生成缓存,所以通过指定 $ 来避开。
1
2
3
4
5
6
7
8
9
10
11
|
def jpgwalk(path, types):
    """Return the paths of all image files below *path*, searched recursively.

    Args:
        path: Directory to walk.
        types: Iterable of lower-case file extensions without the dot,
            e.g. ``["jpg", "png"]``. Matching is case-insensitive on the
            file side.

    Returns:
        A list of ``"<dir>/<file>"`` strings with backslashes in the directory
        part normalised to forward slashes. Directories whose path contains
        ``"$"`` are skipped entirely. A file must actually contain a dot to be
        considered, so an extension-less file called ``jpg`` is not a match.

    Prints the number of files found and the elapsed time.
    """
    _start = time.time()
    wanted = frozenset(types)
    _pools = []
    for _root, _dirs, _files in os.walk(path):
        if "$" in _root:
            # Every descendant path would contain "$" too, so stop descending.
            _dirs[:] = []
            continue
        prefix = _root.replace("\\", "/") + "/"
        _pools.extend(
            prefix + _item
            for _item in _files
            if "." in _item and _item.rsplit(".", 1)[1].lower() in wanted
        )
    # Report how many files were found and how long the walk took
    print("| Find %s \n| Time %.3fs" % (len(_pools), time.time() - _start))
    return _pools
经纬度格式化
度分秒转浮点,方便api调用查询,因为存在一些诡异的数据比如 1/0,所以默认返回0
1
2
3
4
5
6
7
|
def _ratio_to_float(text):
    """Convert one rational such as ``"30"`` or ``"4523/100"`` to a float."""
    numerator, slash, denominator = text.strip().partition("/")
    if slash:
        return float(numerator) / float(denominator)
    return float(numerator)


def cg(i):
    """Convert a degrees/minutes/seconds string to decimal degrees.

    Args:
        i: Text of the form ``"[deg, min, sec]"`` where each component is a
            number or a ``num/den`` ratio, e.g. ``"[116, 23, 4523/100]"``.

    Returns:
        ``deg + min / 60 + sec / 3600`` as a float, or ``0`` when the value
        cannot be used (zero denominator such as ``1/0``, non-numeric text,
        or fewer than three components).
    """
    try:
        # The components come from image metadata, so they are parsed as
        # plain numbers rather than evaluated as Python expressions.
        parts = [_ratio_to_float(x) for x in i[1:-1].split(", ")]
        return parts[0] + parts[1] / 60 + parts[2] / 3600
    except (ZeroDivisionError, ValueError, IndexError):
        return 0
EXIF信息整理
考虑到大部分的设备还未开始支持朝向、速度、测量依据等关键字,这里暂时只使用比较常见的,如有需要的朋友可以自行添加。毕竟得到的信息越多,对社工的帮助就越大。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
def _altitude_to_float(text):
    """Parse an altitude such as ``"56"`` or ``"1234/10"``; 0.0 if unusable."""
    numerator, slash, denominator = text.strip().partition("/")
    try:
        if slash:
            return float(numerator) / float(denominator)
        return float(numerator)
    except (ValueError, ZeroDivisionError):
        return 0.0


def getEXIF(filepath):
    """Collect GPS-related EXIF details for one image file.

    Args:
        filepath: Path of the image to inspect.

    Returns:
        ``None`` when exifread raises ``KeyError``, when the file has no tags,
        when latitude or longitude is missing, or when the longitude converts
        to 0. Otherwise a dict with:

        * ``Filename`` - the path that was passed in
        * any of ``GPSDOP``, ``GPSMeasureMode``, ``GPSAltitude``,
          ``Software``, ``Model``, ``Make`` that are present
        * ``GPS`` - ``(latitude, longitude)`` in signed decimal degrees
        * ``address`` - whatever ``address()`` returns for that position
        * ``Dates`` - the first available timestamp tag, if any
    """
    # Basic tags copied verbatim into the result
    _showlist = ['GPS GPSDOP', 'GPS GPSMeasureMode', 'GPS GPSAltitudeRef',
                 'GPS GPSAltitude', 'Image Software', 'Image Model',
                 'Image Make']
    # Tags that must both be present for the file to count as geotagged
    _XYlist = ["GPS GPSLatitude", "GPS GPSLongitude"]
    # Timestamp tags in order of preference
    _TimeList = ["EXIF DateTimeOriginal", "Image DateTime", "GPS GPSDate"]

    # Read the tags and close the file before doing anything slow
    # (the address lookup below goes over the network).
    with open(filepath, "rb") as _files:
        try:
            _tags = exifread.process_file(_files)
        except KeyError:
            return None

    if not _tags or not all(_key in _tags for _key in _XYlist):
        return None
    _longitude = cg(str(_tags["GPS GPSLongitude"]))
    if _longitude == 0.0:
        return None

    _infos = {'Filename': filepath}
    for _item in sorted(_showlist):
        if _item in _tags:
            _infos[_item.split()[-1]] = str(_tags[_item]).strip()

    # Signed decimal coordinates: south and west are negative
    _lat_sign = 1.0 if str(_tags.get("GPS GPSLatitudeRef", "N")) == "N" else -1.0
    _lon_sign = 1.0 if str(_tags.get("GPS GPSLongitudeRef", "E")) == "E" else -1.0
    _infos["GPS"] = (cg(str(_tags["GPS GPSLatitude"])) * _lat_sign,
                     _longitude * _lon_sign)
    # Resolve the coordinates to a street address
    _infos["address"] = address(_infos["GPS"])

    # Altitude: the reference tag is folded into the text and then dropped.
    if "GPSAltitudeRef" in _infos:
        _ref = _infos.pop("GPSAltitudeRef")
        if "GPSAltitude" in _infos:
            try:
                _ref_is_one = int(_ref) == 1
            except ValueError:
                _ref_is_one = False
            # NOTE(review): the EXIF spec appears to define GPSAltitudeRef 1
            # as "below sea level" rather than "above ground" - confirm
            # before relying on the wording of this label.
            _infos["GPSAltitude"] = "距%s%.2f米" % (
                "地面" if _ref_is_one else "海平面",
                _altitude_to_float(_infos["GPSAltitude"]))

    # First available timestamp, in the order given by _TimeList
    for _key in _TimeList:
        if _key in _tags:
            _infos["Dates"] = str(_tags[_key])
            break
    return _infos
地址转换
一个简单的爬虫,调用高德地图api进行坐标转换,考虑到原本是跨域,这里添加基础的反防爬代码。这里有个小细节,海外的一律都取不到(包括台湾),可以通过更换googlemap的api来实现全球查询。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# User-Agent strings to pick from at random for each request
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; InfoPath.2; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; 360SE)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
    "Mozilla/5.0 (X11; U; Linux i686; rv:1.7.3) Gecko/20040913 Firefox/0.10",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; ja) Presto/2.10.289 Version/12.00",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36",
)

# Reverse-geocoding endpoint; {0} = latitude, {1} = longitude, {2} = API key.
# The service is given "longitude,latitude", hence the {1},{0} order.
_REGEO_URL = "http://restapi.amap.com/v3/geocode/regeo?key={2}&s=rsv3&location={1},{0}&platform=JS&logversion=2.0&sdkversion=1.3&appname=http%3A%2F%2Fwww.gpsspg.com%2Fiframe%2Fmaps%2Famap_161128.htm%3Fmapi%3D3&csid=945C5A2C-E67F-4362-B881-9608D9BC9913"


def address(gps):
    """Look up the street address for a ``(latitude, longitude)`` pair.

    Sends one GET request to the AMap reverse-geocoding endpoint using the
    module-level ``KEY``, a randomly chosen User-Agent and browser-like
    headers, with 5-second connect and read timeouts.

    Args:
        gps: Sequence whose first two items are latitude and longitude.

    Returns:
        The ``formatted_address`` field of the response when the service
        answers with status ``"1"`` and info ``"OK"``; otherwise ``None``.
        Network errors and malformed responses are treated as "no address"
        (logged at debug level) rather than raised, so one failed lookup
        does not abort a scan.
    """
    # Browser-like request headers
    _header = {
        "User-Agent": random.choice(_USER_AGENTS),
        "Accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Referer": "http://www.gpsspg.com",
    }
    try:
        _res = requests.get(_REGEO_URL.format(gps[0], gps[1], KEY),
                            headers=_header, timeout=(5, 5))
        _json = _res.json()
        # Only trust the payload when the service reports success
        if _json and _json["status"] == "1" and _json["info"] == "OK":
            return (_json.get("regeocode") or {}).get("formatted_address")
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError) as e:
        logging.getLogger(__name__).debug(
            "reverse geocoding failed for %r: %s", gps, e)
    return None
实例
运行该代码 然后输入保存文件夹名和扫描位置即可
这边可以看到8019张中有396张存在有效的地理位置,打码的地方就不解释了,各位老司机~后期打算加入图像识别,和相似度识别。
以上所述是小编给大家介绍的用python找出那些被“标记”的照片,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对服务器之家网站的支持!