脚本如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
from openpyxl import load_workbook booksheet = workbook.active #获取当前活跃的sheet,默认是第一个sheet #如果想获取别的sheet页采取下面这种方式,先获取所有sheet页名,在通过指定那一页。 # sheets = workbook.get_sheet_names() # 从名称获取sheet # booksheet = workbook.get_sheet_by_name(sheets[0]) #获取sheet页的行数据 rows = booksheet.rows #获取sheet页的列数据 columns = booksheet.columns i = 0 # 迭代所有的行 for row in rows: i = i + 1 line = [col.value for col in row] cell_data_1 = booksheet.cell(row = i, column = 3 ).value #获取第i行1 列的数据 cell_data_2 = booksheet.cell(row = i, column = 4 ).value #获取第i行 2 列的数据 cell_data_3 = booksheet.cell(row = i, column = 8 ).value #获取第i行 3 列的数据 cell_data_4 = booksheet.cell(row = i, column = 18 ).value #获取第i行 4 列的数据 print (cell_data_1, cell_data_2, cell_data_3, cell_data_4) |
实例:python读取excel数据做分类统计
某excel中记录了某个人的通话记录,下面程序将按照通话地点,通话类型等统计通话时间
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# -*- coding:utf-8 -*- import xlrd import datetime infos = [] info_file = xlrd.open_workbook( 'src.xls' ) #打开excel文件 info_sheet = info_file.sheets()[ 0 ] #通过索引顺序获取工作表 row_count = info_sheet.nrows #获取行数,列数ncols for row in range ( 1 ,row_count): time_string = info_sheet.cell(row, 3 ).value time_s_sp = time_string.split( ':' ) infos.append( #该数组里放了row_count个字典 { 'type' :info_sheet.cell(row, 2 ).value, #获取单元格,通话类型,主叫被叫 'other_cellphone' :info_sheet.cell(row, 0 ).value, #对方号码, 'timespan' :datetime.timedelta(seconds = int (time_s_sp[ 2 ]),minutes = int (time_s_sp[ 1 ]),hours = int (time_s_sp[ 0 ])), 'gpscity' :info_sheet.cell(row, 5 ).value #通话是本地还是外地 } ) time_all = datetime.timedelta(seconds = 0 ) #初始化 time_types = {} time_classes = {} time_numbers = {} for infor in infos: #取出该数组里的字典 time_all + = infor[ 'timespan' ] #求总通话次数 infor_type = infor[ 'type' ] if infor_type in time_types: time_types[infor_type] + = infor[ 'timespan' ] else : time_types[infor_type] = infor[ 'timespan' ] #按通话类型统计通话时间 infor_class = infor[ 'gpscity' ] if infor_class in time_classes: time_classes[infor_class] + = infor[ 'timespan' ] else : time_classes[infor_class] = infor[ 'timespan' ] #这里相当于先分类赋值再++,按归属地统计通话时间 infor_number = infor[ 'other_cellphone' ] if infor_number in time_numbers: time_numbers[infor_number] + = infor[ 'timespan' ] else : time_numbers[infor_number] = infor[ 'timespan' ] #根据号码统计通话时间 print '总通话时间:%s' % time_all print print '总通话方式分类' for k,v in time_types.items(): print k.encode( 'utf-8' ),v print print '通话类型分类:' for k,v in time_classes.items(): print k.encode( 'utf-8' ),v print print '对方号码分类:' for k,v in time_numbers.items(): print k,v |
再优化下代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# -*- coding:utf-8 -*- import xlrd from datetime import timedelta def read_excel(file_excel): #读excel并将需要的数据分类放在数组里 infos = [] info_file = xlrd.open_workbook(file_excel) info_sheet = info_file.sheets()[ 0 ] row_count = info_sheet.nrows for row in range ( 1 ,row_count): time_string = info_sheet.cell(row, 3 ).value time_s_sp = time_string.split( ':' ) infos.append( { 'type' :info_sheet.cell(row, 2 ).value, 'other_cellphone' :info_sheet.cell(row, 0 ).value, 'timespan' :timedelta(seconds = int (time_s_sp[ 2 ]),minutes = int (time_s_sp[ 1 ]),hours = int (time_s_sp[ 0 ])), 'gpscity' :info_sheet.cell(row, 5 ).value } ) return infos def count_cell(list_dirs,infotype): #统计总通话及分类统计结果,存在字典里 result_dir = {} time_all = timedelta(seconds = 0 ) for list_dir in list_dirs: time_all + = list_dir[ 'timespan' ] info_type = list_dir[infotype] if info_type not in result_dir: result_dir[info_type] = list_dir[ 'timespan' ] else : result_dir[info_type] + = list_dir[ 'timespan' ] return time_all,result_dir def print_result(result_dir): #打印数据 for k,v in result_dir.items(): print k.encode( 'utf-8' ),v if __name__ = = "__main__" : list_dirs = read_excel( 'src.xls' ) time_all,result_type = count_cell(list_dirs, 'type' ) result_cell = count_cell(list_dirs, 'other_cellphone' ) result_gpscity = count_cell(list_dirs, 'gpscity' ) print '总通话时间:%s' % time_all print '按照通话类型分类:' print_result(result_type) print '按照号码分类:' print_result(result_cell[ 1 ]) print '按照归属地分类:' print_result(result_gpscity[ 1 ]) |
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/hui-shao/p/read_xlsx.html