最近发现python可以做很多事情,在监控服务器方面有其独特的优势:耗费资源少,开发周期短。
首先我们做一个定时或者实时脚本timedtask.py,让其定时监控目标服务器,两种方式:
第一种:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @time : 2017/11/27 15:59
# @desc : scheduled task: run the monitoring round at a fixed interval
# @file : timedtask.py
# @software: pycharm
"""Variant 1: call the monitoring entry point in-process on a fixed interval."""
import os
import time

from monitorserver import alltask


def roll_back(cmd, inc=60):
    """Run ``alltask()`` forever, sleeping ``inc`` seconds between rounds.

    Args:
        cmd: Unused by this variant; kept so existing callers that pass a
            command string keep working.
        inc: Number of seconds to sleep after each monitoring round.
    """
    while True:
        # One monitoring round: alltask() collects and prints every metric.
        alltask()
        time.sleep(inc)


if __name__ == "__main__":
    # Guarded so importing this module does not start the endless loop.
    roll_back("echo %time%", 5)
第二种:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @time : 2017/11/27 15:59
# @desc : scheduled task: run the monitoring script at a fixed interval
# @file : timedtask.py
# @software: pycharm
"""Variant 2: launch monitorserver.py as a child process on a fixed interval."""
import os
import time

# Shell command that starts the monitoring script; adjust the path to where
# monitorserver.py actually lives.
MONITOR_COMMAND = 'python /home/../monitorserver.py'


def roll_back(cmd, inc=60):
    """Run the monitoring script forever, sleeping ``inc`` seconds between runs.

    Args:
        cmd: Unused by this variant; kept so existing callers that pass a
            command string keep working.
        inc: Number of seconds to sleep after each run.
    """
    while True:
        # os.system blocks until the script exits, so runs never overlap.
        os.system(MONITOR_COMMAND)
        time.sleep(inc)


if __name__ == "__main__":
    # Guarded so importing this module does not start the endless loop.
    roll_back("echo %time%", 5)
做过监控的同学应该都知道,我们主要监控服务器的平均负载、磁盘、内存、cpu、网络接口(流量)和端口。针对这些指标,我做了以下远程监控;第一种和第二种方式使用的监控代码相同,代码monitorserver.py如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @time : 2017/11/27 15:59
# @desc : server monitoring code
# @file : monitorserver.py
# @software: pycharm
"""Print health metrics of a remote Linux host, collected over SSH.

Each public ``*_info`` / ``*_stat`` function logs in to the configured host
with password authentication, runs one command, parses its output and
prints a small report.  ``alltask()`` runs all of them concurrently.

Written for Python 3 with pexpect >= 4.0 (``spawn(..., encoding=...)``).
The parsing helpers (``parse_*``, ``memory_usage``, ``cpu_usage_percent``)
are pure functions and do not need pexpect.
"""
import re
import threading
import time

try:
    import pexpect
except ImportError:  # keep the pure parsing helpers importable without pexpect
    pexpect = None

# Connection settings: replace the placeholders with the real account.
REMOTE_USER = "远程服务器用户名"
REMOTE_HOST = "127.0.0.1"  # IP address of the remote server
REMOTE_PASSWORD = "远程服务器密码"

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Report labels for the columns of a vmstat data row, in column order.
VMSTAT_LABELS = (
    "等待运行进程的数量:",
    "处于不间断状态的进程:",
    "使用虚拟内存(swap)的总量:",
    "空闲的内存总量:",
    "用作缓冲的内存总量:",
    "用作缓存的内存总量:",
    "交换出内存总量 :",
    "交换入内存总量 :",
    "从一个块设备接收:",
    "发送到块设备:",
    "每秒的中断数:",
    "每秒的上下文切换数:",
    "用户空间上进程运行的时间百分比:",
    "内核空间上进程运行的时间百分比:",
    "闲置时间百分比:",
    "等待io的时间百分比:",
    "从虚拟机偷取的时间百分比:",
)

# (report label, /proc/cpuinfo key) pairs printed for every processor.
CPUINFO_LABELS = (
    ("cpu属于的名字及其编号、标称主频:", "model name"),
    ("cpu属于其系列中的哪一代的代号:", "model"),
    ("cpu制造商:", "vendor_id"),
    ("cpu产品系列代号:", "cpu family"),
    ("cpu的实际使用主频:", "cpu MHz"),
)

# Serialises report output so reports from concurrent threads do not interleave.
_PRINT_LOCK = threading.Lock()


def _emit(lines):
    """Print ``lines`` as one block while holding the print lock."""
    with _PRINT_LOCK:
        print("\n".join(lines))


def _banner(title):
    """Return the two header lines of a report: the title and a timestamp."""
    stamp = time.strftime(TIME_FORMAT, time.localtime())
    return [title, "*******************时间: %s ******************" % stamp]


def _last_line_fields(text):
    """Return the whitespace-separated fields of the last non-blank line."""
    rows = [line.split() for line in text.splitlines() if line.strip()]
    if not rows:
        raise ValueError("remote command produced no output")
    return rows[-1]


def ssh_command(user, host, password, command):
    """Start ``command`` on ``host`` through ssh and answer the password prompt.

    Args:
        user: Remote login name.
        host: Remote host name or IP address.
        password: Password sent at the ``password: `` prompt.
        command: Command line executed by the remote shell.

    Returns:
        The ``pexpect.spawn`` child after the password was sent, or ``None``
        when no password prompt appeared before the timeout.

    Raises:
        RuntimeError: If pexpect is not installed.
    """
    if pexpect is None:
        raise RuntimeError("pexpect is required for ssh access (pip install pexpect)")
    new_key_prompt = "Are you sure you want to continue connecting"
    # Passing an argument list hands ``command`` to ssh as a single argument,
    # so its quotes and pipes are interpreted by the remote shell instead of
    # being split locally.
    child = pexpect.spawn("ssh", ["-l", user, host, command], encoding="utf-8")
    index = child.expect([pexpect.TIMEOUT, new_key_prompt, "password: "])
    if index == 1:
        # Unknown host key: accept it, then wait once for the password prompt.
        child.sendline("yes")
        index = child.expect([pexpect.TIMEOUT, "password: "])
    if index == 0:
        print("error!")
        print("ssh could not login. here is what ssh said:")
        print(child.before, child.after)
        child.close()
        return None
    child.sendline(password)
    return child


def remote_output(command):
    """Run ``command`` on the configured host and return its output as text.

    Raises:
        RuntimeError: If the ssh login did not reach the password prompt.
    """
    child = ssh_command(REMOTE_USER, REMOTE_HOST, REMOTE_PASSWORD, command)
    if child is None:
        raise RuntimeError("ssh login failed; could not run %r" % command)
    try:
        child.expect(pexpect.EOF)
        return child.before
    finally:
        child.close()  # release the pty of the finished ssh process


def parse_meminfo(text):
    """Parse ``/proc/meminfo`` text into a ``{field name: integer value}`` dict.

    Lines that are not of the form ``Name: <number> ...`` are ignored.
    """
    values = {}
    for line in text.splitlines():
        name, sep, rest = line.partition(":")
        fields = rest.split()
        if sep and fields and fields[0].isdigit():
            values[name.strip()] = int(fields[0])
    return values


def memory_usage(values):
    """Return ``(memory_percent, swap_percent)`` from parsed meminfo values.

    Free, buffer and cache memory all count as available.  ``swap_percent``
    is ``None`` when the host has no swap configured.

    Raises:
        ValueError: If ``MemTotal`` is missing or zero.
    """
    total = values.get("MemTotal", 0)
    if total <= 0:
        raise ValueError("MemTotal missing from /proc/meminfo output")
    available = sum(values.get(key, 0) for key in ("MemFree", "Buffers", "Cached"))
    mem_percent = 100.0 * (total - available) / total
    swap_total = values.get("SwapTotal", 0)
    if swap_total == 0:
        return mem_percent, None
    swap_percent = 100 - 100.0 * values.get("SwapFree", 0) / swap_total
    return mem_percent, swap_percent


def mem_info():
    """Memory monitoring: print totals and utilisation from /proc/meminfo."""
    values = parse_meminfo(remote_output("cat /proc/meminfo"))
    mem_percent, swap_percent = memory_usage(values)
    lines = _banner("******************************内存监控*********************************")
    lines += [
        "总内存: %s" % values["MemTotal"],
        "空闲内存: %s" % values.get("MemFree", 0),
        "给文件的缓冲大小: %s" % values.get("Buffers", 0),
        "高速缓冲存储器使用的大小: %s" % values.get("Cached", 0),
        "被高速缓冲存储用的交换空间大小: %s" % values.get("SwapCached", 0),
    ]
    if swap_percent is None:
        lines.append("交换内存总共为:0")
    else:
        lines.append("交换内存利用率: %.2f %%" % swap_percent)
    lines.append("内存利用率: %.2f %%" % mem_percent)
    _emit(lines)


def parse_vmstat(text):
    """Pair each vmstat report label with the matching column of the last row.

    Columns beyond the known labels are dropped and missing trailing columns
    are simply omitted, so differing vmstat versions do not raise.
    """
    return list(zip(VMSTAT_LABELS, _last_line_fields(text)))


def vm_stat_info():
    """Print process, memory, swap, block I/O, system and CPU figures from vmstat."""
    pairs = parse_vmstat(remote_output("vmstat 1 2 | tail -n 1"))
    lines = _banner("****************************内核线程、虚拟内存、磁盘、陷阱和 cpu 活动的统计信息监控****************************")
    lines += ["%s %s" % pair for pair in pairs]
    _emit(lines)


def parse_cpuinfo(text):
    """Parse ``/proc/cpuinfo`` text into one ``{key: value}`` dict per processor.

    A new dict is started at every ``processor`` line, so entries of different
    processors are never shared.
    """
    processors = []
    for line in text.splitlines():
        name, sep, value = line.partition(":")
        name = name.strip()
        if not sep or not name:
            continue
        if name == "processor":
            processors.append({})
        if processors:
            processors[-1][name] = value.strip()
    return processors


def cpu_info():
    """CPU monitoring: print the processor count and per-processor details."""
    processors = parse_cpuinfo(remote_output("cat /proc/cpuinfo"))
    lines = _banner("***************************************cpu监控***************************************")
    lines.append("cpu数目: %d" % len(processors))
    for processor in processors:
        for label, key in CPUINFO_LABELS:
            # Not every architecture reports every key; print blank if absent.
            lines.append("%s %s" % (label, processor.get(key, "")))
    _emit(lines)


def parse_loadavg(text):
    """Return the five fields of ``/proc/loadavg``.

    Raises:
        ValueError: If the last output line has fewer than five fields.
    """
    fields = _last_line_fields(text)
    if len(fields) < 5:
        raise ValueError("unexpected /proc/loadavg output: %r" % text)
    return fields[:5]


def load_stat():
    """Load monitoring: print the 1/5/15-minute load averages and process info."""
    one, five, fifteen, procs, last_pid = parse_loadavg(remote_output("cat /proc/loadavg"))
    lines = _banner("************************负载均衡监控****************************")
    lines += [
        "系统1分钟内的平均负载: %s" % one,
        "系统5分钟内的平均负载: %s" % five,
        "系统15分钟内的平均负载: %s" % fifteen,
        "分子是正在运行的进程数,分母为总进程数: %s" % procs,
        "最近运行的进程id: %s" % last_pid,
    ]
    _emit(lines)


def parse_net_dev(text):
    """Parse ``/proc/net/dev`` text into ``{interface: {'receive', 'transmit'}}``.

    Values are the received/transmitted byte counters converted to MiB and
    rounded to two decimals.  Header and malformed lines are skipped.
    """
    mib = 1024.0 * 1024.0
    net = {}
    for line in text.splitlines():
        name, sep, rest = line.partition(":")
        fields = rest.split()
        if not sep or len(fields) < 9:
            continue
        if not (fields[0].isdigit() and fields[8].isdigit()):
            continue
        net[name.strip()] = {
            "receive": round(float(fields[0]) / mib, 2),
            "transmit": round(float(fields[8]) / mib, 2),
        }
    return net


def ionetwork():
    """Network monitoring: print received/transmitted MiB per interface."""
    net = parse_net_dev(remote_output("cat /proc/net/dev"))
    lines = _banner("************************获取网络接口的输入和输出监控****************************")
    lines.append(str(net))
    _emit(lines)


def parse_df(text):
    """Parse ``df -hP`` text into rows ``[fs, size, used, avail, use%, mount]``.

    Only rows whose fifth column is a percentage (or ``-``) are kept, which
    drops the header and any unrelated lines.  A mount point containing
    spaces is re-joined into one value.
    """
    rows = []
    for line in text.splitlines():
        fields = line.split()
        if len(fields) >= 6 and re.match(r"(\d+%|-)$", fields[4]):
            rows.append(fields[:5] + [" ".join(fields[5:])])
    return rows


def disk_stat():
    """Disk monitoring: print size, usage and mount point of each filesystem."""
    # -P keeps every filesystem on a single line even when its name is long.
    rows = parse_df(remote_output("df -hP"))
    lines = _banner("************************磁盘空间监控****************************")
    for filesystem, size, used, avail, use_percent, mount in rows:
        lines.append(" ".join([
            "\t文件系统:", filesystem,
            "\t容量:", size,
            "\t已用:", used,
            "\t可用:", avail,
            "\t已用%:", use_percent,
            "\t挂载点:", mount,
        ]))
    _emit(lines)


def getcomstr():
    """Port monitoring: print the listening TCP sockets reported by netstat.

    The remote account usually needs sufficient privileges for netstat to
    show the owning process of every socket.
    """
    output = remote_output("netstat -tpln")
    lines = _banner("******************************端口监控*********************************")
    lines.append(output)
    _emit(lines)


def parse_cpu_samples(text):
    """Return the counters of every aggregate ``cpu`` line of /proc/stat in ``text``.

    Each sample is a list of ints in kernel order
    (user, nice, system, idle, iowait, irq, softirq, steal, guest, ...).
    """
    samples = []
    for line in text.splitlines():
        fields = line.split()
        if len(fields) >= 5 and fields[0] == "cpu" and all(f.isdigit() for f in fields[1:]):
            samples.append([int(f) for f in fields[1:]])
    return samples


def cpu_time_totals(sample):
    """Return ``(total, idle)`` jiffies of one /proc/stat ``cpu`` sample.

    The total covers user through steal (at most the first eight counters);
    guest time is excluded because the kernel already includes it in user.
    """
    return sum(sample[:8]), sample[3]


def cpu_usage_percent(first, second):
    """Return the busy percentage between two /proc/stat ``cpu`` samples.

    Returns 0.0 when no time elapsed between the samples.
    """
    total_1, idle_1 = cpu_time_totals(first)
    total_2, idle_2 = cpu_time_totals(second)
    elapsed = total_2 - total_1
    if elapsed <= 0:
        return 0.0
    return 100.0 * (elapsed - (idle_2 - idle_1)) / elapsed


def cpu():
    """CPU usage: sample /proc/stat twice, one second apart, and print the usage."""
    # Both samples come from one ssh session so the interval is exactly the
    # remote ``sleep`` and not the (variable) login latency.
    samples = parse_cpu_samples(
        remote_output('grep "^cpu " /proc/stat; sleep 1; grep "^cpu " /proc/stat')
    )
    if len(samples) < 2:
        raise ValueError("expected two cpu samples from /proc/stat")
    first, second = samples[0], samples[-1]
    total_1, idle_1 = cpu_time_totals(first)
    total_2, idle_2 = cpu_time_totals(second)
    lines = _banner("************************cpu使用情况****************************")
    lines += [
        "总的cpu时间1: %s" % total_1,
        "总的cpu时间2: %s" % total_2,
        "时间间隔内的所有时间片: %s" % (total_2 - total_1),
        "计算空闲时间idle: %s" % (idle_2 - idle_1),
        "计算cpu使用率: %.2f %%" % cpu_usage_percent(first, second),
    ]
    _emit(lines)


def _run_guarded(task):
    """Run ``task`` and report a failure instead of letting the thread die silently."""
    try:
        task()
    except Exception as exc:  # thread boundary: one failed check must not hide the others
        _emit(["%s failed: %s" % (task.__name__, exc)])


def alltask():
    """Run every monitoring check in its own thread and wait for all of them.

    This is the entry point imported by the in-process scheduler (variant 1)
    and also what runs when the module is executed as a script (variant 2).
    """
    tasks = (mem_info, vm_stat_info, cpu_info, load_stat,
             ionetwork, disk_stat, getcomstr, cpu)
    threads = [threading.Thread(target=_run_guarded, args=(task,)) for task in tasks]
    for thread in threads:
        thread.start()
    # Wait so that consecutive monitoring rounds never overlap.
    for thread in threads:
        thread.join()


if __name__ == "__main__":
    alltask()
监控结果如下:
接下来要做的是把监控结果可视化,可惜我没时间做,就交给各位了!!!
花了两天时间整理的,分享给大家,希望对各位有帮助!!!
以上所述是小编给大家介绍的用python实现自动化监控远程服务器详解整合,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对服务器之家网站的支持!
原文链接:https://blog.csdn.net/ITLearnHall/article/details/80693913