1 | #! /usr/bin/env python |
---|
2 | # |
---|
3 | # This version of pbsmon is based on the new_rack_pbsmon.py |
---|
4 | # |
---|
5 | # Authors: |
---|
6 | # Bas van der Vlies |
---|
7 | # Dennis Stam |
---|
8 | # |
---|
9 | # SVN Info: |
---|
10 | # $Id: new_rack_pbsmon.py 288 2013-02-19 15:55:59Z dennis $ |
---|
11 | # $URL$ |
---|
12 | # |
---|
13 | |
---|
14 | """ |
---|
15 | Usage: pbsmon [hosts].... |
---|
16 | |
---|
17 | Specifying hostnames: |
---|
18 | To specify a range use the [] to indicate a range, a couple of examples: |
---|
19 | |
---|
20 | The first five nodes of rack 16 |
---|
21 | - gb-r16n[1-5] |
---|
22 | |
---|
23 | The first five nodes and node 12 and 18 of rack 16 to 20 |
---|
24 | - gb-r[16-20]n[1-5,12,18] |
---|
25 | |
---|
26 | The first five nodes in rack 16 with padding enabled |
---|
27 | - gb-r[16]n[01-5] |
---|
28 | |
---|
29 | The ranges ([]) are not only limited to numbers, letters can also be used. |
---|
30 | """ |
---|
31 | |
---|
32 | import sys |
---|
33 | import re |
---|
34 | import re |
---|
35 | import types |
---|
36 | from optparse import OptionParser |
---|
37 | |
---|
38 | import pbs |
---|
39 | from PBSAdvancedParser import AdvancedParser |
---|
40 | from PBSQuery import PBSQuery |
---|
41 | from PBSQuery import PBSError |
---|
42 | |
---|
# Default view selection; the command line options -t/--table and -l/--list
# override these defaults.
# Remark: When both are True, extended view is being printed
PRINT_TABLE = True
PRINT_EXTENDED = False

# Which nodes must be skipped ( get_nodes() compares the complete node name )
EXCLUDE_NODES = [ 'login' ]

# Some global OPTS, set from the command line:
#   -w/--wide       sets OPT_SKIP_EMPTY_RACKS to False
#   -S/--servername sets OPT_SERVERNAME
OPT_SKIP_EMPTY_RACKS = True
OPT_SERVERNAME = None

## Begin: TABLE view opts

# A node has the following syntax gb-r10n10
#  r10 is rack name -> skip one char --> gives us rack number = 10
#  n10 is node name -> skip one char --> gives us node number = 10
# NODE_EXPR has to provide the named groups 'racknr' and 'nodenr'; they are
# used to determine automatically the number of nodes and racks
#
NODE_EXPR = "r(?P<racknr>[0-9]+)n(?P<nodenr>[0-9]+)"

# First rack number that is shown in the table view
START_RACK = 1

## End: TABLE view opts

## Begin: EXTENDED view opts

# Length of the longest node name and state string, updated by get_nodes()
# and used as column widths in the extended view
LENGTH_NODE = 0
LENGTH_STATE = 0

# printf-style templates of the extended view; header and row take
# ( width, node, width, state, jobs ), line takes the separator string that
# is built by repeating line_char
EXTENDED_PATTERNS = {
    'header' : ' %-*s | %-*s | %s',
    'row': ' %-*s | %-*s | %s',
    'line': ' %s',
    'line_char': '-',
}

## End: EXTENDED view opts
---|
81 | |
---|
# Pseudo states defined by this script ( they are not taken from the pbs
# module ). They are extra keys of PBS_STATES: pbs_ND_single marks a free node
# that runs a job, the others are only used as counters in the summary.
pbs_ND_single = 'job (single)'
pbs_ND_total = 'total'
pbs_ND_free_serial = 'free serial'
pbs_ND_free_parallel = 'free parallel'

# Maps a node state to the character that represents it in the table view
# and in the legend of the summary
PBS_STATES = {
    pbs.ND_free : '_',
    pbs.ND_down : 'X',
    pbs.ND_offline : '.',
    pbs.ND_reserve : 'R',
    pbs.ND_job_exclusive : 'J',
    pbs.ND_job_sharing : 'S',
    pbs.ND_busy : '*',
    pbs.ND_state_unknown : '?',
    pbs.ND_timeshared : 'T',
    pbs.ND_cluster : 'C',
    pbs_ND_single : 'j',
    pbs_ND_free_serial : '_',
    pbs_ND_free_parallel : '_',
    pbs_ND_total : ' '
}
---|
103 | |
---|
# Matches one '<digits>/<job id>' entry and captures the numeric job id,
# e.g. '1234' in '0/1234.batch.example.com' ( the leading digits are
# presumably the cpu slot the job occupies ). Nothing is required after the
# job id, so an entry without a server suffix keeps its complete id.
_JOB_ID_RE = re.compile( r'[0-9]+/([0-9]+)' )

def sanitize_jobs( jobs ):
    """
    Reduce the job entries of a node to a list of unique job ids.

    jobs -- iterable of strings that contain '<digits>/<job id>' entries;
            None or an empty iterable is accepted

    Returns a list of job id strings without duplicates, in the order in
    which the ids are first seen.
    """
    job_ids = list()
    seen = set()

    for job in jobs or ():
        for job_id in _JOB_ID_RE.findall( job ):
            # The same job is listed once per slot, report it only once
            if job_id not in seen:
                seen.add( job_id )
                job_ids.append( job_id )

    return job_ids
---|
112 | |
---|
def parse_nodename( nodename, node_expr=None ):
    """
    Extract the rack number and the node number from a node name.

    nodename  -- name of the node, e.g. 'gb-r10n12'
    node_expr -- optional regular expression with the named groups 'racknr'
                 and 'nodenr'; when omitted the module-level NODE_EXPR is
                 used

    Returns the tuple ( racknr, nodenr ) of integers. A number is 0 when the
    name does not match, when the expression does not define the group or
    when the group did not take part in the match.
    """
    if node_expr is None:
        node_expr = NODE_EXPR

    parts = re.search( node_expr, nodename, re.VERBOSE )
    if parts is None:
        return 0, 0

    numbers = list()
    for group in ( 'racknr', 'nodenr' ):
        try:
            value = parts.group( group )
        except IndexError:
            # The expression has no group with this name
            value = None

        # value is None for an optional group that did not match
        numbers.append( int( value or 0 ) )

    return tuple( numbers )
---|
129 | |
---|
def get_nodes( racknode=False, hosts=None ):
    """
    Query the PBS server and collect state and job information of the nodes.

    racknode -- when True the result is indexed by rack and node number as
                returned by parse_nodename(), otherwise by node name
    hosts    -- optional collection of node names; when it is not empty only
                these nodes are reported

    Returns
      racknode=False: ( nodes_dict, hosts_list ), nodes_dict[ name ] is a
                      dict with the keys 'state_char', 'state' and 'jobs' and
                      hosts_list contains the names of the reported nodes
      racknode=True : ( nodes_dict, number_of_racks, nodes_per_rack ),
                      nodes_dict[ racknr ][ nodenr ] is such a dict and the
                      two numbers are the highest rack and node number found

    Nodes listed in EXCLUDE_NODES are skipped. A state without an entry in
    PBS_STATES is shown with the character of pbs.ND_state_unknown.

    Side effects: LENGTH_NODE and LENGTH_STATE are raised to the longest node
    name and state string. When the server can not be queried the error is
    printed and the program exits with status -1.
    """
    global LENGTH_NODE
    global LENGTH_STATE

    try:
        if OPT_SERVERNAME:
            p = PBSQuery( OPT_SERVERNAME )
        else:
            p = PBSQuery()
    except PBSError as reason:
        print( 'Error: %s' % reason )
        sys.exit( -1 )

    p.new_data_structure()

    try:
        nodes = p.getnodes( [ 'state', 'jobs', 'properties' ] )
    except PBSError as reason:
        print( 'Error: %s' % reason )
        sys.exit( -1 )

    nodes_dict = dict()
    hosts_list = list()
    number_of_racks = 0
    nodes_per_rack = 0

    for node, attr in nodes.items():
        if node in EXCLUDE_NODES:
            continue

        if hosts and node not in hosts:
            continue

        # Down takes precedence over the other states of the node
        if pbs.ND_down in attr.state:
            state = pbs.ND_down
        else:
            state = attr.state[ 0 ]

        state_char = PBS_STATES.get( state, PBS_STATES[ pbs.ND_state_unknown ] )

        # A node that is still free but runs a job is shown as partly used
        if attr.is_free() and attr.has_job():
            state = pbs.ND_busy
            state_char = PBS_STATES[ pbs_ND_single ]

        # Setting the longest length
        LENGTH_NODE = max( LENGTH_NODE, len( node ) )
        LENGTH_STATE = max( LENGTH_STATE, len( state ) )

        if attr.has_key( 'jobs' ):
            jobs = sanitize_jobs( attr.jobs )
        else:
            jobs = []

        info = { 'state_char': state_char, 'state': state, 'jobs': jobs }

        if racknode:
            racknr, nodenr = parse_nodename( node )

            number_of_racks = max( number_of_racks, racknr )
            nodes_per_rack = max( nodes_per_rack, nodenr )

            # Only rack numbers are used as keys in this mode
            nodes_dict.setdefault( racknr, dict() )[ nodenr ] = info
        else:
            hosts_list.append( node )
            nodes_dict[ node ] = info

    if not racknode:
        return nodes_dict, hosts_list

    return nodes_dict, number_of_racks, nodes_per_rack
---|
224 | |
---|
def _generate_index( str ):
    """
    Split a string into its text and number fragments.

    Runs of the digits 0-9 are converted to integers, all other runs are kept
    as text, e.g. 'gb-r16n5' gives ( 'gb-r', 16, 'n', 5 ). An empty string
    gives an empty tuple.

    The parameter keeps its original name, which hides the builtin str
    inside this function.
    """
    index = list()

    # Every match is ( digits, text ) with exactly one of the two filled in
    for digits, text in re.findall( r'([0-9]+)|([^0-9]+)', str ):
        if digits:
            index.append( int( digits ) )
        else:
            index.append( text )

    return tuple( index )

def real_sort( inlist ):
    """
    Return the strings of inlist as a new list in natural order, so that
    'n2' comes before 'n10'.

    Number fragments are compared by value and sort before text fragments.
    Names with the same fragments, e.g. 'n007' and 'n7', are ordered by plain
    string comparison.
    """
    def sort_key( item ):
        # Tag the fragments, so a number is never compared with text
        fragments = tuple(
            ( not isinstance( fragment, int ), fragment )
            for fragment in _generate_index( item )
        )
        return fragments, item

    return sorted( inlist, key=sort_key )
---|
256 | |
---|
def print_table():
    """
    Print the rack/node overview: one column per rack, one row per node
    number and the state character of every node in the cells.

    The two header lines show the tens and the units of the rack numbers.
    Racks without nodes are left out when OPT_SKIP_EMPTY_RACKS is set.
    """
    nodes, racknr, nodenr = get_nodes( True )

    ## Code herebelow has been taken from the new_rack_pbsmon.py
    racks = range( START_RACK, racknr + 1 )

    # First header line: the tens of the rack number, shown above every
    # multiple of ten or above the first displayed rack that follows it
    tens = [ ' ' ]
    pending = None
    for rack in racks:
        if rack % 10 == 0:
            label = '%d' % ( rack // 10 )
            pending = label
        else:
            label = ' '

        if not OPT_SKIP_EMPTY_RACKS:
            tens.append( label )
        elif rack in nodes:
            if pending:
                label = pending
                pending = None
            tens.append( label )

    # Second header line: the units of the rack number
    units = [ ' ' ]
    for rack in racks:
        if not OPT_SKIP_EMPTY_RACKS or rack in nodes:
            units.append( str( rack % 10 ) )

    print( '' )
    print( ' '.join( tens ) )
    print( ' '.join( units ) )

    for node in range( 1, nodenr + 1 ):
        cells = [ '%2d' % node ]

        for rack in racks:
            if OPT_SKIP_EMPTY_RACKS and rack not in nodes:
                continue

            try:
                cells.append( nodes[ rack ][ node ][ 'state_char' ] )
            except KeyError:
                # No such node in this rack
                cells.append( ' ' )

        print( ' '.join( cells ) )

    print( '' )
---|
310 | |
---|
def print_table_summary():
    """
    Print the legend of the node states with the number of nodes per state,
    two entries on a line.

    Nodes with a name that starts with 'login' are not counted. A state
    without an entry in PBS_STATES is counted as pbs.ND_state_unknown. A free
    node that runs a job is counted as pbs_ND_single instead of pbs.ND_free.

    When the server can not be queried the error is printed and the program
    exits with status -1.
    """
    try:
        if OPT_SERVERNAME:
            p = PBSQuery( OPT_SERVERNAME )
        else:
            p = PBSQuery()
    except PBSError as reason:
        print( 'error: %s' % reason )
        sys.exit( -1 )

    # get the state of the nodes
    try:
        nodes = p.getnodes( [ 'state', 'jobs', 'properties' ] )
    except PBSError as reason:
        print( 'error: %s' % reason )
        sys.exit( -1 )

    count_states = dict()
    for key in PBS_STATES:
        count_states[ key ] = 0

    for nodename, node in nodes.items():

        # Skip login nodes in status display
        if nodename.startswith( 'login' ):
            continue

        state = node[ 'state' ][ 0 ]
        if state not in count_states:
            state = pbs.ND_state_unknown

        count_states[ state ] += 1
        count_states[ pbs_ND_total ] += 1

        if not node.is_free():
            continue

        if node.has_job():
            # can happen for single CPU jobs
            count_states[ pbs.ND_free ] -= 1
            count_states[ pbs_ND_single ] += 1
            continue

        # A node does not need to have properties
        try:
            properties = node[ 'properties' ]
        except KeyError:
            properties = ()

        if 'infiniband' in properties:
            count_states[ pbs_ND_free_parallel ] += 1
        elif 'ifiniband' in properties:
            # NOTE(review): 'ifiniband' is kept as found; it looks like a
            # typo or a way to disable this counter, so free nodes without
            # infiniband are not counted as free serial -- confirm the intent
            count_states[ pbs_ND_free_serial ] += 1

    entries = [
        ' %s %-13s : %-5d' % ( PBS_STATES[ state ], state, count_states[ state ] )
        for state in sorted( PBS_STATES )
    ]

    # Two legend entries on a line
    for start in range( 0, len( entries ), 2 ):
        print( ' '.join( entries[ start:start + 2 ] ) )
---|
def print_extended( hosts=None ):
    """
    Print one row per node with its name, state and job ids.

    hosts -- optional collection of node names, passed on to get_nodes() to
             limit the output to these nodes

    The rows are in the order given by real_sort(). The separator below the
    header is as long as the longest printed line.
    """
    nodes, ihosts = get_nodes( hosts=hosts )

    # The column widths are known only after get_nodes() has run
    node_width = LENGTH_NODE + 2
    state_width = LENGTH_STATE + 2

    header = EXTENDED_PATTERNS[ 'header' ] % ( node_width, 'Node', state_width, 'State', 'Jobs' )

    rows = list()
    for name in real_sort( ihosts ):
        info = nodes[ name ]
        job_ids = ','.join( info[ 'jobs' ] )
        rows.append( EXTENDED_PATTERNS[ 'row' ] % ( node_width, name, state_width, info[ 'state' ], job_ids ) )

    longest = max( [ len( header ) ] + [ len( row ) for row in rows ] )
    separator = EXTENDED_PATTERNS[ 'line' ] % ( EXTENDED_PATTERNS[ 'line_char' ] * longest )

    # Empty line, header, separator, the rows and a closing empty line
    print( '\n'.join( [ '', header, separator, '\n'.join( rows ), '' ] ) )
---|
409 | |
---|
if __name__ == '__main__':

    parser = AdvancedParser( usage=__doc__ )

    # ( short option, long option, destination, help ) of the switches, in
    # the order in which they are registered
    switches = (
        ( "-t", "--table", "table", "Show an table" ),
        ( "-l", "--list", "extended", "Show node rows with state and jobinfo" ),
        ( "-s", "--summary", "summary", "Display a short summary" ),
        ( "-a", "--all", "summary", "Display a short summary" ),
        ( "-w", "--wide", "wide", "Wide display for node status ( only when -t is used )" ),
    )
    for short_opt, long_opt, dest, help_text in switches:
        parser.add_option( short_opt, long_opt, dest=dest, action="store_true", help=help_text )

    parser.add_option( "-S", "--servername", dest="servername", help="Change the default servername" )

    parser.set_defaults(
        table=PRINT_TABLE,
        summary=False,
        extended=PRINT_EXTENDED,
        servername=None
    )

    options, args = parser.parse_args()

    if options.servername:
        OPT_SERVERNAME = options.servername

    if options.wide:
        OPT_SKIP_EMPTY_RACKS = False

    # Host names on the command line select the extended view
    if args:
        options.extended = True

    # Only one of the two views is printed
    if options.extended and PRINT_TABLE:
        options.table = False

    if options.table and PRINT_EXTENDED:
        options.extended = False

    if options.extended:
        print_extended( args )
    elif options.table:
        print_table()
    else:
        print( 'Something is wrong, bye!' )
        sys.exit( -1 )

    if options.summary:
        print_table_summary()
---|