source: trunk/examples/pbs_jobmonitor @ 282

Last change on this file since 282 was 282, checked in by bas, 12 years ago

renamed some file and added documentation

  • Property svn:executable set to *
  • Property svn:keywords set to Id URL
File size: 2.5 KB
Line 
1#!/usr/bin/env python
2#
3# Author:
4#  Willem Vermin, SARA, April 2012
5#
6# SVN Info:
7#   $Id$
8#   $URL: trunk/examples/pbs_jobmonitor $
9#
10# pbs_jobmonitor, pbs_joblogin <jobnr> [nodenr]
11#    jobnr: the number of the job
12#    nodenr: the rank of the node in the job
13#
14# Depending on the name under which this script is called, it performs
15# the following:
16
17# called as pbs_jobmonitor:
18#       shows the output of top -u user on the node
19#       - one cycle of top
20#       - user: the user the job belongs to
21#
22# called as pbs_joblogin:
23#       logs in to the node as the user who invokes this script
24#            (os.getenv('USER'))
25#
26from PBSQuery import PBSQuery
27import sys,os
def uniq(seq, idfun=None):
    """Return the items of seq as a list with duplicates removed.

    The first occurrence of every item is kept and the original order
    is preserved.

    seq   -- any iterable
    idfun -- optional function mapping an item to the (hashable) value
             that decides whether two items count as the same; when
             omitted the item itself is used

    Based on http://www.peterbe.com/plog/uniqifiers-benchmark
    """
    if idfun is not None:
        key_of = idfun
    else:
        key_of = lambda value: value

    markers = set()   # keys that were already encountered
    kept = []
    for element in seq:
        key = key_of(element)
        if key not in markers:
            markers.add(key)
            kept.append(element)
    return kept
41
def usage(a):
    """Print a short help text to standard output.

    a -- the name this script was called as; for 'pbs_jobmonitor' and
         'pbs_joblogin' a one-line description precedes the usage text

    Every print call gets exactly one pre-formatted string, so the
    output is the same whether the interpreter treats print as a
    statement (Python 2) or as a function (Python 3).
    """
    if a == 'pbs_jobmonitor':
        print('%s shows the system usage of a node where a job is running' % (a,))
    if a == 'pbs_joblogin':
        print('%s logs you in to a node where a job is running' % (a,))

    print('Usage:')
    print('%s <jobnumber> [nodenumber]' % (a,))
    print('where <jobnumber> is the number of the job')
    print('      nodenumber is the rank number of the node allocated to the job')
    print('      (default 0)')
53 
def parse_args(argv):
    """Extract the job id and the node rank from the command line.

    argv -- the argument list, program name first (as in sys.argv)

    Returns a (jobid, rank) tuple, or None when the job id is missing,
    does not start with a number, or the node rank is not an integer.
    rank is 0 when no node rank is given.
    """
    if len(argv) < 2:
        return None
    jobid = argv[1]
    try:
        # check if numerical jobnumber; only the part before the first
        # '.' and '-' is looked at. (Be aware: dependency jobs have a '-')
        int(jobid.split('.')[0].split('-')[0])
    except ValueError:
        return None

    rank = 0
    if len(argv) > 2:
        try:
            rank = int(argv[2])
        except ValueError:
            return None
    return jobid, rank


def slot_hosts(exec_host):
    """Return the node name of every entry in an exec_host string.

    exec_host -- entries separated by '+'; the part of an entry before
                 its first '/' is taken as the node name

    The result has one name per entry, so it may contain duplicates.
    """
    return [entry.split('/')[0] for entry in exec_host.split('+')]


def node_rows(nodes, per_row=8):
    """Group node names into display lines.

    nodes   -- list of node names (strings)
    per_row -- maximum number of names on one line

    Returns a list of strings, each holding up to per_row names
    separated by a single space.
    """
    return [' '.join(nodes[start:start + per_row])
            for start in range(0, len(nodes), per_row)]


def main(argv):
    """Show top output for, or log in to, a node of a running job.

    argv -- the argument list, program name first (as in sys.argv)

    What is done depends on the name the script was called as:
    'pbs_jobmonitor' runs one batch cycle of top for the job owner on
    the selected node, 'pbs_joblogin' opens an ssh session with X
    forwarding to that node.

    Returns the exit status for the script: 1 for bad arguments, an
    unknown job or a node rank outside the job, 0 otherwise.
    """
    # Imported here because the import lines at the top of the file
    # only provide sys and os.
    import subprocess

    me = os.path.basename(argv[0])
    print('[' + me + ']')

    # Validate the arguments before creating the query object, so a
    # usage error is reported without touching PBS at all.
    args = parse_args(argv)
    if args is None:
        usage(me)
        return 1
    jobid, rank = args

    job = PBSQuery().getjob(jobid)
    try:
        exec_host = job['exec_host'][0]
    except Exception:
        # Not a bare except: Ctrl-C and sys.exit() must not be reported
        # as a missing job.
        print('No such job: %s' % (jobid,))
        return 1

    nodes = uniq(slot_hosts(exec_host))
    print('Job %s is running on %d nodes:' % (jobid, len(nodes)))
    for row in node_rows(nodes):
        print(row)

    # A negative rank would otherwise silently count from the end of
    # the node list.
    if rank < 0 or rank >= len(nodes):
        print('No node number %d' % rank)
        return 1

    if me == 'pbs_jobmonitor':
        user = job['Job_Owner'][0].split('@')[0]
        print('top for node # %d : %s user: %s' % (rank, nodes[rank], user))
        sys.stdout.flush()
        # Argument list instead of a shell string: no local shell parses
        # the node or user name. ssh still hands the command to the
        # login shell on the remote side.
        subprocess.call(['ssh', nodes[rank], 'top', '-n1', '-b', '-u', user])

    if me == 'pbs_joblogin':
        user = os.getenv('USER')
        print('logging in to node # %d : %s user: %s' % (rank, nodes[rank], user))
        sys.stdout.flush()
        subprocess.call(['ssh', '-X', nodes[rank]])

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
120
Note: See TracBrowser for help on using the repository browser.