source: trunk/examples/pbs_jobmonitor @ 297

Last change on this file since 297 was 297, checked in by bas, 11 years ago

new version 4.3.6

  • Property svn:executable set to *
  • Property svn:keywords set to Id URL
File size: 2.3 KB
Line 
1#!/usr/bin/env python
2#
3# Author:
4#  Willem Vermin, SARA, April 2012
5#
6# SVN Info:
7#   $Id$
8#   $URL: trunk/examples/pbs_jobmonitor $
9#
10# pbs_jobmonitor, pbs_joblogin <jobnr> [nodenr]
11#    jobnr: the number of the job
12#    nodenr: the rank of the node in the job
13#
14# depending on the name under which this script is called, it performs the
15# following:
16
17# called as pbs_jobmonitor:
18#       shows the output of top -u user on the node
19#       - one cycle of top
20#       - user: the user the job belongs to
21#
22# called as pbs_joblogin:
23#       logs in to the node as the user who invokes this script
24#            (os.getenv('USER'))
25#
26from PBSQuery import PBSQuery
27import sys,os
def uniq(seq, idfun=None):
    """Return the items of seq without duplicates, keeping first-seen order.

    seq   -- any iterable of items
    idfun -- optional function mapping an item to the key that decides
             whether two items count as duplicates; when omitted the item
             itself is the key

    The first item seen for each key is the one that is kept.
    Approach taken from http://www.peterbe.com/plog/uniqifiers-benchmark
    """
    key_of = idfun if idfun is not None else (lambda item: item)
    already_seen = set()
    unique_items = []
    for item in seq:
        key = key_of(item)
        if key not in already_seen:
            already_seen.add(key)
            unique_items.append(item)
    return unique_items
41
def usage(a):
    """Print a short help text for this script on standard output.

    a -- the name the script was invoked under. A one-line description is
         printed first when a is 'pbs_jobmonitor' or 'pbs_joblogin'; the
         calling syntax is printed in every case.
    """
    descriptions = (
        ('pbs_jobmonitor',
         'shows the system usage of a node where a job is running'),
        ('pbs_joblogin',
         'logs you in to a node where a job is running'),
    )
    for name, blurb in descriptions:
        if a == name:
            print('%s %s' % (a, blurb))

    # Each print call gets exactly one string, so the output is the same
    # whether print is a statement or a function.
    help_lines = (
        'Usage:',
        '%s %s' % (a, '<jobnumber> [nodenumber]'),
        'where <jobnumber> is the number of the job',
        '      nodenumber is the rank number of the node allocated to the job',
        '      (default 0)',
    )
    for line in help_lines:
        print(line)
53 
# The name this script was invoked under (last path component of argv[0]);
# it selects the help text shown by usage() and the action taken later on.
me = sys.argv[0].split('/')[-1]
print('[' + me + ']')
p = PBSQuery()

# The job number is mandatory: show the usage text and stop when it is missing.
# Only IndexError is caught so that SystemExit/KeyboardInterrupt pass through.
try:
    j = sys.argv[1]
except IndexError:
    usage(me)
    sys.exit(1)

# The node rank is optional and defaults to 0; it has to be an integer.
if len(sys.argv) > 2:
    try:
        num = int(sys.argv[2])
    except ValueError:
        usage(me)
        sys.exit(1)
else:
    num = 0
72
job = p.getjob(j)

# A job record without a usable 'exec_host' entry is reported as unknown.
# Only lookup failures are caught; SystemExit/KeyboardInterrupt pass through.
try:
    exec_host = job['exec_host'][0]
except (KeyError, IndexError, TypeError):
    print('No such job: %s' % j)
    sys.exit(1)

# exec_host is a '+'-separated list of entries; the part of each entry before
# the first '/' is taken as the node name, and repeated names are collapsed.
nodes = uniq([entry.split('/')[0] for entry in exec_host.split('+')])

print('Job %s is running on %d nodes:' % (j, len(nodes)))
# List the node names, eight per line, separated by single spaces.
NODES_PER_LINE = 8
for start in range(0, len(nodes), NODES_PER_LINE):
    print(' '.join(nodes[start:start + NODES_PER_LINE]))

# Reject ranks outside 0 .. len(nodes)-1. A negative rank must be refused
# explicitly, otherwise nodes[num] would silently count from the end.
if num < 0 or num >= len(nodes):
    print('No node number %d' % num)
    sys.exit(1)
101
# Pick the action from the name the script was invoked under; any other
# name performs no further action.
if me == 'pbs_jobmonitor':
    # One batch-mode cycle of top on the selected node, restricted to the
    # job owner (the part of Job_Owner before the '@').
    user = job['Job_Owner'][0].split('@')[0]
    node = nodes[num]
    print(' '.join(map(str, ('top for node #', num, ':', node, 'user:', user))))
    # Flush our own output before ssh starts writing to the terminal.
    sys.stdout.flush()
    os.system('ssh ' + node + ' top -n1 -b -u ' + user)
elif me == 'pbs_joblogin':
    # ssh -X session to the selected node; the user shown is taken from
    # the USER environment variable of whoever runs this script.
    user = os.getenv('USER')
    node = nodes[num]
    print(' '.join(map(str, ('logging in to node #', num, ':', node, 'user:', user))))
    sys.stdout.flush()
    os.system('ssh -X ' + node)
113
Note: See TracBrowser for help on using the repository browser.