Ticket #24: sge.diff
File sge.diff, 29.9 KB (added by anonymous, 16 years ago) |
---|
-
jobmond/jobmond.conf
Mon Dec 10 15:46:45 GMT 2007 Dave Love <fx@gnu.org> * Tidy/fix SGE changes. Fri Dec 7 10:57:55 GMT 2007 Dave Love <fx@gnu.org> * Add note about SGE tasks to overview template. Fri Nov 9 15:39:41 GMT 2007 Dave Love <fx@gnu.org> * Replace previous SGE implementation with a different one. This provides the full set of monarch data. There's some associated restructuring involving the PBS code. Fri Nov 9 15:27:01 GMT 2007 Dave Love <fx@gnu.org> * jobmond.conf comment fix. Fri Nov 9 15:21:25 GMT 2007 Dave Love <fx@gnu.org> * Somewhat modify RPM spec files. Wed Nov 7 17:49:37 GMT 2007 Dave Love <fx@gnu.org> * Modify jobmond for use with SGE. diff -rN -u old-jobmonarch/jobmond/jobmond.conf new-jobmonarch/jobmond/jobmond.conf
old new 20 20 BATCH_SERVER : localhost 21 21 22 22 # Which queue(s) to report jobs of 23 # (optional and only supported for pbs)23 # (optional) 24 24 # 25 25 #QUEUE : long, short 26 26 -
jobmond/jobmond.py
diff -rN -u old-jobmonarch/jobmond/jobmond.py new-jobmonarch/jobmond/jobmond.py
old new 3 3 # This file is part of Jobmonarch 4 4 # 5 5 # Copyright (C) 2006-2007 Ramon Bastiaans 6 # Copyright (C) 2007 Dave Love (SGE code) 6 7 # 7 8 # Jobmonarch is free software; you can redistribute it and/or modify 8 9 # it under the terms of the GNU General Public License as published by … … 23 24 24 25 import sys, getopt, ConfigParser 25 26 import time, os, socket, string, re 26 import xdrlib, s ocket, syslog27 import xdrlib, syslog 27 28 import xml, xml.sax 28 from xml.sax import saxutils, make_parser29 from xml.sax import make_parser30 29 from xml.sax.handler import feature_namespaces 31 30 32 31 def usage(): … … 236 235 sys.exit( 1 ) 237 236 else: 238 237 239 debug_msg( 0, "ERROR: GMETRIC_TARGET not set: intern el Gmetric handling aborted. Failing back to DEPRECATED use of gmond.conf/gmetric binary. This will slow down jobmond significantly!" )238 debug_msg( 0, "ERROR: GMETRIC_TARGET not set: internal Gmetric handling aborted. Failing back to DEPRECATED use of gmond.conf/gmetric binary. This will slow down jobmond significantly!" 
) 240 239 241 240 return True 242 241 242 def fqdn_parts (fqdn): 243 """Return pair of host and domain for fully-qualified domain name arg.""" 244 parts = fqdn.split (".") 245 return (parts[0], string.join(parts[1:], ".")) 246 243 247 METRIC_MAX_VAL_LEN = 900 244 248 245 249 class DataProcessor: … … 365 369 366 370 except NameError: 367 371 368 debug_msg( 10, 'Assuming /etc/gmond.conf for gmetric cmd (om mitting)' )372 debug_msg( 10, 'Assuming /etc/gmond.conf for gmetric cmd (omitting)' ) 369 373 370 374 cmd = cmd + ' -n' + str( metricname )+ ' -v"' + str( metricval )+ '" -t' + str( valtype ) + ' -d' + str( self.dmax ) 371 375 … … 403 407 404 408 print '\t%s = %s' %( name, val ) 405 409 410 def getAttr( self, attrs, name ): 411 412 """Return certain attribute from dictionary, if exists""" 413 414 if attrs.has_key( name ): 415 416 return attrs[ name ] 417 else: 418 return '' 419 420 def jobDataChanged( self, jobs, job_id, attrs ): 421 422 """Check if job with attrs and job_id in jobs has changed""" 423 424 if jobs.has_key( job_id ): 425 426 oldData = jobs[ job_id ] 427 else: 428 return 1 429 430 for name, val in attrs.items(): 431 432 if oldData.has_key( name ): 433 434 if oldData[ name ] != attrs[ name ]: 435 436 return 1 437 438 else: 439 return 1 440 441 return 0 442 443 def submitJobData( self ): 444 445 """Submit job info list""" 446 447 self.dp.multicastGmetric( 'MONARCH-HEARTBEAT', str( int( int( self.cur_time ) + int( self.timeoffset ) ) ) ) 448 449 running_jobs = 0 450 queued_jobs = 0 451 452 for jobid, jobattrs in self.jobs.items(): 453 454 if jobattrs[ 'status' ] == 'Q': 455 456 queued_jobs += 1 457 458 elif jobattrs[ 'status' ] == 'R': 459 460 running_jobs += 1 461 462 self.dp.multicastGmetric( 'MONARCH-RJ', str( running_jobs ), 'uint32', 'jobs' ) 463 self.dp.multicastGmetric( 'MONARCH-QJ', str( queued_jobs ), 'uint32', 'jobs' ) 464 465 # Now let's spread the knowledge 466 # 467 for jobid, jobattrs in self.jobs.items(): 468 469 gmetric_val = 
self.compileGmetricVal( jobid, jobattrs ) 470 metric_increment = 0 471 472 for val in gmetric_val: 473 474 self.dp.multicastGmetric( 'MONARCH-JOB-' + jobid + '-' + str(metric_increment), val ) 475 476 metric_increment = metric_increment + 1 477 478 def compileGmetricVal( self, jobid, jobattrs ): 479 480 """Create a val string for gmetric of jobinfo""" 481 482 gval_lists = [ ] 483 val_list = { } 484 485 for val_name, val_value in jobattrs.items(): 486 487 val_list_names_len = len( string.join( val_list.keys() ) ) + len(val_list.keys()) 488 val_list_vals_len = len( string.join( val_list.values() ) ) + len(val_list.values()) 489 490 if val_name == 'nodes' and jobattrs['status'] == 'R': 491 492 node_str = None 493 494 for node in val_value: 495 496 if node_str: 497 498 node_str = node_str + ';' + node 499 else: 500 node_str = node 501 502 if (val_list_names_len + len(val_name) ) + (val_list_vals_len + len(node_str) ) > METRIC_MAX_VAL_LEN: 503 504 val_list[ val_name ] = node_str 505 506 gval_lists.append( val_list ) 507 508 val_list = { } 509 node_str = None 510 511 val_list[ val_name ] = node_str 512 513 gval_lists.append( val_list ) 514 515 val_list = { } 516 517 elif val_value != '': 518 519 if (val_list_names_len + len(val_name) ) + (val_list_vals_len + len(str(val_value)) ) > METRIC_MAX_VAL_LEN: 520 521 gval_lists.append( val_list ) 522 523 val_list = { } 524 525 val_list[ val_name ] = val_value 526 527 if len( val_list ) > 0: 528 529 gval_lists.append( val_list ) 530 531 str_list = [ ] 532 533 for val_list in gval_lists: 534 535 my_val_str = None 536 537 for val_name, val_value in val_list.items(): 538 539 if my_val_str: 540 541 my_val_str = my_val_str + ' ' + val_name + '=' + val_value 542 else: 543 my_val_str = val_name + '=' + val_value 544 545 str_list.append( my_val_str ) 546 547 return str_list 548 406 549 def daemon( self ): 407 550 408 551 """Run as daemon forever""" … … 450 593 self.submitJobData() 451 594 time.sleep( BATCH_POLL_INTERVAL ) 452 595 453 
class SgeQstatXMLParser(xml.sax.handler.ContentHandler): 454 455 """Babu Sundaram's experimental SGE qstat XML parser""" 596 # SGE code by Dave Love <fx@gnu.org>. Tested with SGE 6.0u8 and 6.0u11. 597 # Probably needs modification for SGE 6.1. See also the fixmes. 456 598 457 def __init__(self, qstatinxml): 599 class NoJobs (Exception): 600 """Exception raised by empty job list in qstat output.""" 601 pass 458 602 459 self.qstatfile = qstatinxml 460 self.attribs = {} 461 self.value = '' 462 self.jobID = '' 463 self.currentJobInfo = '' 464 self.job_list = [] 465 self.EOFFlag = 0 466 self.jobinfoCount = 0 603 class SgeQstatXMLParser(xml.sax.handler.ContentHandler): 604 """SAX handler for XML output from Sun Grid Engine's `qstat'.""" 467 605 606 def __init__(self): 607 self.value = "" 608 self.joblist = [] 609 self.job = {} 610 self.queue = "" 611 self.in_joblist = False 612 self.lrequest = False 613 xml.sax.handler.ContentHandler.__init__(self) 614 615 # The structure of the output is as follows. Unfortunately 616 # it's voluminous, and probably doesn't scale to large 617 # clusters/queues. 618 619 # <detailed_job_info xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 620 # <djob_info> 621 # <qmaster_response> <!-- job --> 622 # ... 623 # <JB_ja_template> 624 # <ulong_sublist> 625 # ... <!-- start_time, state ... --> 626 # </ulong_sublist> 627 # </JB_ja_template> 628 # <JB_ja_tasks> 629 # <ulong_sublist> 630 # ... <!-- task info 631 # </ulong_sublist> 632 # ... 633 # </JB_ja_tasks> 634 # ... 635 # </qmaster_response> 636 # </djob_info> 637 # <messages> 638 # ... 639 640 # NB. We might treat each task as a separate job, like 641 # straight qstat output, but the web interface expects jobs to 642 # be identified by integers, not, say, <job number>.<task>. 643 644 # So, I lied. 
If the job list is empty, we get invalid XML 645 # like this, which we need to defend against: 646 647 # <unknown_jobs xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 648 # <> 649 # <ST_name>*</ST_name> 650 # </> 651 # </unknown_jobs> 468 652 469 653 def startElement(self, name, attrs): 470 471 if name == 'job_list': 472 self.currentJobInfo = 'Status=' + attrs.get('state', None) + ' ' 473 elif name == 'job_info': 474 self.job_list = [] 475 self.jobinfoCount += 1 654 self.value = "" 655 if name == "djob_info": # job list 656 self.in_joblist = True 657 elif name == "qmaster_response" and self.in_joblist: # job 658 self.job = {"job_state": "U", "slots": 0, 659 "nodes": [], "queued_timestamp": "", 660 "queued_timestamp": "", "queue": "", 661 "ppn": "0", "RN_max": 0, 662 # fixme in endElement 663 "requested_memory": 0, "requested_time": 0 664 } 665 self.joblist.append(self.job) 666 elif name == "qstat_l_requests": # resource request 667 self.lrequest = True 668 elif name == "unknown_jobs": 669 raise NoJobs 476 670 477 671 def characters(self, ch): 672 self.value += ch 478 673 479 self.value = self.value + ch 480 481 def endElement(self, name): 482 483 if len(self.value.strip()) > 0 : 484 485 self.currentJobInfo += name + '=' + self.value.strip() + ' ' 486 elif name != 'job_list': 487 488 self.currentJobInfo += name + '=Unknown ' 489 490 if name == 'JB_job_number': 491 492 self.jobID = self.value.strip() 493 self.job_list.append(self.jobID) 494 495 if name == 'job_list': 496 497 if self.attribs.has_key(self.jobID) == False: 498 self.attribs[self.jobID] = self.currentJobInfo 499 elif self.attribs.has_key(self.jobID) and self.attribs[self.jobID] != self.currentJobInfo: 500 self.attribs[self.jobID] = self.currentJobInfo 501 self.currentJobInfo = '' 502 self.jobID = '' 503 504 elif name == 'job_info' and self.jobinfoCount == 2: 505 506 deljobs = [] 507 for id in self.attribs: 508 try: 509 self.job_list.index(str(id)) 510 except ValueError: 511 deljobs.append(id) 512 for i in 
deljobs: 513 del self.attribs[i] 514 deljobs = [] 515 self.jobinfoCount = 0 516 517 self.value = '' 674 def endElement(self, name): 675 """Snarf job elements contents into job dictionary. 676 Translate keys if appropriate.""" 677 678 name_trans = { 679 "JB_job_number": "number", 680 "JB_job_name": "name", "JB_owner": "owner", 681 "queue_name": "queue", "JAT_start_time": "start_timestamp", 682 "JB_submission_time": "queued_timestamp" 683 } 684 value = self.value 685 686 if name == "djob_info": 687 self.in_joblist = False 688 self.job = {} 689 elif name == "JAT_master_queue": 690 self.job["queue"] = value.split("@")[0] 691 elif name == "JG_qhostname": 692 if not (value in self.job["nodes"]): 693 self.job["nodes"].append(value) 694 elif name == "JG_slots": # slots in use 695 self.job["slots"] += int(value) 696 elif name == "RN_max": # requested slots (tasks or parallel) 697 self.job["RN_max"] = max (self.job["RN_max"], 698 int(value)) 699 elif name == "JAT_state": # job state (bitwise or) 700 value = int (value) 701 # Status values from sge_jobL.h 702 #define JIDLE 0x00000000 703 #define JHELD 0x00000010 704 #define JMIGRATING 0x00000020 705 #define JQUEUED 0x00000040 706 #define JRUNNING 0x00000080 707 #define JSUSPENDED 0x00000100 708 #define JTRANSFERING 0x00000200 709 #define JDELETED 0x00000400 710 #define JWAITING 0x00000800 711 #define JEXITING 0x00001000 712 #define JWRITTEN 0x00002000 713 #define JSUSPENDED_ON_THRESHOLD 0x00010000 714 #define JFINISHED 0x00010000 715 if value & 0x80: 716 self.job["status"] = "R" 717 elif value & 0x40: 718 self.job["status"] = "Q" 719 else: 720 self.job["status"] = "O" # `other' 721 elif name == "CE_name" and self.lrequest and self.value in \ 722 ("h_cpu", "s_cpu", "cpu", "h_core", "s_core"): 723 # We're in a container for an interesting resource 724 # request; record which type. 
725 self.lrequest = self.value 726 elif name == "CE_doubleval" and self.lrequest: 727 # if we're in a container for an interesting 728 # resource request, use the maximum of the hard 729 # and soft requests to record the requested CPU 730 # or core. Fixme: I'm not sure if this logic is 731 # right. 732 if self.lrequest in ("h_core", "s_core"): 733 self.job["requested_memory"] = \ 734 max (float (value), 735 self.job["requested_memory"]) 736 # Fixme: Check what cpu means, c.f [hs]_cpu. 737 elif self.lrequest in ("h_cpu", "s_cpu", "cpu"): 738 self.job["requested_time"] = \ 739 max (float (value), 740 self.job["requested_time"]) 741 elif name == "qstat_l_requests": 742 self.lrequest = False 743 elif self.job and self.in_joblist: 744 if name in name_trans: 745 name = name_trans[name] 746 self.job[name] = value 747 748 # Abstracted from PBS original. 749 # Fixme: Is it worth (or appropriate for PBS) sorting the result? 750 def do_nodelist (nodes): 751 """Translate node list as appropriate.""" 752 nodeslist = [ ] 753 my_domain = fqdn_parts(socket.getfqdn())[1] 754 for node in nodes: 755 host = node.split( '/' )[0] # not relevant for SGE 756 h, host_domain = fqdn_parts(host) 757 if host_domain == my_domain: 758 host = h 759 if nodeslist.count( host ) == 0: 760 for translate_pattern in BATCH_HOST_TRANSLATE: 761 if translate_pattern.find( '/' ) != -1: 762 translate_orig = \ 763 translate_pattern.split( '/' )[1] 764 translate_new = \ 765 translate_pattern.split( '/' )[2] 766 host = re.sub( translate_orig, 767 translate_new, host ) 768 if not host in nodeslist: 769 nodeslist.append( host ) 770 return nodeslist 518 771 519 772 class SgeDataGatherer( DataGatherer ): 520 773 521 jobs = { } 522 SGE_QSTAT_XML_FILE = '/tmp/.jobmonarch.sge.qstat' 774 jobs = {} 523 775 524 776 def __init__( self ): 525 """Setup appropriate variables""" 526 527 self.jobs = { } 777 self.jobs = {} 528 778 self.timeoffset = 0 529 779 self.dp = DataProcessor() 530 self.initSgeJobInfo()531 532 def 
initSgeJobInfo( self ):533 """This is outside the scope of DRMAA; Get the current jobs in SGE"""534 """This is a hack because we cant get info about jobs beyond"""535 """those in the current DRMAA session"""536 537 self.qstatparser = SgeQstatXMLParser( self.SGE_QSTAT_XML_FILE )538 780 539 # Obtain the qstat information from SGE in XML format 540 # This would change to DRMAA-specific calls from 6.0u9 541 542 def getJobData(self): 781 def getJobData( self ): 543 782 """Gather all data on current jobs in SGE""" 544 783 545 # Get the information about the current jobs in the SGE queue 546 info = os.popen("qstat -ext -xml").readlines() 547 f = open(self.SGE_QSTAT_XML_FILE,'w') 548 for lines in info: 549 f.write(lines) 550 f.close() 551 552 # Parse the input 553 f = open(self.qstatparser.qstatfile, 'r') 554 xml.sax.parse(f, self.qstatparser) 555 f.close() 784 import popen2 556 785 786 self.cur_time = 0 787 queues = "" 788 if QUEUE: # only for specific queues 789 # Fixme: assumes queue names don't contain single 790 # quote or comma. Don't know what the SGE rules are. 791 queues = " -q '" + string.join (QUEUE, ",") + "'" 792 # Note the comment in SgeQstatXMLParser about scaling with 793 # this method of getting data. I haven't found better one. 794 # Output with args `-xml -ext -f -r' is easier to parse 795 # in some ways, harder in others, but it doesn't provide 796 # the submission time, at least. 
797 piping = popen2.Popen3("qstat -u '*' -j '*' -xml" + queues, 798 True) 799 qstatparser = SgeQstatXMLParser() 800 parse_err = 0 801 try: 802 xml.sax.parse(piping.fromchild, qstatparser) 803 except NoJobs: 804 pass 805 except: 806 parse_err = 1 807 if piping.wait(): 808 debug_msg(10, 809 "qstat error, skipping until next polling interval: " 810 + piping.childerr.readline()) 811 return None 812 elif parse_err: 813 debug_msg(10, "Bad XML output from qstat") 814 exit (1) 815 for f in piping.fromchild, piping.tochild, piping.childerr: 816 f.close() 557 817 self.cur_time = time.time() 558 559 return self.qstatparser.attribs 560 561 def submitJobData(self): 562 """Submit job info list""" 563 564 self.dp.multicastGmetric( 'MONARCH-HEARTBEAT', str( int( int( self.cur_time ) + int( self.timeoffset ) ) ) ) 565 # Now let's spread the knowledge 566 # 567 metric_increment = 0 568 for jobid, jobattrs in self.qstatparser.attribs.items(): 569 570 self.dp.multicastGmetric( 'MONARCH-JOB-' + jobid + '-' + str(metric_increment), jobattrs) 818 jobs_processed = [] 819 for job in qstatparser.joblist: 820 job_id = job["number"] 821 if job["status"] in [ 'Q', 'R' ]: 822 jobs_processed.append(job_id) 823 if job["status"] == "R": 824 job["nodes"] = do_nodelist (job["nodes"]) 825 # Fixme: Is this right? 826 job["ppn"] = float(job["slots"]) / \ 827 len(job["nodes"]) 828 if DETECT_TIME_DIFFS: 829 # If a job start is later than our 830 # current date, that must mean 831 # the SGE server's time is later 832 # than our local time. 
833 start_timestamp = \ 834 int (job["start_timestamp"]) 835 if start_timestamp > \ 836 int(self.cur_time) + \ 837 int(self.timeoffset): 838 839 self.timeoffset = \ 840 start_timestamp - \ 841 int(self.cur_time) 842 else: 843 # fixme: Note sure what this should be: 844 job["ppn"] = job["RN_max"] 845 job["nodes"] = "1" 846 847 myAttrs = {} 848 for attr in ["name", "queue", "owner", 849 "requested_time", "status", 850 "requested_memory", "ppn", 851 "start_timestamp", "queued_timestamp"]: 852 myAttrs[attr] = str(job[attr]) 853 myAttrs["nodes"] = job["nodes"] 854 myAttrs["reported"] = str(int(self.cur_time) + \ 855 int(self.timeoffset)) 856 myAttrs["domain"] = fqdn_parts(socket.getfqdn())[1] 857 myAttrs["poll_interval"] = str(BATCH_POLL_INTERVAL) 858 859 if self.jobDataChanged(self.jobs, job_id, myAttrs) \ 860 and myAttrs["status"] in ["R", "Q"]: 861 self.jobs[job_id] = myAttrs 862 for id, attrs in self.jobs.items(): 863 if id not in jobs_processed: 864 del self.jobs[id] 571 865 572 866 class PbsDataGatherer( DataGatherer ): 573 867 … … 595 889 else: 596 890 self.pq = PBSQuery() 597 891 598 def getAttr( self, attrs, name ):599 600 """Return certain attribute from dictionary, if exists"""601 602 if attrs.has_key( name ):603 604 return attrs[ name ]605 else:606 return ''607 608 def jobDataChanged( self, jobs, job_id, attrs ):609 610 """Check if job with attrs and job_id in jobs has changed"""611 612 if jobs.has_key( job_id ):613 614 oldData = jobs[ job_id ]615 else:616 return 1617 618 for name, val in attrs.items():619 620 if oldData.has_key( name ):621 622 if oldData[ name ] != attrs[ name ]:623 624 return 1625 626 else:627 return 1628 629 return 0630 631 892 def getJobData( self ): 632 893 633 894 """Gather all data on current jobs in Torque""" … … 646 907 647 908 jobs_processed = [ ] 648 909 649 my_domain = string.join( socket.getfqdn().split( '.' )[1:], '.' )650 651 910 for name, attrs in joblist.items(): 652 911 653 912 job_id = name.split( '.' 
)[0] … … 692 951 start_timestamp = self.getAttr( attrs, 'mtime' ) 693 952 nodes = self.getAttr( attrs, 'exec_host' ).split( '+' ) 694 953 695 nodeslist = [ ] 696 697 for node in nodes: 698 699 host = node.split( '/' )[0] 700 701 host_domain = string.join( host.split( '.' )[1:], '.' ) 702 703 if host_domain == my_domain: 704 705 host = host.split( '.' )[0] 706 707 if nodeslist.count( host ) == 0: 708 709 for translate_pattern in BATCH_HOST_TRANSLATE: 710 711 if translate_pattern.find( '/' ) != -1: 712 713 translate_orig = translate_pattern.split( '/' )[1] 714 translate_new = translate_pattern.split( '/' )[2] 715 716 host = re.sub( translate_orig, translate_new, host ) 717 718 if not host in nodeslist: 719 720 nodeslist.append( host ) 954 nodeslist = do_nodelist( nodes ) 721 955 722 956 if DETECT_TIME_DIFFS: 723 957 … … 778 1012 myAttrs[ 'queued_timestamp' ] = str( queued_timestamp ) 779 1013 myAttrs[ 'reported' ] = str( int( int( self.cur_time ) + int( self.timeoffset ) ) ) 780 1014 myAttrs[ 'nodes' ] = nodeslist 781 myAttrs[ 'domain' ] = string.join( socket.getfqdn().split( '.' )[1:], '.' 
)1015 myAttrs[ 'domain' ] = fqdn_parts( socket.getfqdn() )[1] 782 1016 myAttrs[ 'poll_interval' ] = str( BATCH_POLL_INTERVAL ) 783 1017 784 1018 if self.jobDataChanged( self.jobs, job_id, myAttrs ) and myAttrs['status'] in [ 'R', 'Q' ]: … … 793 1027 # 794 1028 del self.jobs[ id ] 795 1029 796 def submitJobData( self ):797 798 """Submit job info list"""799 800 self.dp.multicastGmetric( 'MONARCH-HEARTBEAT', str( int( int( self.cur_time ) + int( self.timeoffset ) ) ) )801 802 running_jobs = 0803 queued_jobs = 0804 805 for jobid, jobattrs in self.jobs.items():806 807 if jobattrs[ 'status' ] == 'Q':808 809 queued_jobs += 1810 811 elif jobattrs[ 'status' ] == 'R':812 813 running_jobs += 1814 815 self.dp.multicastGmetric( 'MONARCH-RJ', str( running_jobs ), 'uint32', 'jobs' )816 self.dp.multicastGmetric( 'MONARCH-QJ', str( queued_jobs ), 'uint32', 'jobs' )817 818 # Now let's spread the knowledge819 #820 for jobid, jobattrs in self.jobs.items():821 822 gmetric_val = self.compileGmetricVal( jobid, jobattrs )823 metric_increment = 0824 825 for val in gmetric_val:826 827 self.dp.multicastGmetric( 'MONARCH-JOB-' + jobid + '-' + str(metric_increment), val )828 829 metric_increment = metric_increment + 1830 831 def compileGmetricVal( self, jobid, jobattrs ):832 833 """Create a val string for gmetric of jobinfo"""834 835 gval_lists = [ ]836 mystr = None837 val_list = { }838 839 for val_name, val_value in jobattrs.items():840 841 val_list_names_len = len( string.join( val_list.keys() ) ) + len(val_list.keys())842 val_list_vals_len = len( string.join( val_list.values() ) ) + len(val_list.values())843 844 if val_name == 'nodes' and jobattrs['status'] == 'R':845 846 node_str = None847 848 for node in val_value:849 850 if node_str:851 852 node_str = node_str + ';' + node853 else:854 node_str = node855 856 if (val_list_names_len + len(val_name) ) + (val_list_vals_len + len(node_str) ) > METRIC_MAX_VAL_LEN:857 858 val_list[ val_name ] = node_str859 860 gval_lists.append( val_list )861 
862 val_list = { }863 node_str = None864 865 val_list[ val_name ] = node_str866 867 gval_lists.append( val_list )868 869 val_list = { }870 871 elif val_value != '':872 873 if (val_list_names_len + len(val_name) ) + (val_list_vals_len + len(str(val_value)) ) > METRIC_MAX_VAL_LEN:874 875 gval_lists.append( val_list )876 877 val_list = { }878 879 val_list[ val_name ] = val_value880 881 if len( val_list ) > 0:882 883 gval_lists.append( val_list )884 885 str_list = [ ]886 887 for val_list in gval_lists:888 889 my_val_str = None890 891 for val_name, val_value in val_list.items():892 893 if my_val_str:894 895 my_val_str = my_val_str + ' ' + val_name + '=' + val_value896 else:897 my_val_str = val_name + '=' + val_value898 899 str_list.append( my_val_str )900 901 return str_list902 903 1030 # 904 1031 # Gmetric by Nick Galbreath - nickg(a.t)modp(d.o.t)com 905 1032 # Version 1.0 - 21-April2-2007 … … 1052 1179 1053 1180 elif BATCH_API == 'sge': 1054 1181 1055 debug_msg( 0, "FATAL ERROR: BATCH_API 'sge' implementation is currently broken, check future releases" ) 1182 # Tested with SGE 6.0u11. 1183 # debug_msg( 0, "FATAL ERROR: BATCH_API 'sge' implementation is currently broken, check future releases" ) 1056 1184 1057 sys.exit( 1 )1185 # sys.exit( 1 ) 1058 1186 1059 1187 gather = SgeDataGatherer() 1060 1188 -
pkg/rpm/jobmonarch-jobarchived.spec
diff -rN -u old-jobmonarch/pkg/rpm/jobmonarch-jobarchived.spec new-jobmonarch/pkg/rpm/jobmonarch-jobarchived.spec
old new 3 3 Version: 4 4 Release: 5 5 Summary: Job Archiving Daemon 6 License: see /usr/share/doc/jobmonarch-jobarchived/copyright7 Distribution: Debian8 Group: Converted/misc6 License: GPL 7 Distribution: Fedora 8 Group: Applications/System 9 9 10 10 %define _rpmdir ../ 11 11 %define _rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm … … 19 19 if [ -x /etc/init.d/jobarchived ] 20 20 then 21 21 22 chkconfig --add jobarchived 22 23 chkconfig jobarchived on 23 24 24 25 fi … … 40 41 41 42 /etc/init.d/jobarchived stop 42 43 chkconfig jobarchived off 44 chkconfig --del jobarchived 43 45 44 46 45 47 %description -
pkg/rpm/jobmonarch-jobmond.spec
diff -rN -u old-jobmonarch/pkg/rpm/jobmonarch-jobmond.spec new-jobmonarch/pkg/rpm/jobmonarch-jobmond.spec
old new 3 3 Version: 4 4 Release: 5 5 Summary: Job Monitoring Daemon 6 License: see /usr/share/doc/jobmonarch-jobmond/copyright7 Distribution: Debian8 Group: Converted/misc6 License: GPL 7 Distribution: Fedora 8 Group: Applications/System 9 9 10 10 %define _rpmdir ../ 11 11 %define _rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm … … 19 19 if [ -x /etc/init.d/jobmond ] 20 20 then 21 21 22 chkconfig --add jobmond 22 23 chkconfig jobmond on 24 /etc/init.d/jobmond restart 23 25 24 26 fi 25 27 26 /etc/init.d/jobmond restart27 28 29 28 %preun 30 29 #!/bin/sh 31 30 32 31 /etc/init.d/jobmond stop 33 32 chkconfig jobmond off 33 chkconfig --del jobmond 34 34 35 35 36 36 %description -
pkg/rpm/jobmonarch-webfrontend.spec
diff -rN -u old-jobmonarch/pkg/rpm/jobmonarch-webfrontend.spec new-jobmonarch/pkg/rpm/jobmonarch-webfrontend.spec
old new 2 2 Name: jobmonarch-webfrontend 3 3 Version: 4 4 Release: 5 Summary: Job MonArch 'sWeb Frontend6 License: see /usr/share/doc/jobmonarch-webfrontend/copyright7 Distribution: Debian8 Group: Converted/misc5 Summary: Job MonArch Web Frontend 6 License: GPL 7 Distribution: Fedora 8 Group: Applications/Internet 9 9 10 10 %define _rpmdir ../ 11 11 %define _rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm … … 32 32 Job Monarch's web frontend. 33 33 34 34 %files 35 %dir "/ var/www/ganglia/templates/job_monarch/"36 %dir "/ var/www/ganglia/templates/job_monarch/images/"37 "/ var/www/ganglia/templates/job_monarch/cluster_extra.tpl"38 "/ var/www/ganglia/templates/job_monarch/host_extra.tpl"39 %dir "/ var/www/ganglia/addons/job_monarch/"40 %dir "/ var/www/ganglia/addons/job_monarch/clusterconf/"41 "/ var/www/ganglia/addons/job_monarch/clusterconf/example.php"42 %dir "/ var/www/ganglia/addons/job_monarch/templates/"43 "/ var/www/ganglia/addons/job_monarch/templates/overview.tpl"44 "/ var/www/ganglia/addons/job_monarch/templates/search.tpl"45 "/ var/www/ganglia/addons/job_monarch/templates/footer.tpl"46 "/ var/www/ganglia/addons/job_monarch/templates/header.tpl"47 "/ var/www/ganglia/addons/job_monarch/templates/host_view.tpl"48 "/ var/www/ganglia/addons/job_monarch/templates/index.tpl"49 %config "/ var/www/ganglia/addons/job_monarch/conf.php"50 "/ var/www/ganglia/addons/job_monarch/search.php"51 "/ var/www/ganglia/addons/job_monarch/libtoga.php"52 "/ var/www/ganglia/addons/job_monarch/version.php"53 "/ var/www/ganglia/addons/job_monarch/cal.gif"54 "/ var/www/ganglia/addons/job_monarch/document_archive.jpg"55 "/ var/www/ganglia/addons/job_monarch/graph.php"56 "/ var/www/ganglia/addons/job_monarch/header.php"57 "/ var/www/ganglia/addons/job_monarch/host_view.php"58 "/ var/www/ganglia/addons/job_monarch/image.php"59 "/ var/www/ganglia/addons/job_monarch/libtoga.js"60 "/ var/www/ganglia/addons/job_monarch/logo_ned.gif"61 "/ 
var/www/ganglia/addons/job_monarch/next.gif"62 "/ var/www/ganglia/addons/job_monarch/prev.gif"63 "/ var/www/ganglia/addons/job_monarch/redcross.jpg"64 "/ var/www/ganglia/addons/job_monarch/ts_picker.js"65 "/ var/www/ganglia/addons/job_monarch/ts_validatetime.js"66 "/ var/www/ganglia/addons/job_monarch/footer.php"67 "/ var/www/ganglia/addons/job_monarch/styles.css"68 "/ var/www/ganglia/addons/job_monarch/index.php"69 "/ var/www/ganglia/addons/job_monarch/overview.php"70 "/ var/www/ganglia/addons/job_monarch/jobmonarch.gif"71 "/ var/www/ganglia/templates/job_monarch/images/logo.jpg"35 %dir "/usr/share/ganglia/templates/job_monarch/" 36 %dir "/usr/share/ganglia/templates/job_monarch/images/" 37 "/usr/share/ganglia/templates/job_monarch/cluster_extra.tpl" 38 "/usr/share/ganglia/templates/job_monarch/host_extra.tpl" 39 %dir "/usr/share/ganglia/addons/job_monarch/" 40 %dir "/usr/share/ganglia/addons/job_monarch/clusterconf/" 41 "/usr/share/ganglia/addons/job_monarch/clusterconf/example.php" 42 %dir "/usr/share/ganglia/addons/job_monarch/templates/" 43 "/usr/share/ganglia/addons/job_monarch/templates/overview.tpl" 44 "/usr/share/ganglia/addons/job_monarch/templates/search.tpl" 45 "/usr/share/ganglia/addons/job_monarch/templates/footer.tpl" 46 "/usr/share/ganglia/addons/job_monarch/templates/header.tpl" 47 "/usr/share/ganglia/addons/job_monarch/templates/host_view.tpl" 48 "/usr/share/ganglia/addons/job_monarch/templates/index.tpl" 49 %config "/usr/share/ganglia/addons/job_monarch/conf.php" 50 "/usr/share/ganglia/addons/job_monarch/search.php" 51 "/usr/share/ganglia/addons/job_monarch/libtoga.php" 52 "/usr/share/ganglia/addons/job_monarch/version.php" 53 "/usr/share/ganglia/addons/job_monarch/cal.gif" 54 "/usr/share/ganglia/addons/job_monarch/document_archive.jpg" 55 "/usr/share/ganglia/addons/job_monarch/graph.php" 56 "/usr/share/ganglia/addons/job_monarch/header.php" 57 "/usr/share/ganglia/addons/job_monarch/host_view.php" 58 
"/usr/share/ganglia/addons/job_monarch/image.php" 59 "/usr/share/ganglia/addons/job_monarch/libtoga.js" 60 "/usr/share/ganglia/addons/job_monarch/logo_ned.gif" 61 "/usr/share/ganglia/addons/job_monarch/next.gif" 62 "/usr/share/ganglia/addons/job_monarch/prev.gif" 63 "/usr/share/ganglia/addons/job_monarch/redcross.jpg" 64 "/usr/share/ganglia/addons/job_monarch/ts_picker.js" 65 "/usr/share/ganglia/addons/job_monarch/ts_validatetime.js" 66 "/usr/share/ganglia/addons/job_monarch/footer.php" 67 "/usr/share/ganglia/addons/job_monarch/styles.css" 68 "/usr/share/ganglia/addons/job_monarch/index.php" 69 "/usr/share/ganglia/addons/job_monarch/overview.php" 70 "/usr/share/ganglia/addons/job_monarch/jobmonarch.gif" 71 "/usr/share/ganglia/templates/job_monarch/images/logo.jpg" -
web/addons/job_monarch/templates/overview.tpl
diff -rN -u old-jobmonarch/web/addons/job_monarch/templates/overview.tpl new-jobmonarch/web/addons/job_monarch/templates/overview.tpl
old new 1 <BR><BR> 1 <P> 2 All tasks of parallel and array jobs appear as a single ‘job’. 3 <BR></P> 2 4 3 5 <CENTER> 4 6 <TABLE cellpadding="15"> … … 137 139 <BR> 138 140 139 141 <SCRIPT TYPE="text/javascript" SRC="libtoga.js"></SCRIPT> 142 <NOSCRIPT><P>[Sorting by column header requires JavaScript]<BR><BR></P></NOSCRIPT> 140 143 141 144 <INPUT TYPE="HIDDEN" NAME="sortby" VALUE="{sortby}"> 142 145 <INPUT TYPE="HIDDEN" NAME="sortorder" VALUE="{sortorder}">