source: trunk/lang/tcl/scripts/task.tcl @ 6217

Last change on this file since 6217 was 6217, checked in by ldelgass, 9 years ago

Fix signal reporting (from 1.6 branch)

File size: 24.5 KB
RevLine 
[6021]1# -*- mode: tcl; indent-tabs-mode: nil -*-
[4127]2# ----------------------------------------------------------------------
3#  COMPONENT: task - represents the executable part of a tool
4#
5#  This object is an executable version of a Rappture xml file.
6#  A tool is a task plus its graphical user interface.  Each task
7#  resides in an installation directory with other tool resources
8#  (libraries, examples, etc.).  Each task is defined by its inputs
9#  and outputs, and understands the context in which it executes
10#  (via exec, submit, mx, etc.).
11# ======================================================================
12#  AUTHOR:  Michael McLennan, Purdue University
13#  Copyright (c) 2004-2014  HUBzero Foundation, LLC
14#
15#  See the file "license.terms" for information on usage and
16#  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
17# ======================================================================
18package require BLT
19
20itcl::class Rappture::Task {
21    public variable logger ""
22    public variable jobstats Rappture::Task::MiddlewareTime
23    public variable resultdir "@default"
24
25    constructor {xmlobj installdir args} { # defined below }
26    destructor { # defined below }
27
28    public method installdir {} { return $_installdir }
29
30    public method run {args}
[6021]31    public method get_uq {args}
[4127]32    public method abort {}
33    public method reset {}
34    public method xml {args}
[4970]35    public method save {xmlobj {name ""}}
[4127]36
37    protected method _mkdir {dir}
38    protected method _output {data}
39    protected method _log {args}
[6021]40    protected method _build_submit_cmd {cmd tfile params_file}
41    protected method _get_params {varlist uq_type uq_args}
[6039]42    private method GetSignal { signal }
[4127]43
44    private variable _xmlobj ""      ;# XML object with inputs/outputs
45    private variable _origxml ""     ;# copy of original XML (for reset)
[4970]46    private variable _lastrun ""     ;# name of last run file
[4127]47    private variable _installdir ""  ;# installation directory for this tool
48    private variable _outputcb ""    ;# callback for tool output
49    private common job               ;# array var used for blt::bgexec jobs
50    private common jobnum 0          ;# counter for unique job number
51
52    # get global resources for this tool session
53    public proc resources {{option ""}}
54
55    public common _resources
56    public proc setAppName {name}   { set _resources(-appname) $name }
57    public proc setHubName {name}   { set _resources(-hubname) $name }
58    public proc setHubURL {name}    { set _resources(-huburl) $name }
59    public proc setSession {name}   { set _resources(-session) $name }
60    public proc setJobPrt {name}    { set _resources(-jobprotocol) $name }
61    public proc setResultDir {name} { set _resources(-resultdir) $name }
62
63    # default method for -jobstats control
64    public proc MiddlewareTime {args}
65}
66
67# must use this name -- plugs into Rappture::resources::load
68proc task_init_resources {} {
69    Rappture::resources::register \
70        application_name  Rappture::Task::setAppName \
71        application_id    Rappture::Task::setAppId \
72        hub_name          Rappture::Task::setHubName \
73        hub_url           Rappture::Task::setHubURL \
74        session_token     Rappture::Task::setSession \
75        job_protocol      Rappture::Task::setJobPrt \
76        results_directory Rappture::Task::setResultDir
77}
78
79# ----------------------------------------------------------------------
80# CONSTRUCTOR
81# ----------------------------------------------------------------------
82itcl::body Rappture::Task::constructor {xmlobj installdir args} {
83    if {![Rappture::library isvalid $xmlobj]} {
84        error "bad value \"$xmlobj\": should be Rappture::Library"
85    }
86    set _xmlobj $xmlobj
87
88    # stash a copy of the original XML for later "reset" operations
89    set _origxml [Rappture::LibraryObj ::#auto "<?xml version=\"1.0\"?><run/>"]
90    $_origxml copy "" from $_xmlobj ""
91
92    if {![file exists $installdir]} {
93        error "directory \"$installdir\" doesn't exist"
94    }
95    set _installdir $installdir
96
97    eval configure $args
98}
99
100# ----------------------------------------------------------------------
101# DESTRUCTOR
102# ----------------------------------------------------------------------
103itcl::body Rappture::Task::destructor {} {
104    itcl::delete object $_origxml
105}
106
107# ----------------------------------------------------------------------
108# USAGE: resources ?-option?
109#
110# Clients use this to query information about the tool.
111# ----------------------------------------------------------------------
112itcl::body Rappture::Task::resources {{option ""}} {
113    if {$option == ""} {
114        return [array get _resources]
115    }
116    if {[info exists _resources($option)]} {
117        return $_resources($option)
118    }
119    return ""
120}
121
[6039]122itcl::body Rappture::Task::GetSignal {code} {
123    set signals {
[6217]124        xxx HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV
[6039]125        USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN
126        TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS
127        RTMIN RTMIN+1 RTMIN+2 RTMIN+3 RTMAX-3 RTMAX-2 RTMAX-1 RTMAX
128    }
129    set sigNum [expr $code - 128]
130    if { $sigNum > 0 && $sigNum < [llength $signals] } {
131        return [lindex $signals $sigNum]
132    }
133    return "unknown exit code \"$code\""
134}
135
[6021]136itcl::body Rappture::Task::get_uq {args} {
137    foreach {path val} $args {
138        if {$path == "-uq_type"} {
139            set uq_type $val
140        } elseif {$path == "-uq_args"} {
141            set uq_args $val
142        }
143    }
144    #set varlist [$_xmlobj uq_get_vars]
145    foreach {varlist num} [$_xmlobj uq_get_vars] break
146    return [Rappture::UQ ::#auto $varlist $num $uq_type $uq_args]
147}
148
[4127]149# ----------------------------------------------------------------------
150# USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>?
151#
152# This method causes the tool to run.  A "driver.xml" file is created
153# as the input for the run.  That file is fed to the executable
154# according to the <tool><command> string, and the job is executed.
155#
156# Any "<path> <value>" arguments are used to override the current
157# settings from the GUI.  This is useful, for example, when filling
158# in missing simulation results from the analyzer.
159#
160# If the -output argument is included, then the next arg is a
161# callback command for output messages.  Any output that comes in
162# while the tool is running is sent back to the caller, so the user
163# can see progress running the tool.
164#
165# Returns a list of the form {status result}, where status is an
166# integer status code (0=success) and result is the output from the
167# simulator.  Successful output is something like {0 run1293921.xml},
168# where 0=success and run1293921.xml is the name of the file containing
169# results.
170# ----------------------------------------------------------------------
171itcl::body Rappture::Task::run {args} {
172    global env errorInfo
173
174    #
175    # Make sure that we save the proper application name.
176    # Actually, the best place to get this information is
177    # straight from the "installtool" script, but just in
178    # case we have an older tool, we should insert the
179    # tool name from the resources config file.
180    #
181    if {[info exists _resources(-appname)]
182          && $_resources(-appname) ne ""
183          && [$_xmlobj get tool.name] eq ""} {
184        $_xmlobj put tool.name $_resources(-appname)
185    }
186
187    # if there are any args, use them to override parameters
188    set _outputcb ""
[6021]189    set uq_type ""
[4127]190    foreach {path val} $args {
191        if {$path == "-output"} {
192            set _outputcb $val
[6021]193        } elseif {$path == "-uq_type"} {
194            set uq_type $val
195        } elseif {$path == "-uq_args"} {
196            set uq_args $val
[4127]197        } else {
198            $_xmlobj put $path.current $val
199        }
200    }
201
202    foreach item {control output error} { set job($item) "" }
203
204    # Set limits for cpu time
205    set limit [$_xmlobj get tool.limits.cputime]
206    if { $limit == "unlimited" } {
207        set limit 43200;                # 12 hours
208    } else {
209        if { [scan $limit "%d" dum] != 1 } {
210            set limit 14400;            # 4 hours by default
211        } elseif { $limit > 43200 } {
212            set limit 43200;            # limit to 12 hrs.
213        } elseif { $limit < 10 } {
214            set limit 10;               # lower bound is 10 seconds.
215        }
[6021]216    }
217    Rappture::rlimit set cputime $limit
218
219    # write out the driver.xml file for the tool
220    set file "driver[pid].xml"
221    set status [catch {
222        set fid [open $file w]
223        puts $fid "<?xml version=\"1.0\"?>"
224        puts $fid [$_xmlobj xml]
225        close $fid
226    } result]
227
228    if {$uq_type != ""} {
229        # Copy xml into a new file
230        set tfile "template[pid].xml"
231        set fid [open $tfile w]
232        puts $fid "<?xml version=\"1.0\"?>"
233        puts $fid [$_xmlobj xml]
234        close $fid
235
236        # Return a list of the UQ variables and their PDFs.
237        # Also turns $tfile into a template file.
238        set uq_varlist [lindex [$_xmlobj uq_get_vars $tfile] 0]
239    }
240
241
[4127]242    # execute the tool using the path from the tool description
243    if {$status == 0} {
244        set cmd [$_xmlobj get tool.command]
245        regsub -all @tool $cmd $_installdir cmd
246        set cmd [string trimleft $cmd " "]
247
[6021]248        if { $cmd == "" } {
249            puts stderr "cmd is empty"
250            return [list 1 "Command is empty.\n\nThere is no command specified by\n\n <command>\n </command>\n\nin the tool.xml file."]
251        }
252
253        if {$uq_type == ""} {
254            regsub -all @driver $cmd $file cmd
255
256            switch -glob -- [resources -jobprotocol] {
257                "submit*" {
258                    # if job_protocol is "submit", then use use submit command
259                    set cmd "submit --local $cmd"
260                }
261                "mx" {
262                    # metachory submission
263                    set cmd "mx $cmd"
264                }
265                "exec" {
266                    # default -- nothing special
267                }
[4127]268            }
[6021]269        } else {
270            set params_file [_get_params $uq_varlist $uq_type $uq_args]
271            set cmd [_build_submit_cmd $cmd $tfile $params_file]
272            file delete -force puq
[4127]273        }
274
[6021]275        $_xmlobj put tool.execute $cmd
276
[4127]277        # starting job...
[4970]278        set _lastrun ""
[4127]279        _log run started
280        Rappture::rusage mark
281
282        if {0 == [string compare -nocase -length 5 $cmd "ECHO "] } {
283            set status 0;
[6021]284            set job(output) [string range $cmd 5 end]
[4127]285        } else {
286            set status [catch {
[6021]287                set ::Rappture::Task::job(control) ""
288                eval blt::bgexec \
289                ::Rappture::Task::job(control) \
290                -keepnewline yes \
291                -killsignal SIGTERM \
292                -onoutput [list [itcl::code $this _output]] \
293                -output ::Rappture::Task::job(output) \
294                -error ::Rappture::Task::job(error) \
295                $cmd
296            } result]
[4127]297
298            if { $status != 0 } {
[6021]299                # We're here because the exec-ed program failed
300                set logmesg $result
301                if { $::Rappture::Task::job(control) ne "" } {
302                    foreach { token pid code mesg } \
303                    $::Rappture::Task::job(control) break
304                    if { $token == "EXITED" } {
305                       # This means that the program exited normally but
306                       # returned a non-zero exitcode.  Consider this an
307                       # invalid result from the program.  Append the stderr
308                       # from the program to the message.
[6039]309                       if {$code > 128} {
[6111]310                          set logmesg "Program signaled: signal was [GetSignal $code]"
[6039]311                       } else {
312                          set logmesg "Program finished: exit code is $code"
313                       }
[6021]314                       set result "$logmesg\n\n$::Rappture::Task::job(error)"
315                    } elseif { $token == "abort" }  {
316                        # The user pressed the abort button.
[4127]317                        set logmesg "Program terminated by user."
[6021]318                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
319                    } else {
320                        # Abnormal termination
321                        set logmesg "Abnormal program termination: $mesg"
322                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
323                    }
324                }
[4127]325                _log run failed [list $logmesg]
[6021]326                return [list $status $result]
[4127]327            }
328        }
329        # ...job is finished
330        array set times [Rappture::rusage measure]
331
332        if {[resources -jobprotocol] ne "submit"} {
333            set id [$_xmlobj get tool.id]
334            set vers [$_xmlobj get tool.version.application.revision]
335            set simulation simulation
336            if { $id ne "" && $vers ne "" } {
337                set pid [pid]
338                set simulation ${pid}_${id}_r${vers}
339            }
340
341            # need to save job info? then invoke the callback
342            if {[string length $jobstats] > 0} {
343                uplevel #0 $jobstats [list job [incr jobnum] \
[6021]344                event $simulation start $times(start) \
345                walltime $times(walltime) cputime $times(cputime) \
346                status $status]
[4127]347            }
348
349            #
350            # Scan through stderr channel and look for statements that
351            # represent grid jobs that were executed.  The statements
352            # look like this:
353            #
354            # MiddlewareTime: job=1 event=simulation start=3.001094 ...
355            #
356            set subjobs 0
357            while {[regexp -indices {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)} $job(error) match]} {
358                foreach {p0 p1} $match break
359                if {[string index $job(error) $p0] == "\n"} { incr p0 }
360
361                catch {unset data}
362                array set data {
363                    job 1
364                    event simulation
365                    start 0
366                    walltime 0
367                    cputime 0
368                    status 0
369                }
370                foreach arg [lrange [string range $job(error) $p0 $p1] 1 end] {
371                    foreach {key val} [split $arg =] break
372                    set data($key) $val
373                }
374                set data(job) [expr {$jobnum+$data(job)}]
375                set data(event) "subsimulation"
376                set data(start) [expr {$times(start)+$data(start)}]
377
378                set details ""
379                foreach key {job event start walltime cputime status} {
380                    # add required keys in a particular order
381                    lappend details $key $data($key)
382                    unset data($key)
383                }
384                foreach key [array names data] {
385                    # add anything else that the client gave -- venue, etc.
386                    lappend details $key $data($key)
387                }
388
389                if {[string length $jobstats] > 0} {
390                    uplevel #0 $jobstats $details
391                }
392
393                incr subjobs
394
395                # done -- remove this statement
396                set job(error) [string replace $job(error) $p0 $p1]
397            }
398            incr jobnum $subjobs
399        }
400
401    } else {
402        set job(error) "$result\n$errorInfo"
403    }
404    if {$status == 0} {
[6021]405        # file delete -force -- $file
[4127]406    }
407
408    # see if the job was aborted
409    if {[regexp {^KILLED} $job(control)]} {
410        _log run aborted
411        return [list 0 "ABORT"]
412    }
413
414    #
415    # If successful, return the output, which should include
416    # a reference to the run.xml file containing results.
417    #
[6021]418
[4127]419    if {$status == 0} {
420        set result [string trim $job(output)]
[6021]421
422        if {$uq_type != ""} {
423            # UQ. Collect data from all jobs and put it in one xml run file.
424            file delete -force -- run_uq.xml
[6191]425            if {[catch {exec puq.sh analyze puq_[pid].hdf5} res]} {
[6021]426                set fp [open "uq_debug.err" r]
427                set rdata [read $fp]
428                close $fp
429                puts "PUQ analysis failed: $res\n$rdata"
430                error "UQ analysis failed: $res\n$rdata"
431            } else {
432                append result "\n" $res
433            }
434        }
[4127]435        if {[regexp {=RAPPTURE-RUN=>([^\n]+)} $result match file]} {
[4970]436            set _lastrun $file
437
[4127]438            set status [catch {Rappture::library $file} result]
439            if {$status == 0} {
440                # add cputime info to run.xml file
441                $result put output.walltime $times(walltime)
442                $result put output.cputime $times(cputime)
443                if {[info exists env(SESSION)]} {
444                    $result put output.session $env(SESSION)
445                }
446            } else {
447                global errorInfo
448                set result "$result\n$errorInfo"
449            }
450
[4970]451            file delete -force -- $file
[4127]452        } else {
453            set status 1
454            set result "Can't find result file in output.\nDid you call Rappture
455::result in your simulator?"
456        }
457    } elseif {$job(output) ne "" || $job(error) ne ""} {
458        set result [string trim "$job(output)\n$job(error)"]
459    }
460
461    # log final status for the run
462    if {$status == 0} {
463        _log run finished
464    } else {
465        _log run failed [list $result]
466    }
467
468    return [list $status $result]
469}
470
471# ----------------------------------------------------------------------
[6021]472#  Turn the command string from tool.xml into the proper syntax to use
473#  with a submit parameter sweep with a temlate file.  Proper quoting
474# of the template file is necessary to prevent submit from being too smart
475# and converting it to a full pathname.
476# ----------------------------------------------------------------------
477itcl::body Rappture::Task::_build_submit_cmd {cmd tfile params_file} {
478    set quote_next 0
479    set newcmd "submit --progress submit --runName=puq -l -i @:$tfile -d $params_file"
480    set cmds [split $cmd " "]
481    for {set i 0} {$i < [llength $cmds]} {incr i} {
482        set arg [lindex $cmds $i]
483        if {$quote_next == 1} {
484            set nc [string range $arg 0 0]
485            if {$nc != "\""} {
486                set arg "\"\\\"$arg\\\"\""
487            }
488        }
489        if {$arg == "--eval"} {
490            set quote_next 1
491        } else {
492            set quote_next 0
493        }
494        if {$arg == "@driver"} {
495            set arg "\"\\\"$tfile\\\"\""
496        }
497        append newcmd " " $arg
498    }
499    regsub -all @driver $newcmd $tfile newcmd
500    return $newcmd
501}
502
503# ----------------------------------------------------------------------
[4127]504# USAGE: _mkdir <directory>
505#
506# Used internally to create the <directory> in the file system.
507# The parent directory is also created, as needed.
508# ----------------------------------------------------------------------
509itcl::body Rappture::Task::_mkdir {dir} {
510    set parent [file dirname $dir]
511    if {$parent ne "." && $parent ne "/"} {
512        if {![file exists $parent]} {
513            _mkdir $parent
514        }
515    }
516    file mkdir $dir
517}
518
519
520# ----------------------------------------------------------------------
521# USAGE: abort
522#
523# Clients use this during a "run" to abort the current job.
524# Kills the job and forces the "run" method to return.
525# ----------------------------------------------------------------------
526itcl::body Rappture::Task::abort {} {
527    _log run abort
528    set job(control) "abort"
529}
530
531# ----------------------------------------------------------------------
532# USAGE: reset
533#
534# Resets all input values to their defaults.  Sometimes used just
535# before a run to reset to a clean state.
536# ----------------------------------------------------------------------
537itcl::body Rappture::Task::reset {} {
538    $_xmlobj copy "" from $_origxml ""
539    foreach path [Rappture::entities -as path $_xmlobj input] {
540        if {[$_xmlobj element -as type $path.default] ne ""} {
541            set defval [$_xmlobj get $path.default]
542            $_xmlobj put $path.current $defval
543        }
544    }
545}
546
547# ----------------------------------------------------------------------
548# USAGE: xml <subcommand> ?<arg> <arg> ...?
549# USAGE: xml object
550#
551# Used by clients to manipulate the underlying XML data for this
552# tool.  The <subcommand> can be any operation supported by a
553# Rappture::library object.  Clients can also request the XML object
554# directly by using the "object" subcommand.
555# ----------------------------------------------------------------------
556itcl::body Rappture::Task::xml {args} {
557    if {"object" == $args} {
558        return $_xmlobj
559    }
560    return [eval $_xmlobj $args]
561}
562
563# ----------------------------------------------------------------------
[4970]564# USAGE: save <xmlobj> ?<filename>?
565#
566# Used by clients to save the contents of an <xmlobj> representing
567# a run out to the given file.  If <filename> is not specified, then
568# it uses the -resultsdir and other settings to do what Rappture
569# would normally do with the output.
570# ----------------------------------------------------------------------
571itcl::body Rappture::Task::save {xmlobj {filename ""}} {
572    if {$filename eq ""} {
573        # if there's a results_directory defined in the resources
574        # file, then move the run.xml file there for storage
575        set rdir ""
576        if {$resultdir eq "@default"} {
577            if {[info exists _resources(-resultdir)]} {
578                set rdir $_resources(-resultdir)
579            } else {
580                set rdir "."
581            }
582        } elseif {$resultdir ne ""} {
583            set rdir $resultdir
584        }
585
586        # use the runfile name generated by the last run
587        if {$_lastrun ne ""} {
588            set filename [file join $rdir $_lastrun]
589        } else {
590            set filename [file join $rdir run.xml]
591        }
592    }
593
594    # add any last-minute metadata
595    $xmlobj put output.time [clock format [clock seconds]]
596
597    $xmlobj put tool.version.rappture.version $::Rappture::version
598    $xmlobj put tool.version.rappture.revision $::Rappture::build
599
600    if {[info exists ::tcl_platform(user)]} {
601        $xmlobj put output.user $::tcl_platform(user)
602    }
603
604    # save the output
605    set rdir [file dirname $filename]
606    if {![file exists $rdir]} {
607        _mkdir $rdir
608    }
609
610    set fid [open $filename w]
611    puts $fid "<?xml version=\"1.0\"?>"
612    puts $fid [$xmlobj xml]
613    close $fid
614
615    _log output saved in $filename
616}
617
618# ----------------------------------------------------------------------
[4127]619# USAGE: _output <data>
620#
621# Used internally to send each bit of output <data> coming from the
622# tool onto the caller, so the user can see progress.
623# ----------------------------------------------------------------------
624itcl::body Rappture::Task::_output {data} {
625    if {[string length $_outputcb] > 0} {
626        uplevel #0 $_outputcb [list $data]
627    }
628}
629
630# ----------------------------------------------------------------------
631# USAGE: _log <cmd> <arg> <arg> ...
632#
633# Used internally to log interesting events during the run.  If the
634# -logger option is set (to Rappture::Logger::log, or something like
635# that), then the arguments to this method are passed along to the
636# logger and written out to a log file.  Logging is off by default,
637# so this method does nothing unless -logger is set.
638# ----------------------------------------------------------------------
639itcl::body Rappture::Task::_log {args} {
640    if {[string length $logger] > 0} {
641        uplevel #0 $logger [list $args]
642    }
643}
644
645# ----------------------------------------------------------------------
646# USAGE: MiddlewareTime <key> <value> ...
647#
648# Used as the default method for reporting job status information.
649# Implements the old HUBzero method of reporting job status info to
650# stderr, which can then be picked up by the tool session container.
651# Most tools use the "submit" command, which talks directly to a
652# database to log job information, so this isn't really needed.  But
653# it doesn't hurt to have this and it can be useful in some cases.
654# ----------------------------------------------------------------------
655itcl::body Rappture::Task::MiddlewareTime {args} {
656    set line "MiddlewareTime:"
657    foreach {key val} $args {
658        append line " $key=$val"
659    }
660    puts stderr $line
661}
[6021]662
663
664#
665# Send the list of parameters to a python program so it can call PUQ
666# and get a CSV file containing the parameter values to use for the runs.
667itcl::body Rappture::Task::_get_params {varlist uq_type uq_args} {
668    set pid [pid]
[6191]669    # puts "puq.sh get_params $pid $varlist $uq_type $uq_args"
[6208]670    if { [catch {
671        exec puq.sh get_params $pid $varlist $uq_type $uq_args
672    } errs] != 0 } {
673        set contents {}
674        if { [file exists "uq_debug.err"] } {
675            set fp [open "uq_debug.err" r]
676            set contents [read $fp]
677            close $fp
678        }
679        puts "get_params.py failed: $errs\n$contents"
680        error "get_params.py failed: $errs\n$contents"
[6021]681    }
682    return params[pid].csv
683}
Note: See TracBrowser for help on using the repository browser.