source: trunk/lang/tcl/scripts/task.tcl @ 6039

Last change on this file since 6039 was 6039, checked in by ldelgass, 7 years ago

merge r5982 from 1.4 branch (report signal in task)

File size: 24.3 KB
Line 
1# -*- mode: tcl; indent-tabs-mode: nil -*-
2# ----------------------------------------------------------------------
3#  COMPONENT: task - represents the executable part of a tool
4#
5#  This object is an executable version of a Rappture xml file.
6#  A tool is a task plus its graphical user interface.  Each task
7#  resides in an installation directory with other tool resources
8#  (libraries, examples, etc.).  Each task is defined by its inputs
9#  and outputs, and understands the context in which it executes
10#  (via exec, submit, mx, etc.).
11# ======================================================================
12#  AUTHOR:  Michael McLennan, Purdue University
13#  Copyright (c) 2004-2014  HUBzero Foundation, LLC
14#
15#  See the file "license.terms" for information on usage and
16#  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
17# ======================================================================
18package require BLT
19
20itcl::class Rappture::Task {
21    public variable logger ""
22    public variable jobstats Rappture::Task::MiddlewareTime
23    public variable resultdir "@default"
24
25    constructor {xmlobj installdir args} { # defined below }
26    destructor { # defined below }
27
28    public method installdir {} { return $_installdir }
29
30    public method run {args}
31    public method get_uq {args}
32    public method abort {}
33    public method reset {}
34    public method xml {args}
35    public method save {xmlobj {name ""}}
36
37    protected method _mkdir {dir}
38    protected method _output {data}
39    protected method _log {args}
40    protected method _build_submit_cmd {cmd tfile params_file}
41    protected method _get_params {varlist uq_type uq_args}
42    private method GetSignal { signal }
43
44    private variable _xmlobj ""      ;# XML object with inputs/outputs
45    private variable _origxml ""     ;# copy of original XML (for reset)
46    private variable _lastrun ""     ;# name of last run file
47    private variable _installdir ""  ;# installation directory for this tool
48    private variable _outputcb ""    ;# callback for tool output
49    private common job               ;# array var used for blt::bgexec jobs
50    private common jobnum 0          ;# counter for unique job number
51
52    # get global resources for this tool session
53    public proc resources {{option ""}}
54
55    public common _resources
56    public proc setAppName {name}   { set _resources(-appname) $name }
57    public proc setHubName {name}   { set _resources(-hubname) $name }
58    public proc setHubURL {name}    { set _resources(-huburl) $name }
59    public proc setSession {name}   { set _resources(-session) $name }
60    public proc setJobPrt {name}    { set _resources(-jobprotocol) $name }
61    public proc setResultDir {name} { set _resources(-resultdir) $name }
62
63    # default method for -jobstats control
64    public proc MiddlewareTime {args}
65}
66
67# must use this name -- plugs into Rappture::resources::load
68proc task_init_resources {} {
69    Rappture::resources::register \
70        application_name  Rappture::Task::setAppName \
71        application_id    Rappture::Task::setAppId \
72        hub_name          Rappture::Task::setHubName \
73        hub_url           Rappture::Task::setHubURL \
74        session_token     Rappture::Task::setSession \
75        job_protocol      Rappture::Task::setJobPrt \
76        results_directory Rappture::Task::setResultDir
77}
78
79# ----------------------------------------------------------------------
80# CONSTRUCTOR
81# ----------------------------------------------------------------------
82itcl::body Rappture::Task::constructor {xmlobj installdir args} {
83    if {![Rappture::library isvalid $xmlobj]} {
84        error "bad value \"$xmlobj\": should be Rappture::Library"
85    }
86    set _xmlobj $xmlobj
87
88    # stash a copy of the original XML for later "reset" operations
89    set _origxml [Rappture::LibraryObj ::#auto "<?xml version=\"1.0\"?><run/>"]
90    $_origxml copy "" from $_xmlobj ""
91
92    if {![file exists $installdir]} {
93        error "directory \"$installdir\" doesn't exist"
94    }
95    set _installdir $installdir
96
97    eval configure $args
98}
99
100# ----------------------------------------------------------------------
101# DESTRUCTOR
102# ----------------------------------------------------------------------
103itcl::body Rappture::Task::destructor {} {
104    itcl::delete object $_origxml
105}
106
107# ----------------------------------------------------------------------
108# USAGE: resources ?-option?
109#
110# Clients use this to query information about the tool.
111# ----------------------------------------------------------------------
112itcl::body Rappture::Task::resources {{option ""}} {
113    if {$option == ""} {
114        return [array get _resources]
115    }
116    if {[info exists _resources($option)]} {
117        return $_resources($option)
118    }
119    return ""
120}
121
122itcl::body Rappture::Task::GetSignal {code} {
123    set signals {
124        HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV
125        USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN
126        TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS
127        RTMIN RTMIN+1 RTMIN+2 RTMIN+3 RTMAX-3 RTMAX-2 RTMAX-1 RTMAX
128    }
129    set sigNum [expr $code - 128]
130    if { $sigNum > 0 && $sigNum < [llength $signals] } {
131        return [lindex $signals $sigNum]
132    }
133    return "unknown exit code \"$code\""
134}
135
136itcl::body Rappture::Task::get_uq {args} {
137    foreach {path val} $args {
138        if {$path == "-uq_type"} {
139            set uq_type $val
140        } elseif {$path == "-uq_args"} {
141            set uq_args $val
142        }
143    }
144    #set varlist [$_xmlobj uq_get_vars]
145    foreach {varlist num} [$_xmlobj uq_get_vars] break
146    return [Rappture::UQ ::#auto $varlist $num $uq_type $uq_args]
147}
148
149# ----------------------------------------------------------------------
150# USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>?
151#
152# This method causes the tool to run.  A "driver.xml" file is created
153# as the input for the run.  That file is fed to the executable
154# according to the <tool><command> string, and the job is executed.
155#
156# Any "<path> <value>" arguments are used to override the current
157# settings from the GUI.  This is useful, for example, when filling
158# in missing simulation results from the analyzer.
159#
160# If the -output argument is included, then the next arg is a
161# callback command for output messages.  Any output that comes in
162# while the tool is running is sent back to the caller, so the user
163# can see progress running the tool.
164#
165# Returns a list of the form {status result}, where status is an
166# integer status code (0=success) and result is the output from the
167# simulator.  Successful output is something like {0 run1293921.xml},
168# where 0=success and run1293921.xml is the name of the file containing
169# results.
170# ----------------------------------------------------------------------
171itcl::body Rappture::Task::run {args} {
172    global env errorInfo
173
174    #
175    # Make sure that we save the proper application name.
176    # Actually, the best place to get this information is
177    # straight from the "installtool" script, but just in
178    # case we have an older tool, we should insert the
179    # tool name from the resources config file.
180    #
181    if {[info exists _resources(-appname)]
182          && $_resources(-appname) ne ""
183          && [$_xmlobj get tool.name] eq ""} {
184        $_xmlobj put tool.name $_resources(-appname)
185    }
186
187    # if there are any args, use them to override parameters
188    set _outputcb ""
189    set uq_type ""
190    foreach {path val} $args {
191        if {$path == "-output"} {
192            set _outputcb $val
193        } elseif {$path == "-uq_type"} {
194            set uq_type $val
195        } elseif {$path == "-uq_args"} {
196            set uq_args $val
197        } else {
198            $_xmlobj put $path.current $val
199        }
200    }
201
202    foreach item {control output error} { set job($item) "" }
203
204    # Set limits for cpu time
205    set limit [$_xmlobj get tool.limits.cputime]
206    if { $limit == "unlimited" } {
207        set limit 43200;                # 12 hours
208    } else {
209        if { [scan $limit "%d" dum] != 1 } {
210            set limit 14400;            # 4 hours by default
211        } elseif { $limit > 43200 } {
212            set limit 43200;            # limit to 12 hrs.
213        } elseif { $limit < 10 } {
214            set limit 10;               # lower bound is 10 seconds.
215        }
216    }
217    Rappture::rlimit set cputime $limit
218
219    # write out the driver.xml file for the tool
220    set file "driver[pid].xml"
221    set status [catch {
222        set fid [open $file w]
223        puts $fid "<?xml version=\"1.0\"?>"
224        puts $fid [$_xmlobj xml]
225        close $fid
226    } result]
227
228    if {$uq_type != ""} {
229        # Copy xml into a new file
230        set tfile "template[pid].xml"
231        set fid [open $tfile w]
232        puts $fid "<?xml version=\"1.0\"?>"
233        puts $fid [$_xmlobj xml]
234        close $fid
235
236        # Return a list of the UQ variables and their PDFs.
237        # Also turns $tfile into a template file.
238        set uq_varlist [lindex [$_xmlobj uq_get_vars $tfile] 0]
239    }
240
241
242    # execute the tool using the path from the tool description
243    if {$status == 0} {
244        set cmd [$_xmlobj get tool.command]
245        regsub -all @tool $cmd $_installdir cmd
246        set cmd [string trimleft $cmd " "]
247
248        if { $cmd == "" } {
249            puts stderr "cmd is empty"
250            return [list 1 "Command is empty.\n\nThere is no command specified by\n\n <command>\n </command>\n\nin the tool.xml file."]
251        }
252
253        if {$uq_type == ""} {
254            regsub -all @driver $cmd $file cmd
255
256            switch -glob -- [resources -jobprotocol] {
257                "submit*" {
258                    # if job_protocol is "submit", then use use submit command
259                    set cmd "submit --local $cmd"
260                }
261                "mx" {
262                    # metachory submission
263                    set cmd "mx $cmd"
264                }
265                "exec" {
266                    # default -- nothing special
267                }
268            }
269        } else {
270            set params_file [_get_params $uq_varlist $uq_type $uq_args]
271            set cmd [_build_submit_cmd $cmd $tfile $params_file]
272            file delete -force puq
273        }
274
275        $_xmlobj put tool.execute $cmd
276
277        # starting job...
278        set _lastrun ""
279        _log run started
280        Rappture::rusage mark
281
282        if {0 == [string compare -nocase -length 5 $cmd "ECHO "] } {
283            set status 0;
284            set job(output) [string range $cmd 5 end]
285        } else {
286            set status [catch {
287                set ::Rappture::Task::job(control) ""
288                eval blt::bgexec \
289                ::Rappture::Task::job(control) \
290                -keepnewline yes \
291                -killsignal SIGTERM \
292                -onoutput [list [itcl::code $this _output]] \
293                -output ::Rappture::Task::job(output) \
294                -error ::Rappture::Task::job(error) \
295                $cmd
296            } result]
297
298            if { $status != 0 } {
299                # We're here because the exec-ed program failed
300                set logmesg $result
301                if { $::Rappture::Task::job(control) ne "" } {
302                    foreach { token pid code mesg } \
303                    $::Rappture::Task::job(control) break
304                    if { $token == "EXITED" } {
305                       # This means that the program exited normally but
306                       # returned a non-zero exitcode.  Consider this an
307                       # invalid result from the program.  Append the stderr
308                       # from the program to the message.
309                       if {$code > 128} {
310                          set logmesg "Program signaled: signal was [GetSignal]"
311                       } else {
312                          set logmesg "Program finished: exit code is $code"
313                       }
314                       set result "$logmesg\n\n$::Rappture::Task::job(error)"
315                    } elseif { $token == "abort" }  {
316                        # The user pressed the abort button.
317                        set logmesg "Program terminated by user."
318                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
319                    } else {
320                        # Abnormal termination
321                        set logmesg "Abnormal program termination: $mesg"
322                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
323                    }
324                }
325                _log run failed [list $logmesg]
326                return [list $status $result]
327            }
328        }
329        # ...job is finished
330        array set times [Rappture::rusage measure]
331
332        if {[resources -jobprotocol] ne "submit"} {
333            set id [$_xmlobj get tool.id]
334            set vers [$_xmlobj get tool.version.application.revision]
335            set simulation simulation
336            if { $id ne "" && $vers ne "" } {
337                set pid [pid]
338                set simulation ${pid}_${id}_r${vers}
339            }
340
341            # need to save job info? then invoke the callback
342            if {[string length $jobstats] > 0} {
343                uplevel #0 $jobstats [list job [incr jobnum] \
344                event $simulation start $times(start) \
345                walltime $times(walltime) cputime $times(cputime) \
346                status $status]
347            }
348
349            #
350            # Scan through stderr channel and look for statements that
351            # represent grid jobs that were executed.  The statements
352            # look like this:
353            #
354            # MiddlewareTime: job=1 event=simulation start=3.001094 ...
355            #
356            set subjobs 0
357            while {[regexp -indices {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)} $job(error) match]} {
358                foreach {p0 p1} $match break
359                if {[string index $job(error) $p0] == "\n"} { incr p0 }
360
361                catch {unset data}
362                array set data {
363                    job 1
364                    event simulation
365                    start 0
366                    walltime 0
367                    cputime 0
368                    status 0
369                }
370                foreach arg [lrange [string range $job(error) $p0 $p1] 1 end] {
371                    foreach {key val} [split $arg =] break
372                    set data($key) $val
373                }
374                set data(job) [expr {$jobnum+$data(job)}]
375                set data(event) "subsimulation"
376                set data(start) [expr {$times(start)+$data(start)}]
377
378                set details ""
379                foreach key {job event start walltime cputime status} {
380                    # add required keys in a particular order
381                    lappend details $key $data($key)
382                    unset data($key)
383                }
384                foreach key [array names data] {
385                    # add anything else that the client gave -- venue, etc.
386                    lappend details $key $data($key)
387                }
388
389                if {[string length $jobstats] > 0} {
390                    uplevel #0 $jobstats $details
391                }
392
393                incr subjobs
394
395                # done -- remove this statement
396                set job(error) [string replace $job(error) $p0 $p1]
397            }
398            incr jobnum $subjobs
399        }
400
401    } else {
402        set job(error) "$result\n$errorInfo"
403    }
404    if {$status == 0} {
405        # file delete -force -- $file
406    }
407
408    # see if the job was aborted
409    if {[regexp {^KILLED} $job(control)]} {
410        _log run aborted
411        return [list 0 "ABORT"]
412    }
413
414    #
415    # If successful, return the output, which should include
416    # a reference to the run.xml file containing results.
417    #
418
419    if {$status == 0} {
420        set result [string trim $job(output)]
421
422        if {$uq_type != ""} {
423            # UQ. Collect data from all jobs and put it in one xml run file.
424            file delete -force -- run_uq.xml
425            if {[catch {exec puq analyze puq_[pid].hdf5} res]} {
426                set fp [open "uq_debug.err" r]
427                set rdata [read $fp]
428                close $fp
429                puts "PUQ analysis failed: $res\n$rdata"
430                error "UQ analysis failed: $res\n$rdata"
431            } else {
432                append result "\n" $res
433            }
434        }
435        if {[regexp {=RAPPTURE-RUN=>([^\n]+)} $result match file]} {
436            set _lastrun $file
437
438            set status [catch {Rappture::library $file} result]
439            if {$status == 0} {
440                # add cputime info to run.xml file
441                $result put output.walltime $times(walltime)
442                $result put output.cputime $times(cputime)
443                if {[info exists env(SESSION)]} {
444                    $result put output.session $env(SESSION)
445                }
446            } else {
447                global errorInfo
448                set result "$result\n$errorInfo"
449            }
450
451            file delete -force -- $file
452        } else {
453            set status 1
454            set result "Can't find result file in output.\nDid you call Rappture
455::result in your simulator?"
456        }
457    } elseif {$job(output) ne "" || $job(error) ne ""} {
458        set result [string trim "$job(output)\n$job(error)"]
459    }
460
461    # log final status for the run
462    if {$status == 0} {
463        _log run finished
464    } else {
465        _log run failed [list $result]
466    }
467
468    return [list $status $result]
469}
470
471# ----------------------------------------------------------------------
472#  Turn the command string from tool.xml into the proper syntax to use
473#  with a submit parameter sweep with a temlate file.  Proper quoting
474# of the template file is necessary to prevent submit from being too smart
475# and converting it to a full pathname.
476# ----------------------------------------------------------------------
477itcl::body Rappture::Task::_build_submit_cmd {cmd tfile params_file} {
478    set quote_next 0
479    set newcmd "submit --progress submit --runName=puq -l -i @:$tfile -d $params_file"
480    set cmds [split $cmd " "]
481    for {set i 0} {$i < [llength $cmds]} {incr i} {
482        set arg [lindex $cmds $i]
483        if {$quote_next == 1} {
484            set nc [string range $arg 0 0]
485            if {$nc != "\""} {
486                set arg "\"\\\"$arg\\\"\""
487            }
488        }
489        if {$arg == "--eval"} {
490            set quote_next 1
491        } else {
492            set quote_next 0
493        }
494        if {$arg == "@driver"} {
495            set arg "\"\\\"$tfile\\\"\""
496        }
497        append newcmd " " $arg
498    }
499    regsub -all @driver $newcmd $tfile newcmd
500    return $newcmd
501}
502
503# ----------------------------------------------------------------------
504# USAGE: _mkdir <directory>
505#
506# Used internally to create the <directory> in the file system.
507# The parent directory is also created, as needed.
508# ----------------------------------------------------------------------
509itcl::body Rappture::Task::_mkdir {dir} {
510    set parent [file dirname $dir]
511    if {$parent ne "." && $parent ne "/"} {
512        if {![file exists $parent]} {
513            _mkdir $parent
514        }
515    }
516    file mkdir $dir
517}
518
519
520# ----------------------------------------------------------------------
521# USAGE: abort
522#
523# Clients use this during a "run" to abort the current job.
524# Kills the job and forces the "run" method to return.
525# ----------------------------------------------------------------------
526itcl::body Rappture::Task::abort {} {
527    _log run abort
528    set job(control) "abort"
529}
530
531# ----------------------------------------------------------------------
532# USAGE: reset
533#
534# Resets all input values to their defaults.  Sometimes used just
535# before a run to reset to a clean state.
536# ----------------------------------------------------------------------
537itcl::body Rappture::Task::reset {} {
538    $_xmlobj copy "" from $_origxml ""
539    foreach path [Rappture::entities -as path $_xmlobj input] {
540        if {[$_xmlobj element -as type $path.default] ne ""} {
541            set defval [$_xmlobj get $path.default]
542            $_xmlobj put $path.current $defval
543        }
544    }
545}
546
547# ----------------------------------------------------------------------
548# USAGE: xml <subcommand> ?<arg> <arg> ...?
549# USAGE: xml object
550#
551# Used by clients to manipulate the underlying XML data for this
552# tool.  The <subcommand> can be any operation supported by a
553# Rappture::library object.  Clients can also request the XML object
554# directly by using the "object" subcommand.
555# ----------------------------------------------------------------------
556itcl::body Rappture::Task::xml {args} {
557    if {"object" == $args} {
558        return $_xmlobj
559    }
560    return [eval $_xmlobj $args]
561}
562
563# ----------------------------------------------------------------------
564# USAGE: save <xmlobj> ?<filename>?
565#
566# Used by clients to save the contents of an <xmlobj> representing
567# a run out to the given file.  If <filename> is not specified, then
568# it uses the -resultsdir and other settings to do what Rappture
569# would normally do with the output.
570# ----------------------------------------------------------------------
571itcl::body Rappture::Task::save {xmlobj {filename ""}} {
572    if {$filename eq ""} {
573        # if there's a results_directory defined in the resources
574        # file, then move the run.xml file there for storage
575        set rdir ""
576        if {$resultdir eq "@default"} {
577            if {[info exists _resources(-resultdir)]} {
578                set rdir $_resources(-resultdir)
579            } else {
580                set rdir "."
581            }
582        } elseif {$resultdir ne ""} {
583            set rdir $resultdir
584        }
585
586        # use the runfile name generated by the last run
587        if {$_lastrun ne ""} {
588            set filename [file join $rdir $_lastrun]
589        } else {
590            set filename [file join $rdir run.xml]
591        }
592    }
593
594    # add any last-minute metadata
595    $xmlobj put output.time [clock format [clock seconds]]
596
597    $xmlobj put tool.version.rappture.version $::Rappture::version
598    $xmlobj put tool.version.rappture.revision $::Rappture::build
599
600    if {[info exists ::tcl_platform(user)]} {
601        $xmlobj put output.user $::tcl_platform(user)
602    }
603
604    # save the output
605    set rdir [file dirname $filename]
606    if {![file exists $rdir]} {
607        _mkdir $rdir
608    }
609
610    set fid [open $filename w]
611    puts $fid "<?xml version=\"1.0\"?>"
612    puts $fid [$xmlobj xml]
613    close $fid
614
615    _log output saved in $filename
616}
617
618# ----------------------------------------------------------------------
619# USAGE: _output <data>
620#
621# Used internally to send each bit of output <data> coming from the
622# tool onto the caller, so the user can see progress.
623# ----------------------------------------------------------------------
624itcl::body Rappture::Task::_output {data} {
625    if {[string length $_outputcb] > 0} {
626        uplevel #0 $_outputcb [list $data]
627    }
628}
629
630# ----------------------------------------------------------------------
631# USAGE: _log <cmd> <arg> <arg> ...
632#
633# Used internally to log interesting events during the run.  If the
634# -logger option is set (to Rappture::Logger::log, or something like
635# that), then the arguments to this method are passed along to the
636# logger and written out to a log file.  Logging is off by default,
637# so this method does nothing unless -logger is set.
638# ----------------------------------------------------------------------
639itcl::body Rappture::Task::_log {args} {
640    if {[string length $logger] > 0} {
641        uplevel #0 $logger [list $args]
642    }
643}
644
645# ----------------------------------------------------------------------
646# USAGE: MiddlewareTime <key> <value> ...
647#
648# Used as the default method for reporting job status information.
649# Implements the old HUBzero method of reporting job status info to
650# stderr, which can then be picked up by the tool session container.
651# Most tools use the "submit" command, which talks directly to a
652# database to log job information, so this isn't really needed.  But
653# it doesn't hurt to have this and it can be useful in some cases.
654# ----------------------------------------------------------------------
655itcl::body Rappture::Task::MiddlewareTime {args} {
656    set line "MiddlewareTime:"
657    foreach {key val} $args {
658        append line " $key=$val"
659    }
660    puts stderr $line
661}
662
663
664#
665# Send the list of parameters to a python program so it can call PUQ
666# and get a CSV file containing the parameter values to use for the runs.
667itcl::body Rappture::Task::_get_params {varlist uq_type uq_args} {
668    set pid [pid]
669    # puts "puq get_params $pid $varlist $uq_type $uq_args"
670    if {[catch {exec puq get_params $pid $varlist $uq_type $uq_args}]} {
671        set fp [open "uq_debug.err" r]
672        set rdata [read $fp]
673        close $fp
674        puts "get_params.py failed: $rdata"
675        error "get_params.py: $rdata"
676    }
677    return params[pid].csv
678}
Note: See TracBrowser for help on using the repository browser.