source: branches/1.5/lang/tcl/scripts/task.tcl @ 6112

Last change on this file since 6112 was 6112, checked in by ldelgass, 9 years ago

merge r6111 from trunk (GetSignal? needs argument)

File size: 24.5 KB
Line 
1# -*- mode: tcl; indent-tabs-mode: nil -*-
2# ----------------------------------------------------------------------
3#  COMPONENT: task - represents the executable part of a tool
4#
5#  This object is an executable version of a Rappture xml file.
6#  A tool is a task plus its graphical user interface.  Each task
7#  resides in an installation directory with other tool resources
8#  (libraries, examples, etc.).  Each task is defined by its inputs
9#  and outputs, and understands the context in which it executes
10#  (via exec, submit, mx, etc.).
11# ======================================================================
12#  AUTHOR:  Michael McLennan, Purdue University
13#  Copyright (c) 2004-2014  HUBzero Foundation, LLC
14#
15#  See the file "license.terms" for information on usage and
16#  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
17# ======================================================================
18package require BLT
19
20itcl::class Rappture::Task {
21    public variable logger ""
22    public variable jobstats Rappture::Task::MiddlewareTime
23    public variable resultdir "@default"
24
25    constructor {xmlobj installdir args} { # defined below }
26    destructor { # defined below }
27
28    public method installdir {} { return $_installdir }
29
30    public method run {args}
31    public method get_uq {args}
32    public method abort {}
33    public method reset {}
34    public method xml {args}
35    public method save {xmlobj {name ""}}
36
37    protected method _mkdir {dir}
38    protected method _output {data}
39    protected method _log {args}
40    protected method _build_submit_cmd {cmd tfile params_file}
41    protected method _get_params {varlist uq_type uq_args}
42    private method GetSignal { signal }
43
44    private variable _xmlobj ""      ;# XML object with inputs/outputs
45    private variable _origxml ""     ;# copy of original XML (for reset)
46    private variable _lastrun ""     ;# name of last run file
47    private variable _installdir ""  ;# installation directory for this tool
48    private variable _outputcb ""    ;# callback for tool output
49    private common job               ;# array var used for blt::bgexec jobs
50    private common jobnum 0          ;# counter for unique job number
51
52    # get global resources for this tool session
53    public proc resources {{option ""}}
54
55    public common _resources
56    public proc setAppName {name}   { set _resources(-appname) $name }
57    public proc setHubName {name}   { set _resources(-hubname) $name }
58    public proc setHubURL {name}    { set _resources(-huburl) $name }
59    public proc setSession {name}   { set _resources(-session) $name }
60    public proc setJobPrt {name}    { set _resources(-jobprotocol) $name }
61    public proc setResultDir {name} { set _resources(-resultdir) $name }
62
63    # default method for -jobstats control
64    public proc MiddlewareTime {args}
65}
66
67# must use this name -- plugs into Rappture::resources::load
68proc task_init_resources {} {
69    Rappture::resources::register \
70        application_name  Rappture::Task::setAppName \
71        application_id    Rappture::Task::setAppId \
72        hub_name          Rappture::Task::setHubName \
73        hub_url           Rappture::Task::setHubURL \
74        session_token     Rappture::Task::setSession \
75        job_protocol      Rappture::Task::setJobPrt \
76        results_directory Rappture::Task::setResultDir
77}
78
79# ----------------------------------------------------------------------
80# CONSTRUCTOR
81# ----------------------------------------------------------------------
82itcl::body Rappture::Task::constructor {xmlobj installdir args} {
83    if {![Rappture::library isvalid $xmlobj]} {
84        error "bad value \"$xmlobj\": should be Rappture::Library"
85    }
86    set _xmlobj $xmlobj
87
88    # stash a copy of the original XML for later "reset" operations
89    set _origxml [Rappture::LibraryObj ::#auto "<?xml version=\"1.0\"?><run/>"]
90    $_origxml copy "" from $_xmlobj ""
91
92    if {![file exists $installdir]} {
93        error "directory \"$installdir\" doesn't exist"
94    }
95    set _installdir $installdir
96
97    eval configure $args
98}
99
100# ----------------------------------------------------------------------
101# DESTRUCTOR
102# ----------------------------------------------------------------------
103itcl::body Rappture::Task::destructor {} {
104    itcl::delete object $_origxml
105}
106
107# ----------------------------------------------------------------------
108# USAGE: resources ?-option?
109#
110# Clients use this to query information about the tool.
111# ----------------------------------------------------------------------
112itcl::body Rappture::Task::resources {{option ""}} {
113    if {$option == ""} {
114        return [array get _resources]
115    }
116    if {[info exists _resources($option)]} {
117        return $_resources($option)
118    }
119    return ""
120}
121
122itcl::body Rappture::Task::GetSignal {code} {
123    set signals {
124        HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV
125        USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN
126        TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS
127        RTMIN RTMIN+1 RTMIN+2 RTMIN+3 RTMAX-3 RTMAX-2 RTMAX-1 RTMAX
128    }
129    set sigNum [expr $code - 128]
130    if { $sigNum > 0 && $sigNum < [llength $signals] } {
131        return [lindex $signals $sigNum]
132    }
133    return "unknown exit code \"$code\""
134}
135
136itcl::body Rappture::Task::get_uq {args} {
137    foreach {path val} $args {
138        if {$path == "-uq_type"} {
139            set uq_type $val
140        } elseif {$path == "-uq_args"} {
141            set uq_args $val
142        }
143    }
144    #set varlist [$_xmlobj uq_get_vars]
145    foreach {varlist num} [$_xmlobj uq_get_vars] break
146    return [Rappture::UQ ::#auto $varlist $num $uq_type $uq_args]
147}
148
149# ----------------------------------------------------------------------
150# USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>?
151#
152# This method causes the tool to run.  A "driver.xml" file is created
153# as the input for the run.  That file is fed to the executable
154# according to the <tool><command> string, and the job is executed.
155#
156# Any "<path> <value>" arguments are used to override the current
157# settings from the GUI.  This is useful, for example, when filling
158# in missing simulation results from the analyzer.
159#
160# If the -output argument is included, then the next arg is a
161# callback command for output messages.  Any output that comes in
162# while the tool is running is sent back to the caller, so the user
163# can see progress running the tool.
164#
165# Returns a list of the form {status result}, where status is an
166# integer status code (0=success) and result is the output from the
167# simulator.  Successful output is something like {0 run1293921.xml},
168# where 0=success and run1293921.xml is the name of the file containing
169# results.
170# ----------------------------------------------------------------------
171itcl::body Rappture::Task::run {args} {
172    global env errorInfo
173
174    #
175    # Make sure that we save the proper application name.
176    # Actually, the best place to get this information is
177    # straight from the "installtool" script, but just in
178    # case we have an older tool, we should insert the
179    # tool name from the resources config file.
180    #
181    if {[info exists _resources(-appname)]
182          && $_resources(-appname) ne ""
183          && [$_xmlobj get tool.name] eq ""} {
184        $_xmlobj put tool.name $_resources(-appname)
185    }
186
187    # if there are any args, use them to override parameters
188    set _outputcb ""
189    set uq_type ""
190    foreach {path val} $args {
191        if {$path == "-output"} {
192            set _outputcb $val
193        } elseif {$path == "-uq_type"} {
194            set uq_type $val
195        } elseif {$path == "-uq_args"} {
196            set uq_args $val
197        } else {
198            $_xmlobj put $path.current $val
199        }
200    }
201
202    foreach item {control output error} { set job($item) "" }
203
204    # Set limits for cpu time
205    set limit [$_xmlobj get tool.limits.cputime]
206    if { $limit == "unlimited" } {
207        set limit 43200;                # 12 hours
208    } else {
209        if { [scan $limit "%d" dum] != 1 } {
210            set limit 14400;            # 4 hours by default
211        } elseif { $limit > 43200 } {
212            set limit 43200;            # limit to 12 hrs.
213        } elseif { $limit < 10 } {
214            set limit 10;               # lower bound is 10 seconds.
215        }
216    }
217    Rappture::rlimit set cputime $limit
218
219    # write out the driver.xml file for the tool
220    global rapptureInfo
221    set file [file join $rapptureInfo(cwd) "driver[pid].xml"]
222    set status [catch {
223        set fid [open $file w]
224        puts $fid "<?xml version=\"1.0\"?>"
225        puts $fid [$_xmlobj xml]
226        close $fid
227    } result]
228
229    if {$uq_type != ""} {
230        # Copy xml into a new file
231        set tfile "template[pid].xml"
232        set fid [open $tfile w]
233        puts $fid "<?xml version=\"1.0\"?>"
234        puts $fid [$_xmlobj xml]
235        close $fid
236
237        # Return a list of the UQ variables and their PDFs.
238        # Also turns $tfile into a template file.
239        set uq_varlist [lindex [$_xmlobj uq_get_vars $tfile] 0]
240    }
241
242
243    # execute the tool using the path from the tool description
244    if {$status == 0} {
245        set cmd [$_xmlobj get tool.command]
246        regsub -all @tool $cmd $_installdir cmd
247        set cmd [string trimleft $cmd " "]
248
249        if { $cmd == "" } {
250            puts stderr "cmd is empty"
251            return [list 1 "Command is empty.\n\nThere is no command specified by\n\n <command>\n </command>\n\nin the tool.xml file."]
252        }
253
254        if {$uq_type == ""} {
255            regsub -all @driver $cmd $file cmd
256
257            switch -glob -- [resources -jobprotocol] {
258                "submit*" {
259                    # if job_protocol is "submit", then use use submit command
260                    set cmd "submit --local $cmd"
261                }
262                "mx" {
263                    # metachory submission
264                    set cmd "mx $cmd"
265                }
266                "exec" {
267                    # default -- nothing special
268                }
269            }
270        } else {
271            set params_file [_get_params $uq_varlist $uq_type $uq_args]
272            set cmd [_build_submit_cmd $cmd $tfile $params_file]
273            file delete -force puq
274        }
275
276        $_xmlobj put tool.execute $cmd
277
278        # starting job...
279        set _lastrun ""
280        _log run started
281        Rappture::rusage mark
282
283        if {0 == [string compare -nocase -length 5 $cmd "ECHO "] } {
284            set status 0;
285            set job(output) [string range $cmd 5 end]
286        } else {
287            set status [catch {
288                set ::Rappture::Task::job(control) ""
289                eval blt::bgexec \
290                ::Rappture::Task::job(control) \
291                -keepnewline yes \
292                -killsignal SIGTERM \
293                -onoutput [list [itcl::code $this _output]] \
294                -output ::Rappture::Task::job(output) \
295                -error ::Rappture::Task::job(error) \
296                $cmd
297            } result]
298
299            if { $status != 0 } {
300                # We're here because the exec-ed program failed
301                set logmesg $result
302                if { $::Rappture::Task::job(control) ne "" } {
303                    foreach { token pid code mesg } \
304                    $::Rappture::Task::job(control) break
305                    if { $token == "EXITED" } {
306                       # This means that the program exited normally but
307                       # returned a non-zero exitcode.  Consider this an
308                       # invalid result from the program.  Append the stderr
309                       # from the program to the message.
310                       if {$code > 128} {
311                          set logmesg "Program signaled: signal was [GetSignal $code]"
312                       } else {
313                          set logmesg "Program finished: exit code is $code"
314                       }
315                       set result "$logmesg\n\n$::Rappture::Task::job(error)"
316                    } elseif { $token == "abort" }  {
317                        # The user pressed the abort button.
318                        set logmesg "Program terminated by user."
319                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
320                    } else {
321                        # Abnormal termination
322                        set logmesg "Abnormal program termination: $mesg"
323                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
324                    }
325                }
326                _log run failed [list $logmesg]
327                return [list $status $result]
328            }
329        }
330        # ...job is finished
331        array set times [Rappture::rusage measure]
332
333        if {[resources -jobprotocol] ne "submit"} {
334            set id [$_xmlobj get tool.id]
335            set vers [$_xmlobj get tool.version.application.revision]
336            set simulation simulation
337            if { $id ne "" && $vers ne "" } {
338                set pid [pid]
339                set simulation ${pid}_${id}_r${vers}
340            }
341
342            # need to save job info? then invoke the callback
343            if {[string length $jobstats] > 0} {
344                uplevel #0 $jobstats [list job [incr jobnum] \
345                event $simulation start $times(start) \
346                walltime $times(walltime) cputime $times(cputime) \
347                status $status]
348            }
349
350            #
351            # Scan through stderr channel and look for statements that
352            # represent grid jobs that were executed.  The statements
353            # look like this:
354            #
355            # MiddlewareTime: job=1 event=simulation start=3.001094 ...
356            #
357            set subjobs 0
358            while {[regexp -indices {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)} $job(error) match]} {
359                foreach {p0 p1} $match break
360                if {[string index $job(error) $p0] == "\n"} { incr p0 }
361
362                catch {unset data}
363                array set data {
364                    job 1
365                    event simulation
366                    start 0
367                    walltime 0
368                    cputime 0
369                    status 0
370                }
371                foreach arg [lrange [string range $job(error) $p0 $p1] 1 end] {
372                    foreach {key val} [split $arg =] break
373                    set data($key) $val
374                }
375                set data(job) [expr {$jobnum+$data(job)}]
376                set data(event) "subsimulation"
377                set data(start) [expr {$times(start)+$data(start)}]
378
379                set details ""
380                foreach key {job event start walltime cputime status} {
381                    # add required keys in a particular order
382                    lappend details $key $data($key)
383                    unset data($key)
384                }
385                foreach key [array names data] {
386                    # add anything else that the client gave -- venue, etc.
387                    lappend details $key $data($key)
388                }
389
390                if {[string length $jobstats] > 0} {
391                    uplevel #0 $jobstats $details
392                }
393
394                incr subjobs
395
396                # done -- remove this statement
397                set job(error) [string replace $job(error) $p0 $p1]
398            }
399            incr jobnum $subjobs
400        }
401
402    } else {
403        set job(error) "$result\n$errorInfo"
404    }
405    if {$status == 0} {
406        # file delete -force -- $file
407    }
408
409    # see if the job was aborted
410    if {[regexp {^KILLED} $job(control)]} {
411        _log run aborted
412        return [list 0 "ABORT"]
413    }
414
415    #
416    # If successful, return the output, which should include
417    # a reference to the run.xml file containing results.
418    #
419
420    if {$status == 0} {
421        set result [string trim $job(output)]
422
423        if {$uq_type != ""} {
424            # UQ. Collect data from all jobs and put it in one xml run file.
425            file delete -force -- run_uq.xml
426            if {[catch {exec puq analyze puq_[pid].hdf5} res]} {
427                set fp [open "uq_debug.err" r]
428                set rdata [read $fp]
429                close $fp
430                puts "PUQ analysis failed: $res\n$rdata"
431                error "UQ analysis failed: $res\n$rdata"
432            } else {
433                append result "\n" $res
434            }
435        }
436        if {[regexp {=RAPPTURE-RUN=>([^\n]+)} $result match file]} {
437            set _lastrun $file
438
439            set status [catch {Rappture::library $file} result]
440            if {$status == 0} {
441                # add cputime info to run.xml file
442                $result put output.walltime $times(walltime)
443                $result put output.cputime $times(cputime)
444                if {[info exists env(SESSION)]} {
445                    $result put output.session $env(SESSION)
446                }
447            } else {
448                global errorInfo
449                set result "$result\n$errorInfo"
450            }
451
452            file delete -force -- $file
453        } else {
454            set status 1
455            set result "Can't find result file in output.\nDid you call Rappture
456::result in your simulator?"
457        }
458    } elseif {$job(output) ne "" || $job(error) ne ""} {
459        set result [string trim "$job(output)\n$job(error)"]
460    }
461
462    # log final status for the run
463    if {$status == 0} {
464        _log run finished
465    } else {
466        _log run failed [list $result]
467    }
468
469    return [list $status $result]
470}
471
472# ----------------------------------------------------------------------
473#  Turn the command string from tool.xml into the proper syntax to use
474#  with a submit parameter sweep with a temlate file.  Proper quoting
475# of the template file is necessary to prevent submit from being too smart
476# and converting it to a full pathname.
477# ----------------------------------------------------------------------
478itcl::body Rappture::Task::_build_submit_cmd {cmd tfile params_file} {
479    set quote_next 0
480    set newcmd "submit --progress submit --runName=puq -l -i @:$tfile -d $params_file"
481    set cmds [split $cmd " "]
482    for {set i 0} {$i < [llength $cmds]} {incr i} {
483        set arg [lindex $cmds $i]
484        if {$quote_next == 1} {
485            set nc [string range $arg 0 0]
486            if {$nc != "\""} {
487                set arg "\"\\\"$arg\\\"\""
488            }
489        }
490        if {$arg == "--eval"} {
491            set quote_next 1
492        } else {
493            set quote_next 0
494        }
495        if {$arg == "@driver"} {
496            set arg "\"\\\"$tfile\\\"\""
497        }
498        append newcmd " " $arg
499    }
500    regsub -all @driver $newcmd $tfile newcmd
501    return $newcmd
502}
503
504# ----------------------------------------------------------------------
505# USAGE: _mkdir <directory>
506#
507# Used internally to create the <directory> in the file system.
508# The parent directory is also created, as needed.
509# ----------------------------------------------------------------------
510itcl::body Rappture::Task::_mkdir {dir} {
511    set parent [file dirname $dir]
512    if {$parent ne "." && $parent ne "/"} {
513        if {![file exists $parent]} {
514            _mkdir $parent
515        }
516    }
517    file mkdir $dir
518}
519
520
521# ----------------------------------------------------------------------
522# USAGE: abort
523#
524# Clients use this during a "run" to abort the current job.
525# Kills the job and forces the "run" method to return.
526# ----------------------------------------------------------------------
527itcl::body Rappture::Task::abort {} {
528    _log run abort
529    set job(control) "abort"
530}
531
532# ----------------------------------------------------------------------
533# USAGE: reset
534#
535# Resets all input values to their defaults.  Sometimes used just
536# before a run to reset to a clean state.
537# ----------------------------------------------------------------------
538itcl::body Rappture::Task::reset {} {
539    $_xmlobj copy "" from $_origxml ""
540    foreach path [Rappture::entities -as path $_xmlobj input] {
541        if {[$_xmlobj element -as type $path.default] ne ""} {
542            set defval [$_xmlobj get $path.default]
543            $_xmlobj put $path.current $defval
544        }
545    }
546}
547
548# ----------------------------------------------------------------------
549# USAGE: xml <subcommand> ?<arg> <arg> ...?
550# USAGE: xml object
551#
552# Used by clients to manipulate the underlying XML data for this
553# tool.  The <subcommand> can be any operation supported by a
554# Rappture::library object.  Clients can also request the XML object
555# directly by using the "object" subcommand.
556# ----------------------------------------------------------------------
557itcl::body Rappture::Task::xml {args} {
558    if {"object" == $args} {
559        return $_xmlobj
560    }
561    return [eval $_xmlobj $args]
562}
563
564# ----------------------------------------------------------------------
565# USAGE: save <xmlobj> ?<filename>?
566#
567# Used by clients to save the contents of an <xmlobj> representing
568# a run out to the given file.  If <filename> is not specified, then
569# it uses the -resultsdir and other settings to do what Rappture
570# would normally do with the output.
571# ----------------------------------------------------------------------
572itcl::body Rappture::Task::save {xmlobj {filename ""}} {
573    if {$filename eq ""} {
574        # if there's a results_directory defined in the resources
575        # file, then move the run.xml file there for storage
576        set rdir ""
577        if {$resultdir eq "@default"} {
578            if {[info exists _resources(-resultdir)]} {
579                set rdir $_resources(-resultdir)
580            } else {
581                global rapptureInfo
582                set rdir $rapptureInfo(cwd)
583            }
584        } elseif {$resultdir ne ""} {
585            set rdir $resultdir
586        }
587
588        # use the runfile name generated by the last run
589        if {$_lastrun ne ""} {
590            set filename [file join $rdir $_lastrun]
591        } else {
592            set filename [file join $rdir run.xml]
593        }
594    }
595
596    # add any last-minute metadata
597    $xmlobj put output.time [clock format [clock seconds]]
598
599    $xmlobj put tool.version.rappture.version $::Rappture::version
600    $xmlobj put tool.version.rappture.revision $::Rappture::build
601    $xmlobj put output.filename $filename
602    $xmlobj put output.version $Rappture::version
603   
604    if {[info exists ::tcl_platform(user)]} {
605        $xmlobj put output.user $::tcl_platform(user)
606    }
607
608    # save the output
609    set rdir [file dirname $filename]
610    if {![file exists $rdir]} {
611        _mkdir $rdir
612    }
613
614    set fid [open $filename w]
615    puts $fid "<?xml version=\"1.0\"?>"
616    puts $fid [$xmlobj xml]
617    close $fid
618
619    _log output saved in $filename
620}
621
622# ----------------------------------------------------------------------
623# USAGE: _output <data>
624#
625# Used internally to send each bit of output <data> coming from the
626# tool onto the caller, so the user can see progress.
627# ----------------------------------------------------------------------
628itcl::body Rappture::Task::_output {data} {
629    if {[string length $_outputcb] > 0} {
630        uplevel #0 $_outputcb [list $data]
631    }
632}
633
634# ----------------------------------------------------------------------
635# USAGE: _log <cmd> <arg> <arg> ...
636#
637# Used internally to log interesting events during the run.  If the
638# -logger option is set (to Rappture::Logger::log, or something like
639# that), then the arguments to this method are passed along to the
640# logger and written out to a log file.  Logging is off by default,
641# so this method does nothing unless -logger is set.
642# ----------------------------------------------------------------------
643itcl::body Rappture::Task::_log {args} {
644    if {[string length $logger] > 0} {
645        uplevel #0 $logger [list $args]
646    }
647}
648
649# ----------------------------------------------------------------------
650# USAGE: MiddlewareTime <key> <value> ...
651#
652# Used as the default method for reporting job status information.
653# Implements the old HUBzero method of reporting job status info to
654# stderr, which can then be picked up by the tool session container.
655# Most tools use the "submit" command, which talks directly to a
656# database to log job information, so this isn't really needed.  But
657# it doesn't hurt to have this and it can be useful in some cases.
658# ----------------------------------------------------------------------
659itcl::body Rappture::Task::MiddlewareTime {args} {
660    set line "MiddlewareTime:"
661    foreach {key val} $args {
662        append line " $key=$val"
663    }
664    puts stderr $line
665}
666
667
668#
669# Send the list of parameters to a python program so it can call PUQ
670# and get a CSV file containing the parameter values to use for the runs.
671itcl::body Rappture::Task::_get_params {varlist uq_type uq_args} {
672    set pid [pid]
673    # puts "puq get_params $pid $varlist $uq_type $uq_args"
674    if {[catch {exec puq get_params $pid $varlist $uq_type $uq_args}]} {
675        set fp [open "uq_debug.err" r]
676        set rdata [read $fp]
677        close $fp
678        puts "get_params.py failed: $rdata"
679        error "get_params.py: $rdata"
680    }
681    return params[pid].csv
682}
Note: See TracBrowser for help on using the repository browser.