source: branches/uq/lang/tcl/scripts/task.tcl @ 5169

Last change on this file since 5169 was 5169, checked in by mmh, 10 years ago

fix multiple simulations

File size: 24.2 KB
Line 
1# -*- mode: tcl; indent-tabs-mode: nil -*-
2# ----------------------------------------------------------------------
3#  COMPONENT: task - represents the executable part of a tool
4#
5#  This object is an executable version of a Rappture xml file.
6#  A tool is a task plus its graphical user interface.  Each task
7#  resides in an installation directory with other tool resources
8#  (libraries, examples, etc.).  Each task is defined by its inputs
9#  and outputs, and understands the context in which it executes
10#  (via exec, submit, mx, etc.).
11# ======================================================================
12#  AUTHOR:  Michael McLennan, Purdue University
13#  Copyright (c) 2004-2014  HUBzero Foundation, LLC
14#
15#  See the file "license.terms" for information on usage and
16#  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
17# ======================================================================
18package require BLT
19
itcl::class Rappture::Task {
    # command prefix invoked by _log; empty string disables logging
    public variable logger ""
    # command prefix invoked with job statistics after each run
    public variable jobstats Rappture::Task::MiddlewareTime
    # where "save" stores results; "@default" defers to the resources file
    public variable resultdir "@default"

    constructor {xmlobj installdir args} { # defined below }
    destructor { # defined below }

    public method installdir {} { return $_installdir }

    public method run {args}
    public method get_uq {args}
    public method abort {}
    public method reset {}
    public method xml {args}
    public method save {xmlobj {name ""}}

    protected method _mkdir {dir}
    protected method _output {data}
    protected method _log {args}
    protected method _build_submit_cmd {cmd tfile params_file}
    protected method _get_params {varlist uq_type uq_args}

    private variable _xmlobj ""      ;# XML object with inputs/outputs
    private variable _origxml ""     ;# copy of original XML (for reset)
    private variable _lastrun ""     ;# name of last run file
    private variable _installdir ""  ;# installation directory for this tool
    private variable _outputcb ""    ;# callback for tool output
    private common job               ;# array var used for blt::bgexec jobs
    private common jobnum 0          ;# counter for unique job number

    # get global resources for this tool session
    public proc resources {{option ""}}

    # Setters below are the callbacks registered by task_init_resources.
    # Each one stores its value in the shared _resources array.
    public common _resources
    public proc setAppName {name}   { set _resources(-appname) $name }
    # setAppId is registered by task_init_resources for "application_id";
    # it must exist or that registration points at an undefined command.
    public proc setAppId {name}     { set _resources(-appid) $name }
    public proc setHubName {name}   { set _resources(-hubname) $name }
    public proc setHubURL {name}    { set _resources(-huburl) $name }
    public proc setSession {name}   { set _resources(-session) $name }
    public proc setJobPrt {name}    { set _resources(-jobprotocol) $name }
    public proc setResultDir {name} { set _resources(-resultdir) $name }

    # default method for -jobstats control
    public proc MiddlewareTime {args}
}
65
66# must use this name -- plugs into Rappture::resources::load
proc task_init_resources {} {
    # Pairs of <resource keyword> <handler command>, handed to the
    # resources registry in one call.
    set handlers {
        application_name  Rappture::Task::setAppName
        application_id    Rappture::Task::setAppId
        hub_name          Rappture::Task::setHubName
        hub_url           Rappture::Task::setHubURL
        session_token     Rappture::Task::setSession
        job_protocol      Rappture::Task::setJobPrt
        results_directory Rappture::Task::setResultDir
    }
    # linsert yields a canonical list, so eval sees exactly one command
    eval [linsert $handlers 0 Rappture::resources::register]
}
77
78# ----------------------------------------------------------------------
79# CONSTRUCTOR
80# ----------------------------------------------------------------------
itcl::body Rappture::Task::constructor {xmlobj installdir args} {
    puts "Task Init"

    # the task is meaningless without a valid library object
    set valid [Rappture::library isvalid $xmlobj]
    if {!$valid} {
        error "bad value \"$xmlobj\": should be Rappture::Library"
    }
    set _xmlobj $xmlobj

    # keep a pristine copy of the XML so "reset" can restore it later
    set blank "<?xml version=\"1.0\"?><run/>"
    set _origxml [Rappture::LibraryObj ::#auto $blank]
    $_origxml copy "" from $_xmlobj ""

    # the installation directory must already be present
    if {[file exists $installdir]} {
        set _installdir $installdir
    } else {
        error "directory \"$installdir\" doesn't exist"
    }

    # remaining arguments are configuration options
    eval configure $args
}
99
100# ----------------------------------------------------------------------
101# DESTRUCTOR
102# ----------------------------------------------------------------------
itcl::body Rappture::Task::destructor {} {
    # Release the private copy of the original XML.  _origxml is still
    # empty if the constructor failed before creating the copy, and
    # deleting an object named "" would raise a second error, so only
    # delete when there is something to delete.
    if {$_origxml ne ""} {
        itcl::delete object $_origxml
        set _origxml ""
    }
}
106
107# ----------------------------------------------------------------------
108# USAGE: resources ?-option?
109#
110# Clients use this to query information about the tool.
111# ----------------------------------------------------------------------
itcl::body Rappture::Task::resources {{option ""}} {
    # no option: hand back every known resource as a key/value list
    if {$option eq ""} {
        return [array get _resources]
    }
    # unknown option: empty string rather than an error
    if {![info exists _resources($option)]} {
        return ""
    }
    return $_resources($option)
}
121
itcl::body Rappture::Task::get_uq {args} {
    # USAGE: get_uq -uq_type <type> ?-uq_args <args>?
    #
    # Builds a Rappture::UQ object from the UQ variables reported by the
    # XML object plus the options given here, and returns that object.
    # Options other than -uq_type and -uq_args are ignored.
    puts "Task::get_uq $args"

    # -uq_args is optional and defaults to an empty value
    set uq_args ""
    foreach {path val} $args {
        if {$path == "-uq_type"} {
            set uq_type $val
        } elseif {$path == "-uq_args"} {
            set uq_args $val
        }
    }

    # Without this check a missing -uq_type surfaced as the obscure
    # "can't read uq_type: no such variable" error.
    if {![info exists uq_type]} {
        error "missing option \"-uq_type\": should be \"get_uq -uq_type type ?-uq_args args?\""
    }

    set varlist [$_xmlobj uq_get_vars]
    return [Rappture::UQ ::#auto $varlist $uq_type $uq_args]
}
134
135# ----------------------------------------------------------------------
136# USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>?
137#
138# This method causes the tool to run.  A "driver.xml" file is created
139# as the input for the run.  That file is fed to the executable
140# according to the <tool><command> string, and the job is executed.
141#
142# Any "<path> <value>" arguments are used to override the current
143# settings from the GUI.  This is useful, for example, when filling
144# in missing simulation results from the analyzer.
145#
146# If the -output argument is included, then the next arg is a
147# callback command for output messages.  Any output that comes in
148# while the tool is running is sent back to the caller, so the user
149# can see progress running the tool.
150#
151# Returns a list of the form {status result}, where status is an
152# integer status code (0=success) and result is the output from the
153# simulator.  Successful output is something like {0 run1293921.xml},
154# where 0=success and run1293921.xml is the name of the file containing
155# results.
156# ----------------------------------------------------------------------
itcl::body Rappture::Task::run {args} {
    # Runs the tool once.  Arguments are "<path> <value>" overrides plus
    # the options -output <callback>, -uq_type <type>, -uq_args <args>.
    # Returns a two-element list: status code (0 = success) and result.
    global env errorInfo
    puts "task run $args"
    #
    # Make sure that we save the proper application name.
    # Actually, the best place to get this information is
    # straight from the "installtool" script, but just in
    # case we have an older tool, we should insert the
    # tool name from the resources config file.
    #
    if {[info exists _resources(-appname)]
          && $_resources(-appname) ne ""
          && [$_xmlobj get tool.name] eq ""} {
        $_xmlobj put tool.name $_resources(-appname)
    }

    # if there are any args, use them to override parameters
    set _outputcb ""
    set uq_type ""
    # default so that -uq_type without -uq_args does not hit an unset variable
    set uq_args ""
    foreach {path val} $args {
        if {$path == "-output"} {
            set _outputcb $val
        } elseif {$path == "-uq_type"} {
            set uq_type $val
        } elseif {$path == "-uq_args"} {
            set uq_args $val
        } else {
            $_xmlobj put $path.current $val
        }
    }

    foreach item {control output error} { set job($item) "" }

    # Set limits for cpu time
    set limit [$_xmlobj get tool.limits.cputime]
    if { $limit == "unlimited" } {
        set limit 43200;                # 12 hours
    } else {
        if { [scan $limit "%d" dum] != 1 } {
            set limit 14400;            # 4 hours by default
        } elseif { $limit > 43200 } {
            set limit 43200;            # limit to 12 hrs.
        } elseif { $limit < 10 } {
            set limit 10;               # lower bound is 10 seconds.
        }
    }
    Rappture::rlimit set cputime $limit

    # write out the driver.xml file for the tool; the channel is closed
    # even when a write fails, and the original error is re-raised so the
    # outer catch records it
    set file "driver[pid].xml"
    set status [catch {
        set fid [open $file w]
        if {[catch {
            puts $fid "<?xml version=\"1.0\"?>"
            puts $fid [$_xmlobj xml]
        } werr]} {
            set wtrace $errorInfo
            catch {close $fid}
            error $werr $wtrace
        }
        close $fid
    } result]

    # For UQ runs, also write a template file.  This is skipped when the
    # driver file could not be written, and any failure here is reported
    # through the same status/result path as a driver-file failure.
    if {$status == 0 && $uq_type != ""} {
        set tfile "template[pid].xml"
        set status [catch {
            # Copy xml into a new file
            set fid [open $tfile w]
            if {[catch {
                puts $fid "<?xml version=\"1.0\"?>"
                puts $fid [$_xmlobj xml]
            } werr]} {
                set wtrace $errorInfo
                catch {close $fid}
                error $werr $wtrace
            }
            close $fid

            # Return a list of the UQ variables and their PDFs.
            # Also turns $tfile into a template file.
            set uq_varlist [$_xmlobj uq_get_vars $tfile]
        } result]
    }

    # execute the tool using the path from the tool description
    if {$status == 0} {
        set cmd [$_xmlobj get tool.command]
        regsub -all @tool $cmd $_installdir cmd
        set cmd [string trimleft $cmd " "]
        puts "1. cmd=$cmd"

        if { $cmd == "" } {
            puts stderr "cmd is empty"
            return [list 1 "Command is empty.\n\nThere is no command specified by\n\n <command>\n </command>\n\nin the tool.xml file."]
        }

        if {$uq_type == ""} {
            regsub -all @driver $cmd $file cmd

            switch -glob -- [resources -jobprotocol] {
                "submit*" {
                    # if job_protocol is "submit", then use submit command
                    set cmd "submit --local $cmd"
                }
                "mx" {
                    # metachory submission
                    set cmd "mx $cmd"
                }
                "exec" {
                    # default -- nothing special
                }
            }
        } else {
            set params_file [_get_params $uq_varlist $uq_type $uq_args]
            set cmd [_build_submit_cmd $cmd $tfile $params_file]
            file delete -force puq
        }

        $_xmlobj put tool.execute $cmd

        puts "cmd=$cmd"
        # starting job...
        set _lastrun ""
        _log run started
        Rappture::rusage mark

        if {0 == [string compare -nocase -length 5 $cmd "ECHO "] } {
            # "ECHO ..." commands are not executed; the rest of the
            # command string is used directly as the job output
            set status 0;
            set job(output) [string range $cmd 5 end]
        } else {
            set status [catch {
                set ::Rappture::Task::job(control) ""
                eval blt::bgexec \
                ::Rappture::Task::job(control) \
                -keepnewline yes \
                -killsignal SIGTERM \
                -onoutput [list [itcl::code $this _output]] \
                -output ::Rappture::Task::job(output) \
                -error ::Rappture::Task::job(error) \
                $cmd
            } result]

            if { $status != 0 } {
                # We're here because the exec-ed program failed
                set logmesg $result
                if { $::Rappture::Task::job(control) ne "" } {
                    foreach { token pid code mesg } \
                    $::Rappture::Task::job(control) break
                    if { $token == "EXITED" } {
                       # This means that the program exited normally but
                       # returned a non-zero exitcode.  Consider this an
                       # invalid result from the program.  Append the stderr
                       # from the program to the message.
                       set logmesg "Program finished: exit code is $code"
                       set result "$logmesg\n\n$::Rappture::Task::job(error)"
                    } elseif { $token == "abort" }  {
                        # The user pressed the abort button.
                        set logmesg "Program terminated by user."
                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
                    } else {
                        # Abnormal termination
                        set logmesg "Abnormal program termination: $mesg"
                        set result "$logmesg\n\n$::Rappture::Task::job(output)"
                    }
                }
                _log run failed [list $logmesg]
                return [list $status $result]
            }
        }
        # ...job is finished
        array set times [Rappture::rusage measure]

        if {[resources -jobprotocol] ne "submit"} {
            set id [$_xmlobj get tool.id]
            set vers [$_xmlobj get tool.version.application.revision]
            set simulation simulation
            if { $id ne "" && $vers ne "" } {
                set pid [pid]
                set simulation ${pid}_${id}_r${vers}
            }

            # need to save job info? then invoke the callback
            if {[string length $jobstats] > 0} {
                uplevel #0 $jobstats [list job [incr jobnum] \
                event $simulation start $times(start) \
                walltime $times(walltime) cputime $times(cputime) \
                status $status]
            }

            #
            # Scan through stderr channel and look for statements that
            # represent grid jobs that were executed.  The statements
            # look like this:
            #
            # MiddlewareTime: job=1 event=simulation start=3.001094 ...
            #
            set subjobs 0
            while {[regexp -indices {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)} $job(error) match]} {
                foreach {p0 p1} $match break
                if {[string index $job(error) $p0] == "\n"} { incr p0 }

                catch {unset data}
                array set data {
                    job 1
                    event simulation
                    start 0
                    walltime 0
                    cputime 0
                    status 0
                }
                foreach arg [lrange [string range $job(error) $p0 $p1] 1 end] {
                    foreach {key val} [split $arg =] break
                    set data($key) $val
                }
                set data(job) [expr {$jobnum+$data(job)}]
                set data(event) "subsimulation"
                set data(start) [expr {$times(start)+$data(start)}]

                set details ""
                foreach key {job event start walltime cputime status} {
                    # add required keys in a particular order
                    lappend details $key $data($key)
                    unset data($key)
                }
                foreach key [array names data] {
                    # add anything else that the client gave -- venue, etc.
                    lappend details $key $data($key)
                }

                if {[string length $jobstats] > 0} {
                    uplevel #0 $jobstats $details
                }

                incr subjobs

                # done -- remove this statement
                set job(error) [string replace $job(error) $p0 $p1]
            }
            incr jobnum $subjobs
        }

    } else {
        # writing the driver or template file failed
        set job(error) "$result\n$errorInfo"
    }

    # NOTE: the driver file is currently left in place after the run;
    # its deletion (file delete -force -- $file) is disabled.

    # see if the job was aborted
    if {[regexp {^KILLED} $job(control)]} {
        _log run aborted
        return [list 0 "ABORT"]
    }

    #
    # If successful, return the output, which should include
    # a reference to the run.xml file containing results.
    #

    if {$status == 0} {
        set result [string trim $job(output)]

        if {$uq_type != ""} {
            # UQ. Collect data from all jobs and put it in one xml run file.
            #file delete -force -- run_uq.xml
            set res [exec puq_analyze.py puq_[pid].hdf5]
            append result "\n" $res
        }
        if {[regexp {=RAPPTURE-RUN=>([^\n]+)} $result match file]} {
            # from here on "file" names the run file, not the driver file
            set _lastrun $file

            set status [catch {Rappture::library $file} result]
            puts "STATUS=$status"
            if {$status == 0} {
                # add cputime info to run.xml file
                $result put output.walltime $times(walltime)
                $result put output.cputime $times(cputime)
                if {[info exists env(SESSION)]} {
                    $result put output.session $env(SESSION)
                }
            } else {
                set result "$result\n$errorInfo"
            }

            file delete -force -- $file
        } else {
            set status 1
            set result "Can't find result file in output.\nDid you call Rappture
::result in your simulator?"
        }
    } elseif {$job(output) ne "" || $job(error) ne ""} {
        set result [string trim "$job(output)\n$job(error)"]
    }

    # log final status for the run
    if {$status == 0} {
        _log run finished
    } else {
        _log run failed [list $result]
    }

    return [list $status $result]
}
448
449# ----------------------------------------------------------------------
450#  Turn the command string from tool.xml into the proper syntax to use
451#  with a submit parameter sweep with a template file.  Proper quoting
452# of the template file is necessary to prevent submit from being too smart
453# and converting it to a full pathname.
454# ----------------------------------------------------------------------
itcl::body Rappture::Task::_build_submit_cmd {cmd tfile params_file} {
    puts "BSC $cmd $tfile $params_file"

    # fixed prefix: parameter sweep over $params_file using $tfile as input
    set result "submit --runName=puq -l -i @:$tfile -d $params_file"

    # walk the space-separated words of the tool command
    set after_eval 0
    foreach word [split $cmd " "] {
        # the word following --eval gets an extra layer of quoting,
        # unless it already begins with a double quote
        if {$after_eval == 1 && [string index $word 0] ne "\""} {
            set word "\"\\\"$word\\\"\""
        }
        set after_eval [expr {$word eq "--eval"}]

        # a bare @driver word becomes the quoted template file name
        if {$word eq "@driver"} {
            set word "\"\\\"$tfile\\\"\""
        }
        append result " " $word
    }
    puts "BSC2: $result"

    # any @driver still embedded inside a word is replaced unquoted
    regsub -all @driver $result $tfile result
    puts "BSC returning: $result"
    return $result
}
484
485# ----------------------------------------------------------------------
486# USAGE: _mkdir <directory>
487#
488# Used internally to create the <directory> in the file system.
489# The parent directory is also created, as needed.
490# ----------------------------------------------------------------------
itcl::body Rappture::Task::_mkdir {dir} {
    # create the missing parent first, stopping at "." and "/"
    set up [file dirname $dir]
    if {$up ne "." && $up ne "/" && ![file exists $up]} {
        _mkdir $up
    }
    file mkdir $dir
}
500
501
502# ----------------------------------------------------------------------
503# USAGE: abort
504#
505# Clients use this during a "run" to abort the current job.
506# Kills the job and forces the "run" method to return.
507# ----------------------------------------------------------------------
itcl::body Rappture::Task::abort {} {
    # record the request first, then write to the control variable
    # that "run" hands to blt::bgexec
    _log run abort
    set ::Rappture::Task::job(control) "abort"
}
512
513# ----------------------------------------------------------------------
514# USAGE: reset
515#
516# Resets all input values to their defaults.  Sometimes used just
517# before a run to reset to a clean state.
518# ----------------------------------------------------------------------
itcl::body Rappture::Task::reset {} {
    # start again from the XML captured at construction time
    $_xmlobj copy "" from $_origxml ""

    # then push each input's default into its current value
    foreach path [Rappture::entities -as path $_xmlobj input] {
        if {[$_xmlobj element -as type $path.default] eq ""} {
            # this input has no <default> element -- leave it alone
            continue
        }
        $_xmlobj put $path.current [$_xmlobj get $path.default]
    }
}
528
529# ----------------------------------------------------------------------
530# USAGE: xml <subcommand> ?<arg> <arg> ...?
531# USAGE: xml object
532#
533# Used by clients to manipulate the underlying XML data for this
534# tool.  The <subcommand> can be any operation supported by a
535# Rappture::library object.  Clients can also request the XML object
536# directly by using the "object" subcommand.
537# ----------------------------------------------------------------------
itcl::body Rappture::Task::xml {args} {
    if {$args eq "object"} {
        # caller wants the XML object itself
        set outcome $_xmlobj
    } else {
        # anything else is forwarded as a subcommand of the XML object
        set outcome [eval $_xmlobj $args]
    }
    return $outcome
}
544
545# ----------------------------------------------------------------------
546# USAGE: save <xmlobj> ?<filename>?
547#
548# Used by clients to save the contents of an <xmlobj> representing
549# a run out to the given file.  If <filename> is not specified, then
550# it uses the -resultdir option and other settings to do what Rappture
551# would normally do with the output.
552# ----------------------------------------------------------------------
itcl::body Rappture::Task::save {xmlobj {filename ""}} {
    # Writes <xmlobj> to <filename>.  When no file name is given, one is
    # derived from the resultdir option, the results directory in the
    # resources, and the name of the last run file.
    if {$filename eq ""} {
        # if there's a results_directory defined in the resources
        # file, then move the run.xml file there for storage
        set rdir ""
        if {$resultdir eq "@default"} {
            if {[info exists _resources(-resultdir)]} {
                set rdir $_resources(-resultdir)
            } else {
                set rdir "."
            }
        } elseif {$resultdir ne ""} {
            set rdir $resultdir
        }

        # use the runfile name generated by the last run
        if {$_lastrun ne ""} {
            set filename [file join $rdir $_lastrun]
        } else {
            set filename [file join $rdir run.xml]
        }
    }

    # add any last-minute metadata
    $xmlobj put output.time [clock format [clock seconds]]

    $xmlobj put tool.version.rappture.version $::Rappture::version
    $xmlobj put tool.version.rappture.revision $::Rappture::build

    if {[info exists ::tcl_platform(user)]} {
        $xmlobj put output.user $::tcl_platform(user)
    }

    # save the output
    set rdir [file dirname $filename]
    if {![file exists $rdir]} {
        _mkdir $rdir
    }

    # Close the channel even when a write fails, then re-raise the
    # original error with its stack trace so the caller still sees it.
    set fid [open $filename w]
    if {[catch {
        puts $fid "<?xml version=\"1.0\"?>"
        puts $fid [$xmlobj xml]
    } werr]} {
        set wtrace $::errorInfo
        catch {close $fid}
        error $werr $wtrace
    }
    close $fid

    _log output saved in $filename
}
599
600# ----------------------------------------------------------------------
601# USAGE: _output <data>
602#
603# Used internally to send each bit of output <data> coming from the
604# tool onto the caller, so the user can see progress.
605# ----------------------------------------------------------------------
itcl::body Rappture::Task::_output {data} {
    # nothing to do unless an output callback was set
    if {$_outputcb eq ""} {
        return
    }
    # invoke the callback at global scope with the data as one argument
    uplevel #0 $_outputcb [list $data]
}
611
612# ----------------------------------------------------------------------
613# USAGE: _log <cmd> <arg> <arg> ...
614#
615# Used internally to log interesting events during the run.  If the
616# -logger option is set (to Rappture::Logger::log, or something like
617# that), then the arguments to this method are passed along to the
618# logger and written out to a log file.  Logging is off by default,
619# so this method does nothing unless -logger is set.
620# ----------------------------------------------------------------------
itcl::body Rappture::Task::_log {args} {
    # logging is off while the -logger option is empty
    if {$logger eq ""} {
        return
    }
    # the whole argument list is passed to the logger as a single argument
    uplevel #0 $logger [list $args]
}
626
627# ----------------------------------------------------------------------
628# USAGE: MiddlewareTime <key> <value> ...
629#
630# Used as the default method for reporting job status information.
631# Implements the old HUBzero method of reporting job status info to
632# stderr, which can then be picked up by the tool session container.
633# Most tools use the "submit" command, which talks directly to a
634# database to log job information, so this isn't really needed.  But
635# it doesn't hurt to have this and it can be useful in some cases.
636# ----------------------------------------------------------------------
itcl::body Rappture::Task::MiddlewareTime {args} {
    # collect "key=value" fields after the fixed tag, then emit them
    # on stderr as one space-separated line
    set fields [list "MiddlewareTime:"]
    foreach {key val} $args {
        lappend fields "$key=$val"
    }
    puts stderr [join $fields " "]
}
644
645
646#
647# Send the list of parameters to a python program so it can call PUQ
648# and get a CSV file containing the parameter values to use for the runs.
itcl::body Rappture::Task::_get_params {varlist uq_type uq_args} {
    # Runs get_params.py with the process id, the variable list as a
    # bracketed text list, the UQ type and the UQ args.  Returns the
    # name "params<pid>.csv".
    puts "tcl get_params $varlist uq_type=$uq_type args=$uq_args"

    # Convert a tcl list of {name value} pairs into bracketed list text
    # for the python side.  A value whose first word is "gaussian" or
    # "uniform" becomes ["<kind>",<arg1>,<arg2>]; any other value is
    # inserted verbatim.  An empty input yields "[]" (it used to yield
    # the malformed "[]]").
    proc varlist2py {inlist} {
        set items {}
        foreach a $inlist {
            foreach {var val} $a break
            set kind [lindex $val 0]
            # "--" keeps a value starting with "-" from being read as
            # a switch option
            switch -- $kind {
                gaussian -
                uniform {
                    set pdf "\[\"$kind\",[lindex $val 1],[lindex $val 2]\]"
                }
                default {
                    set pdf $val
                }
            }
            lappend items "\[\"$var\",$pdf\]"
        }
        return "\[[join $items ,]\]"
    }

    set varlist [varlist2py $varlist]
    set pid [pid]
    puts "get_params.py $pid $varlist $uq_type $uq_args"
    exec get_params.py $pid $varlist $uq_type $uq_args
    return params${pid}.csv
}
Note: See TracBrowser for help on using the repository browser.