[6021] | 1 | # -*- mode: tcl; indent-tabs-mode: nil -*- |
---|
[4127] | 2 | # ---------------------------------------------------------------------- |
---|
| 3 | # COMPONENT: task - represents the executable part of a tool |
---|
| 4 | # |
---|
| 5 | # This object is an executable version of a Rappture xml file. |
---|
| 6 | # A tool is a task plus its graphical user interface. Each task |
---|
| 7 | # resides in an installation directory with other tool resources |
---|
| 8 | # (libraries, examples, etc.). Each task is defined by its inputs |
---|
| 9 | # and outputs, and understands the context in which it executes |
---|
| 10 | # (via exec, submit, mx, etc.). |
---|
| 11 | # ====================================================================== |
---|
| 12 | # AUTHOR: Michael McLennan, Purdue University |
---|
| 13 | # Copyright (c) 2004-2014 HUBzero Foundation, LLC |
---|
| 14 | # |
---|
| 15 | # See the file "license.terms" for information on usage and |
---|
| 16 | # redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
| 17 | # ====================================================================== |
---|
| 18 | package require BLT |
---|
| 19 | |
---|
# ----------------------------------------------------------------------
# CLASS: Rappture::Task
#
# Executable side of a Rappture tool description.  An instance wraps the
# tool's XML object and installation directory; "run" writes a driver
# file, executes the tool command and returns the outcome.  Session-wide
# settings (application name, hub name, job protocol, ...) are kept in
# the common array _resources and are filled in through the set* procs,
# which task_init_resources registers with Rappture::resources.
# ----------------------------------------------------------------------
itcl::class Rappture::Task {
    # internal helpers -- bodies are defined with itcl::body elsewhere
    private method CheckForCachedRunFile { driverFile }
    private method CollectUQResults {}
    private method ExecuteSimulationCommand { cmd }
    private method GetCommand {}
    private method GetDriverFile {}
    private method GetSignal { signal }
    private method GetSimulationCommand { driverFile }
    private method GetUQErrors {}
    private method GetUQSimulationCommand { driverFile }
    private method GetUQTemplateFile {}
    private method IsCacheable {}
    private method LogCachedSimulationUsage {}
    private method LogSimulationUsage {}
    private method LogSubmittedSimulationUsage {}
    private method SetCpuResourceLimit {}

    # configuration options
    public variable logger ""           ;# command prefix used by Log
    public variable jobstats Rappture::Task::MiddlewareTime
    public variable resultdir "@default"

    constructor {xmlobj installdir args} { # defined below }
    destructor { # defined below }

    public method installdir {} { return $_installdir }

    public method run {args}
    public method get_uq {args}
    public method abort {}
    public method reset {}
    public method xml {args}
    public method save {xmlobj {name ""}}

    protected method OnOutput {data}
    protected method Log {args}
    protected method BuildSubmitCommand {cmd tfile params_file}
    protected method GetParamsForUQ {}

    private variable _xmlobj ""      ;# XML object with inputs/outputs
    private variable _origxml ""     ;# copy of original XML (for reset)
    private variable _installdir ""  ;# installation directory for this tool
    private variable _outputcb ""    ;# callback for tool output
    private common jobnum 0          ;# counter for unique job number
    private variable _uq             ;# array: type, args, varlist, tfile

    private variable _job            ;# array: state of the current job

    # get global resources for this tool session
    public proc resources {{option ""}}

    public common _resources
    public proc setAppName {name}    { set _resources(-appname) $name }
    # setAppId is registered by task_init_resources for the
    # "application_id" keyword, so it has to exist here.
    public proc setAppId {name}      { set _resources(-appid) $name }
    public proc setHubName {name}    { set _resources(-hubname) $name }
    public proc setHubURL {name}     { set _resources(-huburl) $name }
    public proc setSession {name}    { set _resources(-session) $name }
    public proc setJobPrt {name}     { set _resources(-jobprotocol) $name }
    public proc setResultDir {name}  { set _resources(-resultdir) $name }
    public proc setCacheHosts {name} { set _resources(-cachehosts) $name }

    # default method for -jobstats control
    public proc MiddlewareTime {args}
}
---|
| 82 | |
---|
| 83 | # must use this name -- plugs into Rappture::resources::load |
---|
proc task_init_resources {} {
    # Resource-file keyword -> Rappture::Task proc that stores its value.
    set handlers {
        application_name    Rappture::Task::setAppName
        application_id      Rappture::Task::setAppId
        hub_name            Rappture::Task::setHubName
        hub_url             Rappture::Task::setHubURL
        session_token       Rappture::Task::setSession
        job_protocol        Rappture::Task::setJobPrt
        results_directory   Rappture::Task::setResultDir
        cache_hosts         Rappture::Task::setCacheHosts
    }
    # Hand all keyword/handler pairs to the registry in a single call.
    eval [linsert $handlers 0 Rappture::resources::register]
}
---|
| 95 | |
---|
| 96 | # ---------------------------------------------------------------------- |
---|
| 97 | # CONSTRUCTOR |
---|
| 98 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::constructor {xmlobj installdir args} {
    # The tool description must be a Rappture library object.
    if {![Rappture::library isvalid $xmlobj]} {
        error "bad value \"$xmlobj\": should be Rappture::Library"
    }
    set _xmlobj $xmlobj

    # Keep a pristine copy of the XML so that "reset" can restore it.
    set _origxml [Rappture::LibraryObj ::#auto "<?xml version=\"1.0\"?><run/>"]
    $_origxml copy "" from $_xmlobj ""

    # The installation directory has to be present on disk.
    if {![file exists $installdir]} {
        error "directory \"$installdir\" doesn't exist"
    }
    set _installdir $installdir

    # Make https:// URLs usable through the http package.
    package require http
    package require tls
    http::register https 443 [list ::tls::socket -tls1 1]

    # Remaining arguments are -option value settings.
    eval configure $args
}
---|
| 119 | |
---|
| 120 | # ---------------------------------------------------------------------- |
---|
| 121 | # DESTRUCTOR |
---|
| 122 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::destructor {} {
    # _origxml is still "" when the constructor failed before making its
    # copy of the XML; deleting a non-existent object would raise a
    # second, misleading error, so only delete what was really created.
    if {$_origxml ne ""} {
        itcl::delete object $_origxml
    }
}
---|
| 126 | |
---|
| 127 | # ---------------------------------------------------------------------- |
---|
| 128 | # USAGE: resources ?-option? |
---|
| 129 | # |
---|
| 130 | # Clients use this to query information about the tool. |
---|
| 131 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::resources {{option ""}} {
    # No option given: return every setting as a flat name/value list.
    if {$option eq ""} {
        return [array get _resources]
    }
    # Unknown options yield an empty string rather than an error.
    if {![info exists _resources($option)]} {
        return ""
    }
    return $_resources($option)
}
---|
| 141 | |
---|
# ----------------------------------------------------------------------
# USAGE: GetSignal <code>
#
# Translates an exit code above 128 into a signal name by looking up
# (code - 128) in a table of signal names.  Returns the name, or the
# string: unknown exit code "<code>" when the value is not an integer
# or falls outside the table.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::GetSignal {code} {
    set signals {
        xxx HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV
        USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN
        TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS
        RTMIN RTMIN+1 RTMIN+2 RTMIN+3 RTMAX-3 RTMAX-2 RTMAX-1 RTMAX
    }
    # Anything that is not an integer cannot be mapped to a signal; use
    # the same fallback as an out-of-range value instead of raising.
    if {![string is integer -strict $code]} {
        return "unknown exit code \"$code\""
    }
    # Braced so the value is not substituted a second time by expr.
    set sigNum [expr {$code - 128}]
    # Index 0 is a placeholder -- there is no signal number 0.
    if { $sigNum > 0 && $sigNum < [llength $signals] } {
        return [lindex $signals $sigNum]
    }
    return "unknown exit code \"$code\""
}
---|
| 155 | |
---|
# ----------------------------------------------------------------------
# USAGE: get_uq ?-uq_type <type>? ?-uq_args <args>?
#
# Records the UQ type/arguments given on the command line and returns
# a new Rappture::UQ object built from the UQ variables reported by
# the XML object.  Other option names are ignored.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::get_uq {args} {
    foreach {opt val} $args {
        switch -exact -- $opt {
            -uq_type { set _uq(type) $val }
            -uq_args { set _uq(args) $val }
        }
    }
    # uq_get_vars returns a two-element list: variable list and a count.
    foreach {varlist num} [$_xmlobj uq_get_vars] break
    return [Rappture::UQ ::#auto $varlist $num $_uq(type) $_uq(args)]
}
---|
| 168 | |
---|
[4127] | 169 | # ---------------------------------------------------------------------- |
---|
| 170 | # USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>? |
---|
| 171 | # |
---|
| 172 | # This method causes the tool to run. A "driver.xml" file is created |
---|
| 173 | # as the input for the run. That file is fed to the executable |
---|
| 174 | # according to the <tool><command> string, and the job is executed. |
---|
| 175 | # |
---|
| 176 | # Any "<path> <value>" arguments are used to override the current |
---|
| 177 | # settings from the GUI. This is useful, for example, when filling |
---|
| 178 | # in missing simulation results from the analyzer. |
---|
| 179 | # |
---|
| 180 | # If the -output argument is included, then the next arg is a |
---|
| 181 | # callback command for output messages. Any output that comes in |
---|
| 182 | # while the tool is running is sent back to the caller, so the user |
---|
| 183 | # can see progress running the tool. |
---|
| 184 | # |
---|
| 185 | # Returns a list of the form {status result}, where status is an |
---|
| 186 | # integer status code (0=success) and result is the output from the |
---|
| 187 | # simulator. Successful output is something like {0 run1293921.xml}, |
---|
| 188 | # where 0=success and run1293921.xml is the name of the file containing |
---|
| 189 | # results. |
---|
| 190 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::run {args} {
    global env errorInfo

    #
    # Make sure that we save the proper application name.  Actually, the
    # best place to get this information is straight from the "installtool"
    # script, but just in case we have an older tool, we should insert the
    # tool name from the resources config file.
    #
    if {[info exists _resources(-appname)] && $_resources(-appname) ne "" &&
        [$_xmlobj get tool.name] eq ""} {
        $_xmlobj put tool.name $_resources(-appname)
    }

    # if there are any args, use them to override parameters
    set _outputcb ""
    set _uq(type) ""
    # -uq_type may be given without -uq_args; GetParamsForUQ reads
    # _uq(args) unconditionally, so make sure the element exists.
    if {![info exists _uq(args)]} {
        set _uq(args) ""
    }
    foreach {path val} $args {
        if {$path == "-output"} {
            set _outputcb $val
        } elseif {$path == "-uq_type"} {
            set _uq(type) $val
        } elseif {$path == "-uq_args"} {
            set _uq(args) $val
        } else {
            $_xmlobj put $path.current $val
        }
    }

    # Initialize job array variables
    array set _job {
        control  ""
        exitcode 0
        mesg     ""
        runfile  ""
        stderr   ""
        stdout   ""
        success  0
        xmlobj   ""
    }

    SetCpuResourceLimit
    set driverFile [GetDriverFile]

    # Look for a previously cached result before running anything.
    set cached 0
    if { [IsCacheable] } {
        puts stderr "Cache checking: [time {
            set cached [CheckForCachedRunFile $driverFile]
        } ]"
        puts stderr "checking cache=$cached"
    }
    if { !$cached } {
        # Build the command line: a plain run, or a UQ sweep that needs
        # a template file first.
        if { $_uq(type) != "" } {
            set _uq(tfile) [GetUQTemplateFile]
        }
        if { $_uq(type) == "" } {
            set cmd [GetSimulationCommand $driverFile]
        } else {
            set cmd [GetUQSimulationCommand $driverFile]
        }
        if { $cmd == "" } {
            puts stderr "cmd is empty"
            append mesg "There is no command specified by\n\n"
            append mesg "    <command>\n"
            append mesg "    </command>\n\n"
            append mesg "in the tool.xml file."
            return [list 1 $mesg]
        }
        Rappture::rusage mark
        if { ![ExecuteSimulationCommand $cmd] } {
            return [list 1 $_job(mesg)]
        }
        if { [resources -jobprotocol] == "submit" } {
            LogSubmittedSimulationUsage
        } else {
            LogSimulationUsage
        }
    } else {
        LogCachedSimulationUsage
    }
    if { $_job(success) } {
        # The driver file is only removed after a successful run.
        file delete -force -- $driverFile
        Log run finished
        return [list 0 $_job(xmlobj)]
    } else {
        # See if the job was aborted.
        if {[regexp {^KILLED} $_job(control)]} {
            Log run aborted
            return [list 1 "ABORT"]
        }
        Log run failed [list 0 $_job(mesg)]
        return [list 1 $_job(mesg)]
    }
}
---|
| 284 | |
---|
| 285 | # ---------------------------------------------------------------------- |
---|
[6021] | 286 | # Turn the command string from tool.xml into the proper syntax to use |
---|
| 287 | # with a submit parameter sweep with a template file. Proper quoting |
---|
| 288 | # of the template file is necessary to prevent submit from being too smart |
---|
| 289 | # and converting it to a full pathname. |
---|
| 290 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::BuildSubmitCommand {cmd tfile params_file} {
    set newcmd "submit --progress submit --runName=puq -l -i @:$tfile -d $params_file"

    # Walk the space-separated words of the tool command.  The word that
    # follows "--eval" is wrapped in escaped quotes unless it already
    # starts with a double quote; a bare "@driver" word becomes the
    # quoted template file name.
    set afterEval 0
    foreach word [split $cmd " "] {
        if {$afterEval && [string index $word 0] ne "\""} {
            set word "\"\\\"$word\\\"\""
        }
        set afterEval [expr {$word eq "--eval"}]
        if {$word eq "@driver"} {
            set word "\"\\\"$tfile\\\"\""
        }
        append newcmd " " $word
    }

    # Any @driver still embedded inside a longer word is replaced too.
    regsub -all @driver $newcmd $tfile newcmd
    return $newcmd
}
---|
| 316 | |
---|
| 317 | # ---------------------------------------------------------------------- |
---|
[4127] | 318 | # USAGE: abort |
---|
| 319 | # |
---|
| 320 | # Clients use this during a "run" to abort the current job. |
---|
| 321 | # Kills the job and forces the "run" method to return. |
---|
| 322 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::abort {} {
    # Record the request in the log first ...
    Log run abort

    # ... then overwrite the job-control variable.  The same variable is
    # handed to blt::bgexec in ExecuteSimulationCommand, which later
    # checks for the "abort" token.
    set _job(control) "abort"
}
---|
| 327 | |
---|
| 328 | # ---------------------------------------------------------------------- |
---|
| 329 | # USAGE: reset |
---|
| 330 | # |
---|
| 331 | # Resets all input values to their defaults. Sometimes used just |
---|
| 332 | # before a run to reset to a clean state. |
---|
| 333 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::reset {} {
    # Start again from the XML saved at construction time.
    $_xmlobj copy "" from $_origxml ""

    # Every input that defines a <default> gets it copied to <current>.
    foreach inputPath [Rappture::entities -as path $_xmlobj input] {
        set defaultPath $inputPath.default
        if {[$_xmlobj element -as type $defaultPath] eq ""} {
            continue
        }
        $_xmlobj put $inputPath.current [$_xmlobj get $defaultPath]
    }
}
---|
| 343 | |
---|
| 344 | # ---------------------------------------------------------------------- |
---|
| 345 | # USAGE: xml <subcommand> ?<arg> <arg> ...? |
---|
| 346 | # USAGE: xml object |
---|
| 347 | # |
---|
| 348 | # Used by clients to manipulate the underlying XML data for this |
---|
| 349 | # tool. The <subcommand> can be any operation supported by a |
---|
| 350 | # Rappture::library object. Clients can also request the XML object |
---|
| 351 | # directly by using the "object" subcommand. |
---|
| 352 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::xml {args} {
    # "xml object" hands back the underlying library object itself.
    if {$args eq "object"} {
        return $_xmlobj
    }
    # Everything else is forwarded to the library object as a subcommand.
    set result [eval $_xmlobj $args]
    return $result
}
---|
| 359 | |
---|
| 360 | # ---------------------------------------------------------------------- |
---|
[4970] | 361 | # USAGE: save <xmlobj> ?<filename>? |
---|
| 362 | # |
---|
| 363 | # Used by clients to save the contents of an <xmlobj> representing |
---|
| 364 | # a run out to the given file. If <filename> is not specified, then |
---|
| 365 | # it uses the -resultdir option and other settings to do what Rappture |
---|
| 366 | # would normally do with the output. |
---|
| 367 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::save {xmlobj {filename ""}} {
    if {$filename eq ""} {

        # If there's a results_directory defined in the resources file,
        # then move the run.xml file there for storage.

        set rdir ""
        if {$resultdir eq "@default"} {
            if {[info exists _resources(-resultdir)]} {
                set rdir $_resources(-resultdir)
            } else {
                global rapptureInfo
                set rdir $rapptureInfo(cwd)
            }
        } elseif {$resultdir ne ""} {
            set rdir $resultdir
        }

        # Use the runfile name generated by the last run.  The _job array
        # is only filled in by "run", so fall back to run.xml when save
        # is called before any run has happened.
        if {[info exists _job(runfile)] && $_job(runfile) ne ""} {
            set filename [file join $rdir $_job(runfile)]
        } else {
            set filename [file join $rdir run.xml]
        }
    }

    # add any last-minute metadata
    $xmlobj put output.time [clock format [clock seconds]]

    $xmlobj put tool.version.rappture.version $::Rappture::version
    $xmlobj put tool.version.rappture.revision $::Rappture::build
    $xmlobj put output.filename $filename
    $xmlobj put output.version $::Rappture::version

    if {[info exists ::tcl_platform(user)]} {
        $xmlobj put output.user $::tcl_platform(user)
    }

    # save the output
    set rdir [file dirname $filename]
    file mkdir $rdir

    set fid [open $filename w]
    if {[catch {
        puts $fid "<?xml version=\"1.0\"?>"
        puts $fid [$xmlobj xml]
    } err]} {
        # Do not leak the channel when writing fails; re-raise the
        # original error with its original trace.
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $fid}
        error $err $savedInfo $savedCode
    }
    close $fid

    Log output saved in $filename
}
---|
| 417 | |
---|
| 418 | # ---------------------------------------------------------------------- |
---|
[6497] | 419 | # USAGE: OnOutput <data> |
---|
[4127] | 420 | # |
---|
| 421 | # Used internally to send each bit of output <data> coming from the |
---|
| 422 | # tool onto the caller, so the user can see progress. |
---|
| 423 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::OnOutput {data} {
    # Nothing to do unless the caller of "run" supplied -output.
    if {$_outputcb eq ""} {
        return
    }
    # The callback runs at global scope with the data as one argument.
    uplevel #0 $_outputcb [list $data]
}
---|
| 429 | |
---|
| 430 | # ---------------------------------------------------------------------- |
---|
[6497] | 431 | # USAGE: Log <cmd> <arg> <arg> ... |
---|
[4127] | 432 | # |
---|
| 433 | # Used internally to log interesting events during the run. If the |
---|
| 434 | # -logger option is set (to Rappture::Logger::log, or something like |
---|
| 435 | # that), then the arguments to this method are passed along to the |
---|
| 436 | # logger and written out to a log file. Logging is off by default, |
---|
| 437 | # so this method does nothing unless -logger is set. |
---|
| 438 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::Log {args} {
    # Logging is off while -logger is empty.
    if {$logger eq ""} {
        return
    }
    # The whole argument list is passed to the logger as a single word.
    uplevel #0 $logger [list $args]
}
---|
| 444 | |
---|
| 445 | # ---------------------------------------------------------------------- |
---|
| 446 | # USAGE: MiddlewareTime <key> <value> ... |
---|
| 447 | # |
---|
| 448 | # Used as the default method for reporting job status information. |
---|
| 449 | # Implements the old HUBzero method of reporting job status info to |
---|
| 450 | # stderr, which can then be picked up by the tool session container. |
---|
| 451 | # Most tools use the "submit" command, which talks directly to a |
---|
| 452 | # database to log job information, so this isn't really needed. But |
---|
| 453 | # it doesn't hurt to have this and it can be useful in some cases. |
---|
| 454 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::MiddlewareTime {args} {
    # Collect the tag followed by one key=value word per pair, then
    # emit everything as a single space-separated line on stderr.
    set words [list "MiddlewareTime:"]
    foreach {name setting} $args {
        lappend words "$name=$setting"
    }
    puts stderr [join $words " "]
}
---|
[6021] | 462 | |
---|
# ----------------------------------------------------------------------
# USAGE: IsCacheable
#
# Decides whether a cached result may be used for this run.  Returns
# 0 when no cache hosts are configured.  Otherwise the environment
# variable RAPPTURE_CACHE_OVERRIDE, or failing that the tool.cache
# value from the XML, is returned when it is a boolean; an empty or
# non-boolean value means "allow caching" (1).
# ----------------------------------------------------------------------
itcl::body Rappture::Task::IsCacheable {} {
    global env

    set haveHosts [info exists _resources(-cachehosts)]
    if { !$haveHosts || $_resources(-cachehosts) eq "" } {
        puts stderr cachehosts=$haveHosts
        return 0
    }

    # The environment override wins over the setting in the tool XML.
    if { [info exists env(RAPPTURE_CACHE_OVERRIDE)] } {
        set tag $env(RAPPTURE_CACHE_OVERRIDE)
    } else {
        set tag [$_xmlobj get "tool.cache"]
    }
    puts stderr "cache tag is \"$tag\""

    if { $tag ne "" && [string is boolean $tag] } {
        return $tag
    }
    return 1;                           # Default is to allow caching.
}
---|
[6021] | 481 | |
---|
| 482 | # |
---|
| 483 | # Send the list of parameters to a python program so it can call PUQ |
---|
| 484 | # and get a CSV file containing the parameter values to use for the runs. |
---|
itcl::body Rappture::Task::GetParamsForUQ {} {
    set pid [pid]

    # Run the helper script with the process id, the UQ variable list
    # and the UQ type/arguments; any failure is reported together with
    # whatever GetUQErrors returns.
    set failed [catch {
        exec puq.sh get_params $pid $_uq(varlist) $_uq(type) $_uq(args)
    } errs]
    if {$failed} {
        error "get_params.py failed: $errs\n[GetUQErrors]"
    }

    # Name of the CSV file, keyed by this process id.
    return "params${pid}.csv"
}
---|
| 495 | |
---|
# ----------------------------------------------------------------------
# USAGE: SetCpuResourceLimit
#
# Applies the CPU-time limit from tool.limits.cputime through
# Rappture::rlimit.  "unlimited" maps to 12 hours, a missing or
# non-numeric value to 4 hours, and numeric values are clamped to the
# range 10 seconds .. 12 hours.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::SetCpuResourceLimit {} {
    set requested [$_xmlobj get tool.limits.cputime]
    if { $requested eq "unlimited" } {
        set limit 43200;                # 12 hours
    } elseif { [scan $requested "%d" seconds] != 1 } {
        set limit 14400;                # 4 hours by default
    } else {
        # Use the integer that scan extracted rather than the raw text,
        # so input such as "100abc" is neither compared as a string nor
        # passed on to rlimit verbatim.
        set limit $seconds
        if { $limit > 43200 } {
            set limit 43200;            # limit to 12 hrs.
        } elseif { $limit < 10 } {
            set limit 10;               # lower bound is 10 seconds.
        }
    }
    Rappture::rlimit set cputime $limit
}
---|
| 512 | |
---|
| 513 | # Write out the driver.xml file for the tool |
---|
# Writes the current XML to driver<pid>.xml in $rapptureInfo(cwd) and
# returns the file name.  Raises an error naming the file when it
# cannot be created or written.
itcl::body Rappture::Task::GetDriverFile {} {
    global rapptureInfo
    set fileName [file join $rapptureInfo(cwd) "driver[pid].xml"]
    set f ""
    if { [catch {
        set f [open $fileName w]
        puts $f "<?xml version=\"1.0\"?>"
        puts $f [$_xmlobj xml]
        close $f
        set f ""
    } errs] != 0 } {
        # A failure after the open would otherwise leave the channel
        # open for the life of the process.
        if { $f ne "" } {
            catch {close $f}
        }
        error "can't create driver file \"$fileName\": $errs"
    }
    return $fileName
}
---|
| 527 | |
---|
# Returns the tool.command string from the XML with every "@tool"
# replaced by the installation directory and leading blanks removed.
itcl::body Rappture::Task::GetCommand { } {
    set cmd [$_xmlobj get tool.command]
    # string map substitutes literally; regsub would treat "&" and "\"
    # in the directory name as substitution metacharacters.
    set cmd [string map [list @tool $_installdir] $cmd]
    set cmd [string trimleft $cmd " "]
    return $cmd
}
---|
| 534 | |
---|
# Builds the command line for an ordinary (non-UQ) run: the tool
# command with "@driver" replaced by the driver file name, prefixed
# according to the configured job protocol.  Returns "" when the tool
# defines no command.
itcl::body Rappture::Task::GetSimulationCommand { driverFile } {
    set cmd [GetCommand]
    if { $cmd == "" } {
        return ""
    }
    # Literal replacement -- "&" or "\" in the path must not be
    # interpreted the way a regsub substitution spec would.
    set cmd [string map [list @driver $driverFile] $cmd]

    switch -glob -- [resources -jobprotocol] {
        "submit*" {
            # if job_protocol is "submit", then use the submit command
            set cmd "submit --local $cmd"
        }
        "mx" {
            # "mx" job protocol: prefix the command with mx
            set cmd "mx $cmd"
        }
        "exec" {
            # default -- nothing special
        }
    }
    return $cmd
}
---|
[6497] | 557 | |
---|
# Builds the submit command line for a UQ run, or returns "" when the
# tool defines no command.
itcl::body Rappture::Task::GetUQSimulationCommand { driverFile } {
    set toolCmd [GetCommand]
    if { $toolCmd eq "" } {
        return ""
    }
    # Obtain the parameter file, then wrap the tool command in submit.
    set csvFile [GetParamsForUQ]
    set submitCmd [BuildSubmitCommand $toolCmd $_uq(tfile) $csvFile]

    # Remove any existing "puq" entry in the current directory.
    file delete -force puq
    return $submitCmd
}
---|
| 568 | |
---|
# Writes the current XML to template<pid>.xml, asks the XML object for
# its UQ variables (passing the file name), stores the variable list
# in _uq(varlist) and the name in _uq(tfile), and returns the name.
itcl::body Rappture::Task::GetUQTemplateFile {} {
    # Copy xml into a new file
    set templateFile "template[pid].xml"
    set f [open $templateFile w]
    if {[catch {
        puts $f "<?xml version=\"1.0\"?>"
        puts $f [$_xmlobj xml]
    } errs]} {
        # Close the channel before re-raising so it does not leak.
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $f}
        error $errs $savedInfo $savedCode
    }
    close $f

    # Fetch the list of UQ variables; the first element of the result
    # is kept as the variable list.
    # NOTE(review): the original comment says this call also turns the
    # file into a template -- confirm against uq_get_vars.
    set _uq(varlist) [lindex [$_xmlobj uq_get_vars $templateFile] 0]
    set _uq(tfile) $templateFile
    return $templateFile
}
---|
| 584 | |
---|
# ----------------------------------------------------------------------
# USAGE: ExecuteSimulationCommand <cmd>
#
# Runs <cmd> through blt::bgexec, collecting stdout/stderr into the
# _job array.  Returns 1 on success (with _job(runfile) set to the
# file named on the =RAPPTURE-RUN=> line of the output) and 0 on
# failure (with an explanation in _job(mesg)).  A command starting
# with "ECHO " is never executed: the rest of the string becomes the
# output and the call succeeds.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::ExecuteSimulationCommand { cmd } {

    set _job(runfile) ""
    set _job(success) 0
    set _job(exitcode) 0

    # Step 1.  Write the command into the run file.
    $_xmlobj put tool.execute $cmd

    Log run started
    Rappture::rusage mark

    # Step 2.  Check if it is a special case "ECHO" command which always
    #          succeeds.
    if { [string compare -nocase -length 5 $cmd "ECHO "] == 0 } {
        set _job(stdout) [string range $cmd 5 end]
        set _job(success) 1
        set _job(exitcode) 0
        set _job(mesg) ""
        return 1;                       # Success
    }

    # Step 3.  Execute the command, collecting its stdout and stderr.
    catch {
        eval blt::bgexec [list [itcl::scope _job(control)]] \
            -keepnewline yes \
            -killsignal SIGTERM \
            -onoutput [list [itcl::code $this OnOutput]] \
            -output [list [itcl::scope _job(stdout)]] \
            -error [list [itcl::scope _job(stderr)]] \
            $cmd
    } result

    # Step 4.  Check the token and the exit code.
    set logmesg $result
    # When the command could not be started, _job(control) is still
    # empty and the foreach below assigns nothing; give the locals a
    # value first so that case reaches the "abnormal" branch instead of
    # failing on an unset variable.
    set token ""
    set mesg ""
    foreach { token _job(pid) _job(exitcode) mesg } $_job(control) break
    if { $token == "EXITED" } {
        if { $_job(exitcode) != 0 } {
            # This means that the program exited normally but returned a
            # non-zero exitcode.  Consider this an invalid result from the
            # program.  Append the stderr from the program to the message.
            if {$_job(exitcode) > 128} {
                set logmesg "Program signaled: signal was [GetSignal $_job(exitcode)]"
            } else {
                set logmesg "Program finished: non-zero exit code is $_job(exitcode)"
            }
            set _job(mesg) "$logmesg\n\n$_job(stderr)"
            Log run failed [list $logmesg]
            return 0;                   # Fail.
        }
        # Successful program termination with exit code of 0.
    } elseif { $token == "abort" } {
        # The user pressed the abort button.

        set logmesg "Program terminated by user."
        Log run failed [list $logmesg]
        set _job(mesg) "$logmesg\n\n$_job(stdout)"
        return 0;                       # Fail
    } else {
        # Abnormal termination

        set logmesg "Abnormal program termination:"
        Log run failed [list $logmesg]
        if { $token eq "" && $result ne "" } {
            # No status was ever reported: the text caught from bgexec
            # is the only explanation available, so report that.
            set _job(mesg) "$logmesg\n\n$result"
        } else {
            set _job(mesg) "$logmesg\n\n$_job(stdout)"
        }
        return 0;                       # Fail
    }
    if { $_uq(type) != "" } {
        CollectUQResults
    }

    # Step 5.  Look in stdout for the name of the run file.
    set pattern {=RAPPTURE-RUN=>([^\n]+)}
    if {![regexp $pattern $_job(stdout) match fileName]} {
        set _job(mesg) "Can't find result file in output.\n"
        append _job(mesg) "Did you call Rappture::result in your simulator?"
        return 0;                       # Fail
    }
    set _job(runfile) $fileName
    set _job(success) 1
    set _job(mesg) $_job(stdout)
    return 1;                           # Success
}
---|
| 667 | |
---|
# ----------------------------------------------------------------------
# USAGE: LogSimulationUsage
#
# Reports resource usage for the run that just finished and loads the
# run file named in _job(runfile).
#
#  1. Measures usage via Rappture::rusage and, when the jobstats
#     callback is non-empty, invokes it at global scope with a
#     key/value list describing the main simulation.
#  2. Scans _job(stderr) for "MiddlewareTime: key=value ..." statements,
#     reports each one through the same callback as a "subsimulation",
#     and removes the statement from _job(stderr).
#  3. Opens the run file as a Rappture library, stores walltime, cputime
#     and (when env(SESSION) exists) the session in it, and saves the
#     library handle in _job(xmlobj).
#
# Raises an error if the run file cannot be loaded.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::LogSimulationUsage {} {
    array set times [Rappture::rusage measure]

    set toolId [$_xmlobj get tool.id]
    set toolVers [$_xmlobj get tool.version.application.revision]
    set simulation "simulation"
    if { $toolId ne "" && $toolVers ne "" } {
        set simulation "[pid]_${toolId}_r${toolVers}"
    }

    # Need to save job info? then invoke the callback
    if { [string length $jobstats] > 0} {
        lappend args \
            "job"      [incr jobnum] \
            "event"    $simulation \
            "start"    $times(start) \
            "walltime" $times(walltime) \
            "cputime"  $times(cputime) \
            "status"   $_job(exitcode)
        uplevel #0 $jobstats $args
    }

    #
    # Scan through stderr channel and look for statements that
    # represent grid jobs that were executed.  The statements look
    # like this:
    #
    # MiddlewareTime: job=1 event=simulation start=3.001094 ...
    #

    set subjobs 0
    set pattern {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)}
    while { [regexp -indices $pattern $_job(stderr) match] } {
        foreach {p0 p1} $match break
        # The match may begin on the newline that ends the previous
        # line; keep that newline when the statement is removed below.
        if { [string index $_job(stderr) $p0] == "\n" } {
            incr p0
        }
        array unset data
        array set data {
            job 1
            event simulation
            start 0
            walltime 0
            cputime 0
            status 0
        }
        # Pull out the key=value pairs with a regexp instead of treating
        # the statement as a Tcl list.  The text comes from the
        # simulator's stderr, so a value containing a brace, quote or
        # backslash would otherwise raise a list-syntax error (or be
        # altered), and a value containing "=" would be truncated.
        set statement [string range $_job(stderr) $p0 $p1]
        foreach {token key val} \
            [regexp -all -inline { +([a-z]+)=([^ \n]+)} $statement] {
            set data($key) $val
        }
        # Job numbers and start times in the statement are relative to
        # this run; the event name is always reported as "subsimulation".
        set data(job) [expr { $jobnum + $data(job) }]
        set data(event) "subsimulation"
        set data(start) [expr { $times(start) + $data(start) }]

        set details ""
        foreach key {job event start walltime cputime status} {
            # Add required keys in a particular order
            lappend details $key $data($key)
            unset data($key)
        }
        foreach key [array names data] {
            # Add anything else that the client gave -- venue, etc.
            lappend details $key $data($key)
        }

        if {[string length $jobstats] > 0} {
            uplevel #0 $jobstats $details
        }

        incr subjobs

        # Done -- remove this statement
        set _job(stderr) [string replace $_job(stderr) $p0 $p1]
    }
    incr jobnum $subjobs

    # Add cputime info to run.xml file
    if { [catch {
        Rappture::library $_job(runfile)
    } xmlobj] != 0 } {
        error "Can't create rappture library: $xmlobj"
    }
    $xmlobj put output.walltime $times(walltime)
    $xmlobj put output.cputime $times(cputime)
    global env
    if {[info exists env(SESSION)]} {
        $xmlobj put output.session $env(SESSION)
    }
    set _job(xmlobj) $xmlobj
}
---|
| 758 | |
---|
# ----------------------------------------------------------------------
# USAGE: LogSubmittedSimulationUsage
#
# Reports resource usage for the run that just finished and loads the
# run file named in _job(runfile).
#
#  1. Measures usage via Rappture::rusage and, when the jobstats
#     callback is non-empty, invokes it at global scope with a
#     key/value list describing the main simulation.
#  2. Scans _job(stderr) for "MiddlewareTime: key=value ..." statements,
#     reports each one through the same callback as a "subsimulation",
#     and removes the statement from _job(stderr).
#  3. Opens the run file as a Rappture library, stores the session in
#     it when env(SESSION) exists, and saves the library handle in
#     _job(xmlobj).  No walltime/cputime values are written to the
#     run file here.
#
# Raises an error if the run file cannot be loaded.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::LogSubmittedSimulationUsage {} {
    array set times [Rappture::rusage measure]

    set toolId [$_xmlobj get tool.id]
    set toolVers [$_xmlobj get tool.version.application.revision]
    set simulation "simulation"
    if { $toolId ne "" && $toolVers ne "" } {
        set simulation "[pid]_${toolId}_r${toolVers}"
    }

    # Need to save job info? then invoke the callback
    if { [string length $jobstats] > 0} {
        lappend args \
            "job"      [incr jobnum] \
            "event"    $simulation \
            "start"    $times(start) \
            "walltime" $times(walltime) \
            "cputime"  $times(cputime) \
            "status"   $_job(exitcode)
        uplevel #0 $jobstats $args
    }

    #
    # Scan through stderr channel and look for statements that
    # represent grid jobs that were executed.  The statements look
    # like this:
    #
    # MiddlewareTime: job=1 event=simulation start=3.001094 ...
    #

    set subjobs 0
    set pattern {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)}
    while { [regexp -indices $pattern $_job(stderr) match] } {
        foreach {p0 p1} $match break
        # The match may begin on the newline that ends the previous
        # line; keep that newline when the statement is removed below.
        if { [string index $_job(stderr) $p0] == "\n" } {
            incr p0
        }
        array unset data
        array set data {
            job 1
            event simulation
            start 0
            walltime 0
            cputime 0
            status 0
        }
        # Pull out the key=value pairs with a regexp instead of treating
        # the statement as a Tcl list.  The text comes from the
        # simulator's stderr, so a value containing a brace, quote or
        # backslash would otherwise raise a list-syntax error (or be
        # altered), and a value containing "=" would be truncated.
        set statement [string range $_job(stderr) $p0 $p1]
        foreach {token key val} \
            [regexp -all -inline { +([a-z]+)=([^ \n]+)} $statement] {
            set data($key) $val
        }
        # Job numbers and start times in the statement are relative to
        # this run; the event name is always reported as "subsimulation".
        set data(job) [expr { $jobnum + $data(job) }]
        set data(event) "subsimulation"
        set data(start) [expr { $times(start) + $data(start) }]

        set details ""
        foreach key {job event start walltime cputime status} {
            # Add required keys in a particular order
            lappend details $key $data($key)
            unset data($key)
        }
        foreach key [array names data] {
            # Add anything else that the client gave -- venue, etc.
            lappend details $key $data($key)
        }

        if {[string length $jobstats] > 0} {
            uplevel #0 $jobstats $details
        }

        incr subjobs

        # Done -- remove this statement
        set _job(stderr) [string replace $_job(stderr) $p0 $p1]
    }
    incr jobnum $subjobs

    # Load the run.xml file and tag it with the session, if any
    if { [catch {
        Rappture::library $_job(runfile)
    } xmlobj] != 0 } {
        error "Can't create rappture library: $xmlobj"
    }
    global env
    if {[info exists env(SESSION)]} {
        $xmlobj put output.session $env(SESSION)
    }
    set _job(xmlobj) $xmlobj
}
---|
| 847 | |
---|
# ----------------------------------------------------------------------
# USAGE: LogCachedSimulationUsage
#
# Loads the run file named in _job(runfile) as a Rappture library,
# passes the session recorded in it (output.session) to
# "submit --cache", and stores the library handle in _job(xmlobj).
# A failure of the submit command is only reported on stderr; a
# failure to load the run file raises an error.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::LogCachedSimulationUsage {} {
    set status [catch {Rappture::library $_job(runfile)} xmlobj]
    if { $status != 0 } {
        error "Can't create rappture library: $xmlobj"
    }

    # The session is read from the run file itself.
    set session [$xmlobj get "output.session"]

    set status [catch {exec submit --cache $session} result]
    if { $status != 0 } {
        puts stderr "submit --cache failed: $result"
    }

    set _job(xmlobj) $xmlobj
}
---|
| 861 | |
---|
| 862 | |
---|
# ----------------------------------------------------------------------
# USAGE: CheckForCachedRunFile <driverFile>
#
# Sends the contents of <driverFile> as a query to the cache server
# named by _resources(-cachehosts).  On a hit (HTTP 200 with a
# non-empty body) the body is written to a new run<timestamp>.xml file
# under rapptureInfo(cwd), _job(runfile), _job(success) and _job(stderr)
# are set, and 1 is returned.  Returns 0 when the request fails, the
# status is not 200, or the body is empty.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::CheckForCachedRunFile { driverFile } {

    # Read the driver file and collect its contents as the query.
    set url http://$_resources(-cachehosts)/cache/request
    set f [open $driverFile "r"]
    set query [read $f]
    close $f

    # Make the query
    if { [catch {
        http::geturl $url -query $query -timeout 6000 -binary yes
    } token] != 0 } {
        puts stderr "error performing cache query: token=$token"
        return 0
    }

    # Copy what we need out of the transaction, then release it.  The
    # http package keeps the state of every request until http::cleanup
    # is called, so the token must be cleaned up on every path.
    set ncode [http::ncode $token]
    set contents [http::data $token]
    http::cleanup $token

    # If the code isn't 200, we'll assume it's a cache miss.
    if { $ncode != 200 } {
        return 0
    }
    # An empty body is treated as a miss as well.
    if { $contents eq "" } {
        return 0
    }

    # Create a new run.xml file and write the results into it.
    set secs [clock seconds]
    set millisecs [expr {[clock clicks -milliseconds] % 1000}]
    set timestamp [format %d%03d%03d $secs $millisecs 0]

    global rapptureInfo
    set fileName [file join $rapptureInfo(cwd) "run${timestamp}.xml"]
    set f [open $fileName "w"]
    puts $f $contents
    close $f
    set _job(runfile) $fileName
    set _job(success) 1
    set _job(stderr) "Loading cached results\n"
    OnOutput "Loading cached results\n"
    update
    return 1
}
---|
| 905 | |
---|
# ----------------------------------------------------------------------
# USAGE: GetUQErrors
#
# Returns the full contents of the file "uq_debug.err" in the current
# working directory, or an empty string when that file does not exist.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::GetUQErrors {} {
    set errFile "uq_debug.err"
    if { ![file exists $errFile] } {
        return {}
    }
    set fid [open $errFile r]
    set text [read $fid]
    close $fid
    return $text
}
---|
| 915 | |
---|
# ----------------------------------------------------------------------
# USAGE: CollectUQResults
#
# UQ.  Collect data from all jobs and put it in one xml run file.
#
# Removes any existing "run_uq.xml", then runs "puq.sh analyze" on the
# puq_<pid>.hdf5 file.  The output of that command replaces
# _job(stdout).  If the command fails, an error is raised that
# includes its output followed by whatever GetUQErrors returns.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::CollectUQResults {} {
    file delete -force -- "run_uq.xml"

    set hdfFile "puq_[pid].hdf5"
    set failed [catch {exec puq.sh analyze $hdfFile} results]
    if { $failed } {
        error "UQ analysis failed: $results\n[GetUQErrors]"
    }
    set _job(stdout) $results
}
---|