1 | # -*- mode: tcl; indent-tabs-mode: nil -*- |
---|
2 | # ---------------------------------------------------------------------- |
---|
3 | # COMPONENT: task - represents the executable part of a tool |
---|
4 | # |
---|
5 | # This object is an executable version of a Rappture xml file. |
---|
6 | # A tool is a task plus its graphical user interface. Each task |
---|
7 | # resides in an installation directory with other tool resources |
---|
8 | # (libraries, examples, etc.). Each task is defined by its inputs |
---|
9 | # and outputs, and understands the context in which it executes |
---|
10 | # (via exec, submit, mx, etc.). |
---|
11 | # ====================================================================== |
---|
12 | # AUTHOR: Michael McLennan, Purdue University |
---|
13 | # Copyright (c) 2004-2014 HUBzero Foundation, LLC |
---|
14 | # |
---|
15 | # See the file "license.terms" for information on usage and |
---|
16 | # redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
17 | # ====================================================================== |
---|
18 | package require BLT |
---|
19 | |
---|
itcl::class Rappture::Task {
    # Private helpers; their bodies are supplied separately via itcl::body.
    private method CheckForCachedRunFile { driverFile }
    private method CollectUQResults {}
    private method ExecuteSimulationCommand { cmd }
    private method GetCommand {}
    private method GetDriverFile {}
    private method GetSignal { signal }
    private method GetSimulationCommand { driverFile }
    private method GetUQErrors {}
    private method GetUQSimulationCommand { driverFile }
    private method GetUQTemplateFile {}
    private method IsCacheable {}
    private method LogCachedSimulationUsage {}
    private method LogSimulationUsage {}
    private method LogSubmittedSimulationUsage {}
    private method SetCpuResourceLimit {}

    # Configuration options:
    #   -logger     command prefix used by Log (empty = logging off)
    #   -jobstats   command prefix that receives job statistics
    #   -resultdir  where "save" stores results ("@default" = use resources)
    public variable logger ""
    public variable jobstats Rappture::Task::MiddlewareTime
    public variable resultdir "@default"

    constructor {xmlobj installdir args} { # defined below }
    destructor { # defined below }

    public method installdir {} { return $_installdir }

    public method run {args}
    public method get_uq {args}
    public method abort {}
    public method reset {}
    public method xml {args}
    public method save {xmlobj {name ""}}

    protected method OnOutput {data}
    protected method Log {args}
    protected method BuildSubmitCommand {cmd tfile params_file}
    protected method GetParamsForUQ {}

    private variable _xmlobj ""      ;# XML object with inputs/outputs
    private variable _origxml ""     ;# copy of original XML (for reset)
    private variable _installdir ""  ;# installation directory for this tool
    private variable _outputcb ""    ;# callback for tool output
    private common jobnum 0          ;# counter for unique job number
    private variable _uq             ;# array: uncertainty-quantification settings

    private variable _job            ;# array: state of the current/last job

    # get global resources for this tool session
    public proc resources {{option ""}}

    # Setters invoked through the handlers registered in
    # task_init_resources.  Each one stores a single value in the
    # shared _resources array.
    public common _resources
    public proc setAppName {name} { set _resources(-appname) $name }
    # task_init_resources registers Rappture::Task::setAppId for the
    # "application_id" key, so the handler has to exist; without it a
    # resources file containing that key would hit an unknown command.
    public proc setAppId {name} { set _resources(-appid) $name }
    public proc setHubName {name} { set _resources(-hubname) $name }
    public proc setHubURL {name} { set _resources(-huburl) $name }
    public proc setSession {name} { set _resources(-session) $name }
    public proc setJobPrt {name} { set _resources(-jobprotocol) $name }
    public proc setResultDir {name} { set _resources(-resultdir) $name }
    public proc setCacheHosts {name} { set _resources(-cachehosts) $name }

    # default method for -jobstats control
    public proc MiddlewareTime {args}
}
---|
82 | |
---|
83 | # must use this name -- plugs into Rappture::resources::load |
---|
proc task_init_resources {} {
    # Table of <resource key> <handler proc> pairs.  The whole table is
    # handed to Rappture::resources::register in a single call.
    set handlers {
        application_name   Rappture::Task::setAppName
        application_id     Rappture::Task::setAppId
        hub_name           Rappture::Task::setHubName
        hub_url            Rappture::Task::setHubURL
        session_token      Rappture::Task::setSession
        job_protocol       Rappture::Task::setJobPrt
        results_directory  Rappture::Task::setResultDir
        cache_hosts        Rappture::Task::setCacheHosts
    }
    eval [linsert $handlers 0 Rappture::resources::register]
}
---|
95 | |
---|
96 | # ---------------------------------------------------------------------- |
---|
97 | # CONSTRUCTOR |
---|
98 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::constructor {xmlobj installdir args} {
    # Only a valid Rappture library object is accepted as the tool XML.
    if {[Rappture::library isvalid $xmlobj]} {
        set _xmlobj $xmlobj
    } else {
        error "bad value \"$xmlobj\": should be Rappture::Library"
    }

    # Snapshot the XML as it is right now; "reset" copies it back later.
    set blankDoc "<?xml version=\"1.0\"?><run/>"
    set _origxml [Rappture::LibraryObj ::#auto $blankDoc]
    $_origxml copy "" from $_xmlobj ""

    # The installation directory has to exist before it is recorded.
    if {[file exists $installdir]} {
        set _installdir $installdir
    } else {
        error "directory \"$installdir\" doesn't exist"
    }

    # Register an https handler with the http package.
    package require http
    package require tls
    http::register https 443 [list ::tls::socket -tls1 1]

    # Remaining arguments are configuration options (-logger, etc.).
    eval configure $args
}
---|
119 | |
---|
120 | # ---------------------------------------------------------------------- |
---|
121 | # DESTRUCTOR |
---|
122 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::destructor {} {
    # Release the private copy of the original XML.  _origxml is still
    # empty if the constructor bailed out before creating the copy
    # (e.g. on an invalid xmlobj); deleting "" would raise an error of
    # its own, so only delete when there is something to delete.
    # NOTE(review): assumes itcl runs the destructor for an object
    # whose constructor failed -- confirm against the itcl docs.
    if {$_origxml ne ""} {
        itcl::delete object $_origxml
        set _origxml ""
    }
}
---|
126 | |
---|
127 | # ---------------------------------------------------------------------- |
---|
128 | # USAGE: resources ?-option? |
---|
129 | # |
---|
130 | # Clients use this to query information about the tool. |
---|
131 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::resources {{option ""}} {
    # A specific option was requested: return its value, or "" when
    # that option has never been set.
    if {$option != ""} {
        if {![info exists _resources($option)]} {
            return ""
        }
        return $_resources($option)
    }

    # No option given: return everything as a key/value list.
    return [array get _resources]
}
---|
141 | |
---|
itcl::body Rappture::Task::GetSignal {code} {
    # Translate an exit code above 128 into a signal name.  The signal
    # number is taken to be (code - 128) and used as an index into the
    # table below; slot 0 is a placeholder since there is no signal 0.
    # Returns the signal name, or an "unknown exit code" message when
    # the number falls outside the table.
    #
    # NOTE(review): the real-time entries at the end of the table are
    # positional and may not match the platform's actual signal
    # numbers -- confirm before relying on them.
    set signals {
        xxx HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV
        USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN
        TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS
        RTMIN RTMIN+1 RTMIN+2 RTMIN+3 RTMAX-3 RTMAX-2 RTMAX-1 RTMAX
    }
    # Braced so the expression is parsed once and $code is never
    # re-substituted as expression text.
    set sigNum [expr {$code - 128}]
    if {$sigNum > 0 && $sigNum < [llength $signals]} {
        return [lindex $signals $sigNum]
    }
    return "unknown exit code \"$code\""
}
---|
155 | |
---|
itcl::body Rappture::Task::get_uq {args} {
    # Pick up the UQ settings from the option/value pairs; anything
    # other than -uq_type and -uq_args is ignored here.
    foreach {opt value} $args {
        switch -exact -- $opt {
            -uq_type { set _uq(type) $value }
            -uq_args { set _uq(args) $value }
        }
    }

    # uq_get_vars returns a list whose first two elements are the
    # variable list and a count; both are handed to the new UQ object.
    foreach {varlist num} [$_xmlobj uq_get_vars] break
    return [Rappture::UQ ::#auto $varlist $num $_uq(type) $_uq(args)]
}
---|
168 | |
---|
169 | # ---------------------------------------------------------------------- |
---|
170 | # USAGE: run ?<path1> <value1> <path2> <value2> ...? ?-output <callbk>? |
---|
171 | # |
---|
172 | # This method causes the tool to run. A "driver.xml" file is created |
---|
173 | # as the input for the run. That file is fed to the executable |
---|
174 | # according to the <tool><command> string, and the job is executed. |
---|
175 | # |
---|
176 | # Any "<path> <value>" arguments are used to override the current |
---|
177 | # settings from the GUI. This is useful, for example, when filling |
---|
178 | # in missing simulation results from the analyzer. |
---|
179 | # |
---|
180 | # If the -output argument is included, then the next arg is a |
---|
181 | # callback command for output messages. Any output that comes in |
---|
182 | # while the tool is running is sent back to the caller, so the user |
---|
183 | # can see progress running the tool. |
---|
184 | # |
---|
185 | # Returns a list of the form {status result}, where status is an |
---|
186 | # integer status code (0=success) and result is the output from the |
---|
187 | # simulator. Successful output is something like {0 run1293921.xml}, |
---|
188 | # where 0=success and run1293921.xml is the name of the file containing |
---|
189 | # results. |
---|
190 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::run {args} {
    global env errorInfo

    # Older tools may not carry their own name in the XML.  When the
    # resources file supplied one and the XML has none, fill it in.
    if {[info exists _resources(-appname)] && $_resources(-appname) ne ""
            && [$_xmlobj get tool.name] eq ""} {
        $_xmlobj put tool.name $_resources(-appname)
    }

    # Walk the name/value pairs: recognized options configure this run,
    # everything else overrides the current value of an input path.
    set _outputcb ""
    set _uq(type) ""
    foreach {name value} $args {
        switch -exact -- $name {
            -output  { set _outputcb $value }
            -uq_type { set _uq(type) $value }
            -uq_args { set _uq(args) $value }
            default  { $_xmlobj put $name.current $value }
        }
    }

    # Start from a clean job record.
    array set _job {
        control ""
        exitcode 0
        mesg ""
        runfile ""
        stderr ""
        stdout ""
        success 0
        xmlobj ""
    }

    SetCpuResourceLimit
    set driverFile [GetDriverFile]

    # Look for a cached result first, when caching applies.
    set cached 0
    if {[IsCacheable]} {
        puts stderr "Cache checking: [time {
            set cached [CheckForCachedRunFile $driverFile]
        } ]"
        puts stderr "checking cache=$cached"
    }

    if {$cached} {
        LogCachedSimulationUsage
    } else {
        # Build the command line: plain run or UQ parameter sweep.
        if {$_uq(type) == ""} {
            set cmd [GetSimulationCommand $driverFile]
        } else {
            set _uq(tfile) [GetUQTemplateFile]
            set cmd [GetUQSimulationCommand $driverFile]
        }
        if {$cmd == ""} {
            puts stderr "cmd is empty"
            append mesg "There is no command specified by\n\n"
            append mesg " <command>\n"
            append mesg " </command>\n\n"
            append mesg "in the tool.xml file."
            return [list 1 $mesg]
        }

        Rappture::rusage mark
        if {![ExecuteSimulationCommand $cmd]} {
            return [list 1 $_job(mesg)]
        }

        # Record usage; submitted jobs are logged by a separate method.
        if {[resources -jobprotocol] == "submit"} {
            LogSubmittedSimulationUsage
        } else {
            LogSimulationUsage
        }
    }

    # Success: the driver file is no longer needed.
    if {$_job(success)} {
        file delete -force -- $driverFile
        Log run finished
        return [list 0 $_job(xmlobj)]
    }

    # Failure: report an abort separately from an ordinary failure.
    if {[regexp {^KILLED} $_job(control)]} {
        Log run aborted
        return [list 1 "ABORT"]
    }
    Log run failed [list 0 $_job(mesg)]
    return [list 1 $_job(mesg)]
}
---|
284 | |
---|
285 | # ---------------------------------------------------------------------- |
---|
286 | # Turn the command string from tool.xml into the proper syntax to use |
---|
287 | # with a submit parameter sweep with a template file. Proper quoting |
---|
288 | # of the template file is necessary to prevent submit from being too smart |
---|
289 | # and converting it to a full pathname. |
---|
290 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::BuildSubmitCommand {cmd tfile params_file} {
    # Rewrite the tool command <cmd> into a "submit" parameter-sweep
    # command that uses <tfile> as the template and <params_file> as
    # the parameter data.
    #
    #   cmd          command string, split on single spaces
    #   tfile        template file name, substituted for @driver
    #   params_file  parameter file passed to submit via -d
    #
    # Returns the complete submit command string.
    set newcmd "submit --progress submit --runName=puq -l -i @:$tfile -d $params_file"
    set quotedTemplate "\"\\\"$tfile\\\"\""

    set quote_next 0
    foreach arg [split $cmd " "] {
        # The word following --eval is wrapped in an extra layer of
        # quotes unless it already starts with a double quote.
        if {$quote_next && [string index $arg 0] ne "\""} {
            set arg "\"\\\"$arg\\\"\""
        }
        set quote_next [expr {$arg eq "--eval"}]

        # A bare @driver word becomes the quoted template file name.
        if {$arg eq "@driver"} {
            set arg $quotedTemplate
        }
        append newcmd " " $arg
    }

    # Replace any @driver still embedded inside other words.  string map
    # inserts the file name literally; regsub would treat "&" and "\"
    # in the name as substitution metacharacters.
    return [string map [list @driver $tfile] $newcmd]
}
---|
316 | |
---|
317 | # ---------------------------------------------------------------------- |
---|
318 | # USAGE: abort |
---|
319 | # |
---|
320 | # Clients use this during a "run" to abort the current job. |
---|
321 | # Kills the job and forces the "run" method to return. |
---|
322 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::abort {} {
    # Note the request in the log first ...
    Log run abort

    # ... then overwrite the job control variable with the abort
    # marker that ExecuteSimulationCommand checks for.
    set _job(control) abort
}
---|
327 | |
---|
328 | # ---------------------------------------------------------------------- |
---|
329 | # USAGE: reset |
---|
330 | # |
---|
331 | # Resets all input values to their defaults. Sometimes used just |
---|
332 | # before a run to reset to a clean state. |
---|
333 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::reset {} {
    # Restore the XML saved at construction time.
    $_xmlobj copy "" from $_origxml ""

    # For every input that declares a <default>, copy that default
    # into its <current> value.  Inputs without one are left alone.
    foreach inputPath [Rappture::entities -as path $_xmlobj input] {
        if {[$_xmlobj element -as type $inputPath.default] eq ""} {
            continue
        }
        $_xmlobj put $inputPath.current [$_xmlobj get $inputPath.default]
    }
}
---|
343 | |
---|
344 | # ---------------------------------------------------------------------- |
---|
345 | # USAGE: xml <subcommand> ?<arg> <arg> ...? |
---|
346 | # USAGE: xml object |
---|
347 | # |
---|
348 | # Used by clients to manipulate the underlying XML data for this |
---|
349 | # tool. The <subcommand> can be any operation supported by a |
---|
350 | # Rappture::library object. Clients can also request the XML object |
---|
351 | # directly by using the "object" subcommand. |
---|
352 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::xml {args} {
    # Anything other than the lone word "object" is forwarded as a
    # subcommand to the underlying XML object.
    if {$args ne "object"} {
        return [eval $_xmlobj $args]
    }

    # "xml object" hands back the XML object itself.
    return $_xmlobj
}
---|
359 | |
---|
360 | # ---------------------------------------------------------------------- |
---|
361 | # USAGE: save <xmlobj> ?<filename>? |
---|
362 | # |
---|
363 | # Used by clients to save the contents of an <xmlobj> representing |
---|
364 | # a run out to the given file. If <filename> is not specified, then |
---|
365 | # it uses the -resultdir and other settings to do what Rappture |
---|
366 | # would normally do with the output. |
---|
367 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::save {xmlobj {filename ""}} {
    # Write the run described by <xmlobj> to <filename>.  When no file
    # name is given, one is derived from the -resultdir option, the
    # results_directory resource, and the run file name of the last job.
    # Raises an error if the file cannot be created or written.
    if {$filename eq ""} {

        # Pick the directory: -resultdir wins unless it is "@default",
        # in which case the resources setting (or the working
        # directory) is used.  An empty -resultdir means "no directory".
        set rdir ""
        if {$resultdir eq "@default"} {
            if {[info exists _resources(-resultdir)]} {
                set rdir $_resources(-resultdir)
            } else {
                global rapptureInfo
                set rdir $rapptureInfo(cwd)
            }
        } elseif {$resultdir ne ""} {
            set rdir $resultdir
        }

        # Use the run file name generated by the last run.  _job is
        # only filled in by "run", so guard against save being called
        # before any run has happened.
        if {[info exists _job(runfile)] && $_job(runfile) ne ""} {
            set filename [file join $rdir $_job(runfile)]
        } else {
            set filename [file join $rdir run.xml]
        }
    }

    # add any last-minute metadata
    $xmlobj put output.time [clock format [clock seconds]]

    $xmlobj put tool.version.rappture.version $::Rappture::version
    $xmlobj put tool.version.rappture.revision $::Rappture::build
    $xmlobj put output.filename $filename
    $xmlobj put output.version $::Rappture::version

    if {[info exists ::tcl_platform(user)]} {
        $xmlobj put output.user $::tcl_platform(user)
    }

    # save the output, creating the target directory if necessary
    file mkdir [file dirname $filename]

    set fid [open $filename w]
    if {[catch {
        puts $fid "<?xml version=\"1.0\"?>"
        puts $fid [$xmlobj xml]
    } err]} {
        # Do not leak the channel when writing fails; re-raise the
        # original error with its original trace.
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $fid}
        error $err $savedInfo $savedCode
    }
    close $fid

    Log output saved in $filename
}
---|
417 | |
---|
418 | # ---------------------------------------------------------------------- |
---|
419 | # USAGE: OnOutput <data> |
---|
420 | # |
---|
421 | # Used internally to send each bit of output <data> coming from the |
---|
422 | # tool onto the caller, so the user can see progress. |
---|
423 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::OnOutput {data} {
    # No callback registered for this run: drop the output.
    if {$_outputcb eq ""} {
        return
    }

    # Pass the chunk to the callback as one argument, at global scope.
    uplevel #0 $_outputcb [list $data]
}
---|
429 | |
---|
430 | # ---------------------------------------------------------------------- |
---|
431 | # USAGE: Log <cmd> <arg> <arg> ... |
---|
432 | # |
---|
433 | # Used internally to log interesting events during the run. If the |
---|
434 | # -logger option is set (to Rappture::Logger::log, or something like |
---|
435 | # that), then the arguments to this method are passed along to the |
---|
436 | # logger and written out to a log file. Logging is off by default, |
---|
437 | # so this method does nothing unless -logger is set. |
---|
438 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::Log {args} {
    # Logging is off unless the -logger option holds a command.
    if {$logger eq ""} {
        return
    }

    # The logger receives all the words of this call as one list
    # argument and is invoked at global scope.
    uplevel #0 $logger [list $args]
}
---|
444 | |
---|
445 | # ---------------------------------------------------------------------- |
---|
446 | # USAGE: MiddlewareTime <key> <value> ... |
---|
447 | # |
---|
448 | # Used as the default method for reporting job status information. |
---|
449 | # Implements the old HUBzero method of reporting job status info to |
---|
450 | # stderr, which can then be picked up by the tool session container. |
---|
451 | # Most tools use the "submit" command, which talks directly to a |
---|
452 | # database to log job information, so this isn't really needed. But |
---|
453 | # it doesn't hurt to have this and it can be useful in some cases. |
---|
454 | # ---------------------------------------------------------------------- |
---|
itcl::body Rappture::Task::MiddlewareTime {args} {
    # Render the key/value pairs as "key=value" words after the
    # "MiddlewareTime:" tag and write the line to stderr.
    set words [list "MiddlewareTime:"]
    foreach {key val} $args {
        lappend words "$key=$val"
    }
    puts stderr [join $words " "]
}
---|
462 | |
---|
itcl::body Rappture::Task::IsCacheable {} {
    # Caching requires a non-empty cache_hosts resource.
    set haveHosts [info exists _resources(-cachehosts)]
    if {!$haveHosts || $_resources(-cachehosts) == ""} {
        puts stderr cachehosts=$haveHosts
        return 0
    }

    # The per-tool switch (the "tool.cache" element of the XML) is
    # currently disabled in this method, so every tool counts as
    # cacheable once cache hosts are configured.
    return 1
}
---|
483 | |
---|
484 | # |
---|
485 | # Send the list of parameters to a python program so it can call PUQ |
---|
486 | # and get a CSV file containing the parameter values to use for the runs. |
---|
itcl::body Rappture::Task::GetParamsForUQ {} {
    # The process id is passed to the script and also appears in the
    # name of the CSV file returned below.
    set procId [pid]

    # Run the helper script with the UQ variable list, type and args.
    set status [catch {
        exec puq.sh get_params $procId $_uq(varlist) $_uq(type) $_uq(args)
    } errs]
    if {$status != 0} {
        error "get_params.py failed: $errs\n[GetUQErrors]"
    }

    return "params${procId}.csv"
}
---|
497 | |
---|
itcl::body Rappture::Task::SetCpuResourceLimit {} {
    # Apply the CPU time limit requested by <tool><limits><cputime>.
    #
    #   "unlimited"      -> capped at 12 hours
    #   not an integer   -> 4 hour default
    #   integer seconds  -> clamped to the range 10 .. 43200
    set maxLimit 43200;      # 12 hours
    set defaultLimit 14400;  # 4 hours
    set minLimit 10;         # 10 seconds

    set requested [$_xmlobj get tool.limits.cputime]
    if {$requested eq "unlimited"} {
        set limit $maxLimit
    } elseif {[scan $requested "%d" seconds] != 1} {
        set limit $defaultLimit
    } else {
        # Use the scanned integer, not the raw text.  A value such as
        # "60s" passes the scan but would otherwise be compared as a
        # string and handed to rlimit unchanged.
        set limit $seconds
        if {$limit > $maxLimit} {
            set limit $maxLimit
        } elseif {$limit < $minLimit} {
            set limit $minLimit
        }
    }
    Rappture::rlimit set cputime $limit
}
---|
514 | |
---|
515 | # Write out the driver.xml file for the tool |
---|
itcl::body Rappture::Task::GetDriverFile {} {
    global rapptureInfo

    # The driver file goes in the working directory recorded in
    # rapptureInfo(cwd) and carries this process id in its name.
    set driverPath [file join $rapptureInfo(cwd) "driver[pid].xml"]

    # Write the XML declaration followed by the current XML.
    set status [catch {
        set out [open $driverPath w]
        puts $out "<?xml version=\"1.0\"?>"
        puts $out [$_xmlobj xml]
        close $out
    } errs]
    if {$status != 0} {
        error "can't create driver file \"$driverPath\": $errs"
    }
    return $driverPath
}
---|
529 | |
---|
itcl::body Rappture::Task::GetCommand { } {
    # Return the <tool><command> string with every @tool replaced by
    # the installation directory and leading spaces removed.  Returns
    # "" when the tool defines no command.
    #
    # string map inserts the directory literally; regsub would treat
    # "&" and "\" in the path as substitution metacharacters.
    set cmd [string map [list @tool $_installdir] [$_xmlobj get tool.command]]
    return [string trimleft $cmd " "]
}
---|
536 | |
---|
itcl::body Rappture::Task::GetSimulationCommand { driverFile } {
    # Build the command line for an ordinary (non-UQ) run.
    #
    #   driverFile  path substituted for every @driver in the command
    #
    # Returns the command string, prefixed according to the
    # job_protocol resource, or "" when the tool has no command.
    set cmd [GetCommand]
    if { $cmd == "" } {
        return ""
    }

    # Literal replacement: the driver path may contain "&" or "\",
    # which regsub would interpret in its substitution spec.
    set cmd [string map [list @driver $driverFile] $cmd]

    switch -glob -- [resources -jobprotocol] {
        "submit*" {
            # job_protocol is "submit": run through the submit command
            set cmd "submit --local $cmd"
        }
        "mx" {
            # "mx" protocol: prefix the command with mx
            set cmd "mx $cmd"
        }
        "exec" {
            # default -- nothing special
        }
    }
    return $cmd
}
---|
559 | |
---|
itcl::body Rappture::Task::GetUQSimulationCommand { driverFile } {
    # Build the submit command for a UQ run from the tool command, the
    # template file in _uq(tfile) and a freshly generated parameter
    # file.  An empty tool command is returned as is.
    set toolCmd [GetCommand]
    if { $toolCmd != "" } {
        set paramsFile [GetParamsForUQ]
        set toolCmd [BuildSubmitCommand $toolCmd $_uq(tfile) $paramsFile]

        # Remove any existing "puq" file or directory.
        file delete -force puq
    }
    return $toolCmd
}
---|
570 | |
---|
itcl::body Rappture::Task::GetUQTemplateFile {} {
    global rapptureInfo

    # Dump the current XML into template<pid>.xml.
    set tfile "template[pid].xml"
    set out [open $tfile w]
    puts $out "<?xml version=\"1.0\"?>"
    puts $out [$_xmlobj xml]
    close $out

    # uq_get_vars is given the file name; the first element of its
    # result is kept as the UQ variable list.
    set uqInfo [$_xmlobj uq_get_vars $tfile]
    set _uq(varlist) [lindex $uqInfo 0]

    # Record the template file name and return it.
    set _uq(tfile) $tfile
    return $tfile
}
---|
586 | |
---|
itcl::body Rappture::Task::ExecuteSimulationCommand { cmd } {
    # Run <cmd>, collecting its stdout/stderr into the _job array.
    #
    # Returns 1 on success and 0 on failure.  On failure _job(mesg)
    # holds a description; on success _job(runfile) holds the run file
    # name found in the program's output (except for the ECHO special
    # case, which produces no run file).

    set _job(runfile) ""
    set _job(success) 0
    set _job(exitcode) 0

    # Step 1.  Write the command into the run file.
    $_xmlobj put tool.execute $cmd

    Log run started
    Rappture::rusage mark

    # Step 2.  Check if it is a special case "ECHO" command which always
    #          succeeds.
    if { [string compare -nocase -length 5 $cmd "ECHO "] == 0 } {
        set _job(stdout) [string range $cmd 5 end]
        set _job(success) 1
        set _job(exitcode) 0
        set _job(mesg) ""
        return 1;                       # Success
    }

    # Step 3.  Execute the command, collecting its stdout and stderr.
    set launchFailed [catch {
        eval blt::bgexec [list [itcl::scope _job(control)]] \
            -keepnewline yes \
            -killsignal SIGTERM \
            -onoutput [list [itcl::code $this OnOutput]] \
            -output [list [itcl::scope _job(stdout)]] \
            -error [list [itcl::scope _job(stderr)]] \
            $cmd
    } result]

    # Step 4.  Check the token and the exit code.  The control variable
    #          is read as a list {token pid exitcode message}.  It is
    #          still empty if the command could not be started at all,
    #          so give token a defined value first.
    set token ""
    set mesg ""
    foreach { token _job(pid) _job(exitcode) mesg } $_job(control) break

    if { $token == "EXITED" } {
        if { $_job(exitcode) != 0 } {
            # The program exited normally but returned a non-zero exit
            # code.  Consider this an invalid result from the program
            # and append its stderr to the message.
            if { $_job(exitcode) > 128 } {
                set logmesg "Program signaled: signal was [GetSignal $_job(exitcode)]"
            } else {
                set logmesg "Program finished: non-zero exit code is $_job(exitcode)"
            }
            set _job(mesg) "$logmesg\n\n$_job(stderr)"
            Log run failed [list $logmesg]
            return 0;                   # Fail.
        }
        # Successful program termination with exit code of 0.
    } elseif { $token == "abort" } {
        # The user pressed the abort button.

        set logmesg "Program terminated by user."
        Log run failed [list $logmesg]
        set _job(mesg) "$logmesg\n\n$_job(stdout)"
        return 0;                       # Fail
    } else {
        # Abnormal termination

        set logmesg "Abnormal program termination:"
        Log run failed [list $logmesg]
        if { $launchFailed && $token eq "" } {
            # The command never ran; the only useful detail is the
            # error reported by the launch attempt itself.
            set _job(mesg) "$logmesg\n\n$result"
        } else {
            set _job(mesg) "$logmesg\n\n$_job(stdout)"
        }
        return 0;                       # Fail
    }
    if { $_uq(type) != "" } {
        CollectUQResults
    }

    # Step 5.  Look in stdout for the name of the run file.
    set pattern {=RAPPTURE-RUN=>([^\n]+)}
    if {![regexp $pattern $_job(stdout) match fileName]} {
        set _job(mesg) "Can't find result file in output.\n"
        append _job(mesg) "Did you call Rappture::result in your simulator?"
        return 0;                       # Fail
    }
    set _job(runfile) $fileName
    set _job(success) 1
    set _job(mesg) $_job(stdout)
    return 1;                           # Success
}
---|
669 | |
---|
itcl::body Rappture::Task::LogSimulationUsage {} {
    array set times [Rappture::rusage measure]

    # Event label: "<pid>_<toolId>_r<revision>" when both the tool id
    # and revision are known, plain "simulation" otherwise.
    set eventName "simulation"
    set toolId [$_xmlobj get tool.id]
    set toolVers [$_xmlobj get tool.version.application.revision]
    if { $toolId ne "" && $toolVers ne "" } {
        set eventName "[pid]_${toolId}_r${toolVers}"
    }

    # Report the overall job through the -jobstats callback, if any.
    if { [string length $jobstats] > 0 } {
        set report [list \
            "job" [incr jobnum] \
            "event" $eventName \
            "start" $times(start) \
            "walltime" $times(walltime) \
            "cputime" $times(cputime) \
            "status" $_job(exitcode)]
        uplevel #0 $jobstats $report
    }

    #
    # Scan the collected stderr for statements describing grid jobs
    # that were executed.  They look like this:
    #
    #   MiddlewareTime: job=1 event=simulation start=3.001094 ...
    #
    # Each one is reported as a "subsimulation" and then cut out of
    # the stderr text.
    #
    set subjobs 0
    set pattern {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)}
    while { [regexp -indices $pattern $_job(stderr) span] } {
        set first [lindex $span 0]
        set last [lindex $span 1]

        # Leave the newline that precedes the statement in place.
        if { [string index $_job(stderr) $first] == "\n" } {
            incr first
        }

        # Defaults for the required fields.
        array unset data
        array set data {
            job 1
            event simulation
            start 0
            walltime 0
            cputime 0
            status 0
        }

        # Overlay the key=value words that follow the tag.
        set statement [string range $_job(stderr) $first $last]
        foreach field [lrange $statement 1 end] {
            foreach {key val} [split $field =] break
            set data($key) $val
        }

        # Job numbers and start times are offsets from the main job.
        set data(job) [expr { $jobnum + $data(job) }]
        set data(event) "subsimulation"
        set data(start) [expr { $times(start) + $data(start) }]

        # Required keys go first, in a fixed order ...
        set details ""
        foreach key {job event start walltime cputime status} {
            lappend details $key $data($key)
            unset data($key)
        }
        # ... followed by anything else the client gave (venue, etc.).
        foreach key [array names data] {
            lappend details $key $data($key)
        }

        if { [string length $jobstats] > 0 } {
            uplevel #0 $jobstats $details
        }

        incr subjobs

        # Done -- remove this statement
        set _job(stderr) [string replace $_job(stderr) $first $last]
    }
    incr jobnum $subjobs

    # Load the run file and add the timing information to it.
    set status [catch {
        Rappture::library $_job(runfile)
    } xmlobj]
    if { $status != 0 } {
        error "Can't create rappture library: $xmlobj"
    }
    $xmlobj put output.walltime $times(walltime)
    $xmlobj put output.cputime $times(cputime)

    global env
    if { [info exists env(SESSION)] } {
        $xmlobj put output.session $env(SESSION)
    }
    set _job(xmlobj) $xmlobj
}
---|
760 | |
---|
# ----------------------------------------------------------------------
# USAGE: LogSubmittedSimulationUsage
#
# Reports usage information for a submitted simulation.  When the
# "jobstats" variable holds a command, it is invoked at the global
# level with a key/value list (job, event, start, walltime, cputime,
# status) for the overall run, and once more for every
# "MiddlewareTime:" statement found in _job(stderr).  Each such
# statement is removed from _job(stderr) after it is processed.
# Finally, the run file named by _job(runfile) is loaded and the
# resulting library object is stored in _job(xmlobj).
#
# Raises an error if the run file cannot be loaded.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::LogSubmittedSimulationUsage {} {
    array set times [Rappture::rusage measure]

    # Label the event "<pid>_<toolId>_r<revision>" when the tool
    # description supplies both values; otherwise use "simulation".
    set toolId [$_xmlobj get tool.id]
    set toolVers [$_xmlobj get tool.version.application.revision]
    set simulation "simulation"
    if { $toolId ne "" && $toolVers ne "" } {
        set simulation "[pid]_${toolId}_r${toolVers}"
    }

    # Need to save job info? then invoke the callback
    if { [string length $jobstats] > 0 } {
        set summary [list \
            "job"      [incr jobnum] \
            "event"    $simulation \
            "start"    $times(start) \
            "walltime" $times(walltime) \
            "cputime"  $times(cputime) \
            "status"   $_job(exitcode)]
        uplevel #0 $jobstats $summary
    }

    #
    # Scan through stderr channel and look for statements that
    # represent grid jobs that were executed.  The statements look
    # like this:
    #
    # MiddlewareTime: job=1 event=simulation start=3.001094 ...
    #

    set subjobs 0
    set pattern {(^|\n)MiddlewareTime:( +[a-z]+=[^ \n]+)+(\n|$)}
    while { [regexp -indices $pattern $_job(stderr) match] } {
        foreach {p0 p1} $match break
        # The match may begin with the newline that ends the previous
        # line; leave that newline in place.
        if { [string index $_job(stderr) $p0] == "\n" } {
            incr p0
        }

        # Defaults for any key the statement does not supply
        array unset data
        array set data {
            job 1
            event simulation
            start 0
            walltime 0
            cputime 0
            status 0
        }

        # Break the statement into whitespace-separated words without
        # treating arbitrary program output as a Tcl list (which would
        # apply backslash substitution and can raise a parse error on
        # words starting with a brace or quote).  The first word is the
        # "MiddlewareTime:" tag itself.
        set stmt [string range $_job(stderr) $p0 $p1]
        foreach arg [lrange [regexp -all -inline {\S+} $stmt] 1 end] {
            # Split at the FIRST "=" only, so values that contain "="
            # are kept intact.
            set eqPos [string first "=" $arg]
            if { $eqPos < 0 } {
                set data($arg) ""
            } else {
                set key [string range $arg 0 [expr { $eqPos - 1 }]]
                set data($key) [string range $arg [expr { $eqPos + 1 }] end]
            }
        }

        # Job numbers and start times in the statement are relative to
        # this run; offset them by the current job counter/start time.
        set data(job) [expr { $jobnum + $data(job) }]
        set data(event) "subsimulation"
        set data(start) [expr { $times(start) + $data(start) }]

        set details ""
        foreach key {job event start walltime cputime status} {
            # Add required keys in a particular order
            lappend details $key $data($key)
            unset data($key)
        }
        foreach key [array names data] {
            # Add anything else that the client gave -- venue, etc.
            lappend details $key $data($key)
        }

        if { [string length $jobstats] > 0 } {
            uplevel #0 $jobstats $details
        }

        incr subjobs

        # Done -- remove this statement
        set _job(stderr) [string replace $_job(stderr) $p0 $p1]
    }
    incr jobnum $subjobs

    # Load the run.xml file so later steps can use it.  No timing
    # information is written into the file by this method.
    if { [catch {
        Rappture::library $_job(runfile)
    } xmlobj] != 0 } {
        error "Can't create rappture library: $xmlobj"
    }
    set _job(xmlobj) $xmlobj
}
---|
845 | |
---|
# ----------------------------------------------------------------------
# USAGE: LogCachedSimulationUsage
#
# Loads the run file named by _job(runfile), reads "output.session"
# from it, and runs "submit --cache <session>".  A failure of the
# submit command is only reported on stderr.  The loaded library
# object is stored in _job(xmlobj).
#
# Raises an error if the run file cannot be loaded.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::LogCachedSimulationUsage {} {
    if { [catch { Rappture::library $_job(runfile) } runLib] } {
        error "Can't create rappture library: $runLib"
    }

    # The session is read back from the run file itself
    set sessionId [$runLib get "output.session"]
    if { [catch { exec submit --cache $sessionId } submitMsg] } {
        puts stderr "submit --cache failed: $submitMsg"
    }

    set _job(xmlobj) $runLib
}
---|
859 | |
---|
860 | |
---|
# ----------------------------------------------------------------------
# USAGE: CheckForCachedRunFile <driverFile>
#
# Posts the contents of <driverFile> to the cache service at
# http://<-cachehosts>/cache/request.  When the service answers with
# HTTP code 200 and a non-empty body, the body is written to a new
# run<timestamp>.xml file in rapptureInfo(cwd), _job(runfile),
# _job(success) and _job(stderr) are updated, and 1 is returned.
# Returns 0 on a failed query, a non-200 answer, or an empty body.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::CheckForCachedRunFile { driverFile } {

    # Read the driver file and collect its contents as the query.
    set url http://$_resources(-cachehosts)/cache/request
    set f [open $driverFile "r"]
    set query [read $f]
    close $f

    # Make the query
    if { [catch {
        http::geturl $url -query $query -timeout 6000 -binary yes
    } token] != 0 } {
        puts stderr "error performing cache query: token=$token"
        return 0
    }

    # Copy what we need out of the token, then release its state so
    # that repeated cache queries do not accumulate http state arrays.
    set httpCode [http::ncode $token]
    set contents [http::data $token]
    http::cleanup $token

    # If the code isn't 200, we'll assume it's a cache miss.
    if { $httpCode != 200 } {
        return 0
    }
    # An empty body is treated as a miss too.
    if { $contents == "" } {
        return 0
    }

    # Create a new run.xml file and write the results into it.
    set secs [clock seconds]
    set millisecs [expr { [clock clicks -milliseconds] % 1000 }]
    set timestamp [format %d%03d%03d $secs $millisecs 0]

    global rapptureInfo
    set fileName [file join $rapptureInfo(cwd) "run${timestamp}.xml"]
    set f [open $fileName "w"]
    # The body was fetched with -binary yes, so write the bytes back
    # untouched instead of re-encoding them in the system encoding.
    fconfigure $f -translation binary
    puts $f $contents
    close $f

    set _job(runfile) $fileName
    set _job(success) 1
    set _job(stderr) "Loading cached results\n"
    OnOutput "Loading cached results\n"
    update
    return 1
}
---|
903 | |
---|
# ----------------------------------------------------------------------
# USAGE: GetUQErrors
#
# Returns the contents of the file "uq_debug.err" (relative to the
# current working directory), or an empty string if no such file
# exists.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::GetUQErrors {} {
    # Nothing to report when the error file is absent
    if { ![file exists "uq_debug.err"] } {
        return {}
    }

    set errChan [open "uq_debug.err" r]
    set errText [read $errChan]
    close $errChan
    return $errText
}
---|
913 | |
---|
# ----------------------------------------------------------------------
# USAGE: CollectUQResults
#
# UQ.  Removes any existing "run_uq.xml", then runs
# "puq.sh analyze puq_<pid>.hdf5" -- presumably the step that collects
# data from all jobs into one xml run file (confirm against puq.sh).
# The command's output is stored in _job(stdout) and returned.
#
# Raises an error containing the command output and the text from
# GetUQErrors if the analysis command fails.
# ----------------------------------------------------------------------
itcl::body Rappture::Task::CollectUQResults {} {
    file delete -force -- "run_uq.xml"

    set analyzeFailed [catch {
        exec puq.sh analyze "puq_[pid].hdf5"
    } puqOutput]

    if { $analyzeFailed != 0 } {
        error "UQ analysis failed: $puqOutput\n[GetUQErrors]"
    }
    set _job(stdout) $puqOutput
}
---|