Skip to content

Commit 5eb5d55

Browse files
authored
Merge pull request #37 from JuliaRobotics/feat/3Q20/injdelay
add injectDelayBefore and watchdog
2 parents ff57df7 + 7324e6b commit 5eb5d55

File tree

4 files changed

+84
-5
lines changed

4 files changed

+84
-5
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name = "FunctionalStateMachine"
22
uuid = "3e9e306e-7e3c-11e9-12d2-8f8f67a2f951"
33
keywords = ["state machine"]
44
desc = "Functional state machine with stepping and visualization tools."
5-
version = "0.2.7"
5+
version = "0.2.8"
66

77
[deps]
88
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

README.md

+20
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,26 @@ statemachine = StateMachine{Nothing}(next=foo!)
5555
while statemachine(nothing, iterlimit=1); end
5656
```
5757

58+
### Watchdog Timeout
59+
60+
Sometimes it is useful to know that an FSM process will exit, either as intended or by throwing an error on timeout (much like a [Watchdog Timer](https://en.wikipedia.org/wiki/Watchdog_timer)). FSM uses `Base.InterruptException()` to stop a task that exceeds a `timeout::Real` [seconds]. Note that this functionality is not included by default, in order to preserve a small memory footprint. To use the timeout feature, simply call the state machine with a timeout duration:
61+
```julia
62+
userdata = nothing # any user data of type T
63+
timeout = 3.0
64+
while statemachine(userdata, timeout, verbose=true); end
65+
```
66+
67+
### Recording Verbose Output to File
68+
69+
Experience has shown that when a state machine gets stuck, it is often useful to write the `verbose` steps out to file as a bare-minimum guide to where a system might be failing. This can be done by passing an `::IOStream` handle to `verbosefid`:
70+
```julia
71+
fid = open("/tmp/verboseFSM_001.log","w")
72+
while statemachine(userdata, verbose=true, verbosefid=fid); end
73+
close(fid)
74+
```
75+
76+
This particular structure is chosen so that `@async` or other multithreaded uses of FSM can still write to a common `fid`, and so that the user can `flush(fid)` and `close(fid)` regardless of whether the FSM has stalled. It might seem "boilerplate-esque", but it makes it much easier for developers to snuff out bugs in highly complicated, interdependent, multithreaded, multi-state-machine architectures.
77+
5878
## With User Data and History
5979

6080
```julia

src/StateMachine.jl

+40-4
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,25 @@ mutable struct StateMachine{T}
2525
StateMachine{T}(;next=emptyState, iter::Int=0, name::AbstractString="") where T = new{T}(next, iter, Vector{Tuple{DateTime, Int, Function, T}}(), name)
2626
end
2727

28+
29+
2830
"""
2931
$SIGNATURES
3032
3133
Run state machine function (as functor).
3234
35+
Notes
36+
- `timeout::Union{Real,Nothing}` is optional with default `=nothing`.
37+
- this code is skipped in lowered and llvm code if not used
38+
- subroutine will use `pollinterval::Real` [seconds] to interrogate during `timeout::Real` [seconds] period.
39+
- can stop FSM early by using any of the following:
40+
- `breakafter`, `iterlimit`.
41+
- Can `injectDelayBefore` a function `st.next` to help with debugging.
42+
- can print FSM steps with `verbose=true`.
43+
- `verbosefid::IOStream` is used as destination for verbose output, default is `stdout`.
44+
- FSM steps and `userdata` can be recorded in standard `history` format using `recordhistory=true`.
45+
- `housekeeping_cb` is callback to give user access to `StateMachine` internals and opportunity to insert bespoke operations.
46+
3347
Example
3448
```julia
3549
bar!(usrdata) = IncrementalInference.exitStateMachine
@@ -40,23 +54,45 @@ usrdata = nothing
4054
while st(usrdata); end
4155
```
4256
"""
43-
function (st::StateMachine{T})( userdata::T=nothing;
57+
function (st::StateMachine{T})( userdata::T=nothing,
58+
timeout::Union{Nothing,<:Real}=nothing;
59+
pollinterval::Real=0.05,
4460
breakafter::Function=exitStateMachine,
4561
verbose::Bool=false,
62+
verbosefid=stdout,
4663
iterlimit::Int=-1,
64+
injectDelayBefore::Union{Nothing,Pair{<:Function, <:Real}}=nothing,
4765
recordhistory::Bool=false,
4866
housekeeping_cb::Function=(st)->() ) where {T}
4967
#
5068
st.iter += 1
5169
# verbose print to help debugging
52-
!verbose ? nothing : println("FSM $(st.name), iter=$(st.iter) -- $(st.next)")
70+
!verbose ? nothing : println(verbosefid, "FSM $(st.name), iter=$(st.iter) -- $(st.next)")
5371
# early exit plumbing
5472
retval = st.next != breakafter && (iterlimit == -1 || st.iter < iterlimit)
5573
# record steps for later
56-
recordhistory ? push!(st.history, (Dates.now(), st.iter, deepcopy(st.next), deepcopy(userdata))) : nothing
74+
T0 = Dates.now()
75+
recordhistory ? push!(st.history, (T0, st.iter, deepcopy(st.next), deepcopy(userdata))) : nothing
5776
# user has some special situation going on.
5877
housekeeping_cb(st)
59-
st.next = st.next(userdata)
78+
(injectDelayBefore !== nothing && injectDelayBefore[1] == st.next) ? sleep(injectDelayBefore[2]) : nothing
79+
if timeout === nothing
80+
# no watchdog, just go and optimize llvm lowered code
81+
st.next = st.next(userdata)
82+
else
83+
# add the watchdog into the llvm lowered code
84+
currtsk = current_task()
85+
# small amount of memory usage, but must guarantee InterruptException is not accidentally fired during next step.
86+
doneWatchdog = Base.RefValue{Int}(0)
87+
wdt = @async begin
88+
# wait for watchdog timeperiod in a separate co-routine
89+
res = timedwait(()->doneWatchdog[]==1, timeout, pollint=pollinterval)
90+
# Two requirements needed to interrupt FSM step
91+
res == :timed_out && doneWatchdog[] == 0 ? schedule(currtsk, InterruptException(), error=true) : nothing
92+
end
93+
st.next = st.next(userdata)
94+
doneWatchdog[] = 1
95+
end
6096
return retval
6197
end
6298

test/testStateMachine.jl

+23
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,29 @@ while statemachine(nothing, verbose=true); end
5252
end
5353

5454

55+
@testset "test watchdog timeout" begin
56+
57+
function longwait(x)
58+
@info "starting stagnant function call, but should not see it's end (but watchdog timeout)"
59+
while true
60+
print(".")
61+
sleep(0.5)
62+
end
63+
@info "done with longwait"
64+
return exitStateMachine
65+
end
66+
67+
statemachine = StateMachine{Nothing}(next=longwait)
68+
try
69+
while statemachine(nothing, 2.0, verbose=true); end
70+
catch e
71+
@info " watchdog test, successfully caught exception for stagnant FSM step"
72+
@test_throws InterruptException throw(e)
73+
end
74+
75+
end
76+
77+
5578
@testset "test recording and rendering of an FSM run" begin
5679

5780
statemachine = StateMachine{Nothing}(next=foo!)

0 commit comments

Comments
 (0)