# copyright (C) 1997-2006 Jean-Luc Fontaine (mailto:jfontain@free.fr)
# this program is free software: please read the COPYRIGHT file enclosed in this package or use the Help Copyright menu

# $Id: predwork.tcl,v 1.20 2006/01/28 19:16:59 jfontain Exp $


class predictor {

    set (runningProcesses) 0
    set (neural,code) \
{library(nnet)
neural = function(values, lags, hidden) {               # values between 0 and 1
    length = length(values)
    lag = max(lags)            # subtract from length to get number of trainings
    x = matrix(NA, length - lag, length(lags))            # rows of input values
    for (column in 1:length(lags))                      # as many inputs as lags
        x[,column] = values[(lag + 1 - lags[column]):(length - lags[column])]
    y = values[(lag + 1):length]             # target values (after largest lag)
    minimum = Inf                                 # minimum of fitting criterion
    count = 100                   # number of tries with different randomization
    while (count > 0) {
        network = nnet(        # use linear output units for finding the minimum
            x, y, size = hidden, linout = 1, maxit = 1000, trace = 0, reltol = 0
        )   # note: a relative tolerance of 0.0001 speeds things up dramatically
        if (network$val < minimum) {                        # found a better fit
            minimum = network$value
            best = network
        }
        count = count - 1
    }
    if (!exists('best')) return(NA) else return(best)
}
predicted = function(values, lags, hidden, predict) {   # values between 0 and 1
    neural = neural(values, lags, hidden)                            ;# best fit
    if (is.null(neural)) return(NA)                         # fatal error: abort
    # target values (after largest lag):
    y = values[(max(lags) + 1):length(values)]
    count = predict                                 # predict a number of points
    while (count > 0) {
        new = y[length(y) - lags + 1]          # use last lagged values as input
        y = c(y, predict(neural, new))                  # append predicted value
        count = count - 1                                          # and iterate
    }
    return(y[(length(y) - predict + 1):length(y)])            # predicted values
}}

    proc waitForProcesses {} {
        while {$(runningProcesses) >= $global::predictorMaximumProcesses} {
            vwait predictor::(runningProcesses)                            ;# wait for other predictors to finish their calculations
        }
    }

    # return interpolation method actually used and time series with start, period and values
    # attention: start and period can be floating point numbers
    proc interpolate {preferredMethod intervals instants values} {              ;# note: could take a little time on a slow computer
        switch $preferredMethod {spline - linear {} default {error "invalid method: $preferredMethod"}}
        if {($intervals == 0) || ([llength $instants] == 0) || ([llength $values] == 0)} {
            return [list {} 0 0 {}]
        }
        foreach instant $instants value $values {
            if {[info exists x]} {
                append x ,
            } else {
                set origin $instant                                                ;# avoid large values by starting instants from 0
                set x c(
            }
            append x [expr {$instant - $origin}]
        }
        append x )
        set y c([join $values ,])
        set points [expr {$intervals + 1}]                                                                 ;# number of data entries
        set code(spline)\
            "z=spline($x,$y,n=$points,method='fmm');cat('{',z\$x\[1\],' ',z\$x\[length(z\$x)\],'} {',z\$y,'}\\n');quit()"
        set code(linear)\
            "z=approx($x,$y,n=$points,method='linear');cat('{',z\$x\[1\],' ',z\$x\[length(z\$x)\],'} {',z\$y,'}\\n');quit()"
        if {$preferredMethod eq "spline"} {
            set methods [list spline linear]             ;# try interpolating with spline first and if that fails with simple method
        } else {
            set methods linear
        }
        set instants {}; set values {}; set start 0; set end 0
        foreach method $methods {
            set task [new task $code($method)]
            switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
            vwait predictor::(processed,$task)
            if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
                foreach {instants values} $task::($task,lists) break
                foreach {start end} $instants {}
            } else {
                set (processed,$task) 0                                                                           ;# continue trying
            }
            set done $(processed,$task); unset (processed,$task)
            delete $task
            if {$done} break
        }
        return [list $method [expr {$origin + $start}] [expr {($end - $start) / double($intervals)}] $values]
    }

    proc period {period values} {                                                               ;# try to find main period using FFT
        set y c([join $values ,])
        set task [new task "s=spectrum($y,plot=0);cat('{',s\$freq\[which.max(s\$spec)\],'}\\n');quit()"]           ;# main frequency
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            set frequency [lindex $task::($task,lists) 0]
            set value [expr {round($period / $frequency)}]                                                             ;# in seconds
        } else {
            set value {}
        }
        unset (processed,$task)
        delete $task
        return $value
    }

    proc aggregate {by start period values} {
        if {$by <= 1} {                                                                                     ;# no aggregation needed
            return [list $start $period $values]
        }
        set skip [expr {[llength $values] % $by}]
        set y c([join [lrange $values $skip end] ,])
        # set task [new task "cat('{',aggregate(as.ts($y),1/$by,mean),'}\\n');quit()"]
        set task [new task "cat('{',aggregate(as.ts($y),1/$by.000000001,mean),'}\\n');quit()"]       ;# (work around bug in R 2.2.0)
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            set values [lindex $task::($task,lists) 0]
        } else {
            set values {}
        }
        unset (processed,$task)
        delete $task
        # adjust according to number of samples skipped and new period:
        set start [expr {$start + ($period * ($skip + ($by / 2.0)))}]
        return [list $start [expr {$period * $by}] $values]
    }

    proc smooth {degree start period values} {                                                       ;# degree can vary from 0 to 10
        if {$degree <= 0} {                                                                                   ;# no smoothing needed
            return [list $start $period $values]
        }
        if {$degree > 10} {
            return [list $start $period {}]
        }
        set length [llength $values]
        set y c([join $values ,])
        set task [new task "cat('{',smooth.spline($y,spar=$degree/10)\$y,'}\\n');quit()"]
        # set task [new task "cat('{',kernapply($y,kernel('modified.daniell',$degree)),'}\\n');quit()"]    ;# kernel (adjust degree)
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            set values [lindex $task::($task,lists) 0]
            # adjust according to new number of samples (for kernel smoothing):
            # set start [expr {$start + ($period * (($length - [llength $values]) / 2.0))}]
        } else {
            set values {}
        }
        unset (processed,$task)
        delete $task
        return [list $start $period $values]
    }

    proc arimaFit {this values period command progressCommand} {
        # period is empty for non periodic data, 0 for unknown period
        variable ${this}arimaParameters

        array unset {} $this,arima,*
        set ($this,arima,command) $command
        set ($this,arima,progressCommand) $progressCommand
        set ($this,arima,series) c([join $values ,])
        set ($this,arima,period) $period
        for {set p 0} {$p <= 2} {incr p} {
            for {set d 0} {$d <= 2} {incr d} {
                for {set q 0} {$q <= 2} {incr q} {
                    set ${this}arimaParameters($p,$d,$q,,,) {}
                    if {$period ne ""} {
                        for {set P 0} {$P <= 2} {incr P} {
                            for {set D 0} {$D <= 2} {incr D} {
                                for {set Q 0} {$Q <= 2} {incr Q} {
                                    set ${this}arimaParameters($p,$d,$q,$P,$D,$Q) {}
                                }
                            }
                        }
                    }
                }
            }
        }
        set ($this,arima,tasks) {}
        set ($this,arima,totalTasks) [array size ${this}arimaParameters]
        arimaProcess $this 0 0 0 0 {} {} {}                                                                          ;# boot process
    }

    proc arimaProcess {this task p d q P D Q} {                                                     ;# note: P, D and Q may be empty
        variable ${this}arimaParameters

        if {$task != 0} {
            if {![info exists ($this,arima,tasks)] || ([lsearch -exact $($this,arima,tasks) $task] < 0)} {
                catch {delete $task}                                                                            ;# old and lost task
                return                                                                                                     ;# ignore
            }
            if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
                foreach {coefficients values} $task::($task,lists) break       ;# coefficients is number of parameters for the model
                set squares 0
                foreach value $values {                                                                 ;# compute squared residuals
                    set squares [expr {$squares + pow($value, 2)}]
                }
                set number [expr {double([llength $values])}]                                   ;# force floating-point calculations
                # Schwarz-Bayesian information criterion:
                set criterion [expr {(($coefficients / $number) * log($number)) + log($squares / $number)}]
                if {![info exists ($this,arima,criterion)] || ($criterion < $($this,arima,criterion))} {               ;# better fit
                    set ($this,arima,criterion) $criterion
                    set ($this,arima,parameters) [list $p $d $q $P $D $Q]
                    set ($this,arima,values) $values
                }
            } else {
                set values {}                                                                                             ;# failure
            }
            delete $task
            ldelete ($this,arima,tasks) $task
            if {$($this,arima,progressCommand) ne ""} {
                uplevel #0 $($this,arima,progressCommand) [expr {\
                    1 - double([array size ${this}arimaParameters]) / $($this,arima,totalTasks)\
                }]                                                                                    ;# use unit value, not percent
            }
            incr (runningProcesses) -1
        }
        if {[array size ${this}arimaParameters] == 0} {                                                               ;# almost done
            if {[llength $($this,arima,tasks)] == 0} {                                                                       ;# done
                set parameters {}; set values {}; set criterion {}
                if {[info exists ($this,arima,values)]} {
                    set parameters $($this,arima,parameters)
                    set values $($this,arima,values)
                    set criterion $($this,arima,criterion)
                }                                                                                             ;# else global failure
                set command $($this,arima,command)
                unset ${this}arimaParameters
                array unset {} $this,arima,*
                if {$command ne ""} {
                    uplevel #0 $command [list $values ARIMA $parameters $criterion]
                }
            }
            return
        }
        set parameters [lsort -dictionary [array names ${this}arimaParameters]]
        set index 0
        waitForProcesses
        while {$(runningProcesses) < $global::predictorMaximumProcesses} {
            if {![info exists ($this,arima,tasks)]} break                                                   ;# may have been aborted
            incr (runningProcesses)
            set key [lindex $parameters $index]
            unset ${this}arimaParameters($key)
            foreach {p d q P D Q} [split $key ,] {}                                             ;# note: P, D and Q may be undefined
            # return number of coefficients and residual values:
            if {$P ne ""} {                                                                                              ;# periodic
                set period $($this,arima,period)
                if {$period == 0} {set period NA}                                                                  ;# period unknown
                set code "a=arima($($this,arima,series),c($p,$d,$q),list(order=c($P,$D,$Q),period=$period));cat('{',length(a\$coef),'} {',a\$residuals,'}\\n');quit()"
            } else {
                set code "a=arima($($this,arima,series),c($p,$d,$q));cat('{',length(a\$coef),'} {',a\$residuals,'}\\n');quit()"
            }
            set task [new task $code]
            switched::configure $task -command "predictor::arimaProcess $this $task [list $p $d $q $P $D $Q]"
            lappend ($this,arima,tasks) $task
            incr index
        }
    }

    proc arimaAbort {this} {                                           ;# could be invoked at any moment and for any number of times
        variable ${this}arimaParameters

        set tasks {}; catch {set tasks $($this,arima,tasks)}
        array unset {} $this,arima,*
        unset -nocomplain ${this}arimaParameters
        foreach task $tasks {
            catch {eval exec kill [task::processIdentifiers $task]}                                     ;# try to kill sub-processes
            catch {delete $task}
            incr (runningProcesses) -1
        }
    }

    proc arimaPredict {this points parameters values period} {                              ;# points is number of points to predict
        # parameters is the list of p, d, q, P, D, and Q, with the last 3 possibly empty
        set y c([join $values ,])
        foreach {p d q P D Q} $parameters {}                                                        ;# note: P, D and Q may be empty
        if {$P ne ""} {                                                                                                  ;# periodic
            if {$period == 0} {set period NA}                                                                      ;# period unknown
            set task [new task\
                "cat('{',predict(arima($y,c($p,$d,$q),list(order=c($P,$D,$Q),period=$period)),$points)\$pred,'}\\n');quit()"\
            ]
        } else {
            set task [new task "cat('{',predict(arima($y,c($p,$d,$q)),$points)\$pred,'}\\n');quit()"]
        }
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            set values [lindex $task::($task,lists) 0]
        } else {
            set values {}                                                                       ;# errors reported via trace feature
        }
        unset (processed,$task)
        delete $task
        return $values
    }

    proc arimaFitModel {this parameters values period} {
        # parameters is the list of p, d, q, P, D, and Q, with the last 3 possibly empty
        set y c([join $values ,])
        foreach {p d q P D Q} $parameters {}                                                        ;# note: P, D and Q may be empty
        if {$P ne ""} {                                                                                                  ;# periodic
            if {$period == 0} {set period NA}                                                                      ;# period unknown
            set code "a=arima($y,c($p,$d,$q),list(order=c($P,$D,$Q),period=$period));cat('{',length(a\$coef),'} {',a\$residuals,'}\\n');quit()"
        } else {
            set code "a=arima($y,c($p,$d,$q));cat('{',length(a\$coef),'} {',a\$residuals,'}\\n');quit()"
        }
        set task [new task $code]
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            foreach {coefficients values} $task::($task,lists) break           ;# coefficients is number of parameters for the model
            set squares 0
            foreach value $values {                                                                     ;# compute squared residuals
                set squares [expr {$squares + pow($value, 2)}]
            }
            set number [expr {double([llength $values])}]                                       ;# force floating-point calculations
            # Schwarz-Bayesian information criterion:
            set criterion [expr {(($coefficients / $number) * log($number)) + log($squares / $number)}]
        } else {
            set values {}                                                                       ;# errors reported via trace feature
            set criterion 0
        }
        unset (processed,$task)
        delete $task
        return [list $values $criterion]
    }

    proc neuralFit {this values period longPeriod command progressCommand} {                      ;# based on arima{} implementation
        # period is empty for non periodic data, 0 for unknown period
        variable ${this}neuralParameters

        if {$period eq ""} {set period 0}
        if {$longPeriod eq ""} {set longPeriod 0}
        array unset {} $this,neural,*
        set ($this,neural,command) $command
        set ($this,neural,progressCommand) $progressCommand
        set ($this,neural,series) c([join $values ,])
        set ($this,neural,period) $period
        set ($this,neural,longPeriod) $longPeriod
        set maximum(lag) 2; set maximum(midLag) 2; set maximum(longLag) 2
        if {$period == 0} {
            set maximum(lag) 8
            set limit [expr {([llength $values] / 4) + 1}]                                                ;# reasonable non-zero lag
            if {$maximum(lag) > $limit} {set maximum(lag) $limit}
        } elseif {$longPeriod == 0} {
            set maximum(lag) 3
            set maximum(midLag) 3
        }
        for {set hidden 1} {$hidden <= 3} {incr hidden} {
            for {set lag 1} {$lag <= $maximum(lag)} {incr lag} {
                set ${this}neuralParameters($hidden,$lag,,) {}
                if {$period > 0} {
                    for {set midLag 1} {$midLag <= $maximum(midLag)} {incr midLag} {
                        set ${this}neuralParameters($hidden,$lag,$midLag,) {}
                        if {$longPeriod > 0} {
                            for {set longLag 1} {$longLag <= $maximum(longLag)} {incr longLag} {
                                set ${this}neuralParameters($hidden,$lag,$midLag,$longLag) {}
                            }
                        }
                    }
                }
            }
        }
        set ($this,neural,tasks) {}
        set ($this,neural,totalTasks) [array size ${this}neuralParameters]
        neuralProcess $this 0 0 0 {} {}                                                                              ;# boot process
    }

    proc neuralProcess {this task hidden lag midLag longLag} {                               ;# note: mid and long lags may be empty
        variable ${this}neuralParameters

        if {$task != 0} {
            if {![info exists ($this,neural,tasks)] || ([lsearch -exact $($this,neural,tasks) $task] < 0)} {
                catch {delete $task}                                                                            ;# old and lost task
                return                                                                                                     ;# ignore
            }
            if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
                foreach {weights values} $task::($task,lists) break                 ;# weights is number of parameters for the model
                set squares 0
                foreach value $values {                                                                 ;# compute squared residuals
                    set squares [expr {$squares + pow($value, 2)}]
                }
                set number [expr {double([llength $values])}]                                   ;# force floating-point calculations
                # Schwarz-Bayesian information criterion:
                set criterion [expr {(($weights / $number) * log($number)) + log($squares / $number)}]
                if {![info exists ($this,neural,criterion)] || ($criterion < $($this,neural,criterion))} {             ;# better fit
                    set ($this,neural,criterion) $criterion
                    set ($this,neural,parameters) [list $hidden $lag $midLag $longLag]
                    set ($this,neural,values) $values
                }
            } else {
                set values {}                                                                                             ;# failure
            }
            delete $task
            ldelete ($this,neural,tasks) $task
            if {$($this,neural,progressCommand) ne ""} {
                uplevel #0 $($this,neural,progressCommand) [expr {\
                    1 - double([array size ${this}neuralParameters]) / $($this,neural,totalTasks)\
                }]                                                                                    ;# use unit value, not percent
            }
            incr (runningProcesses) -1
        }
        if {[array size ${this}neuralParameters] == 0} {                                                              ;# almost done
            if {[llength $($this,neural,tasks)] == 0} {                                                                      ;# done
                set parameters {}; set values {}; set criterion {}
                if {[info exists ($this,neural,values)]} {
                    set parameters $($this,neural,parameters)
                    set values $($this,neural,values)
                    set criterion $($this,neural,criterion)
                }                                                                                             ;# else global failure
                set command $($this,neural,command)
                unset ${this}neuralParameters
                array unset {} $this,neural,*
                if {$command ne ""} {
                    uplevel #0 $command [list $values neural $parameters $criterion]
                }
            }
            return
        }
        set parameters [lsort -dictionary [array names ${this}neuralParameters]]
        set index 0
        waitForProcesses
        while {$(runningProcesses) < $global::predictorMaximumProcesses} {
            if {![info exists ($this,neural,tasks)]} break                                                  ;# may have been aborted
            incr (runningProcesses)
            set key [lindex $parameters $index]
            unset ${this}neuralParameters($key)
            foreach {hidden lag midLag longLag} [split $key ,] {}                        ;# note: mid and long lags may be undefined
            set lags [neuralLags $($this,neural,period) $($this,neural,longPeriod) $hidden $lag $midLag $longLag]
            set code "$(neural,code)\n"
            # return number of weights and residual values:
            append code "n=neural($($this,neural,series),$lags,$hidden);cat('{',length(n\$wts),'} {',n\$residuals,'}\\n');quit()"
            set task [new task $code]
            switched::configure $task -command "predictor::neuralProcess $this $task [list $hidden $lag $midLag $longLag]"
            lappend ($this,neural,tasks) $task
            incr index
        }
    }

    proc neuralAbort {this} {                                          ;# could be invoked at any moment and for any number of times
        variable ${this}neuralParameters

        set tasks {}; catch {set tasks $($this,neural,tasks)}
        array unset {} $this,neural,*
        unset -nocomplain ${this}neuralParameters
        foreach task $tasks {
            catch {eval exec kill [task::processIdentifiers $task]}                                     ;# try to kill sub-processes
            catch {delete $task}
            incr (runningProcesses) -1
        }
    }

    proc neuralPredict {this points parameters values period longPeriod} {                  ;# points is number of points to predict
        # parameters is the list of hidden neurons, lag, mid-lag, long lag, with the last 2 possibly empty
        set y c([join $values ,])
        foreach {hidden lag midLag longLag} $parameters {}
        set lags [neuralLags $period $longPeriod $hidden $lag $midLag $longLag]
        set code "$(neural,code)\n"
        append code "cat('{',predicted($y,$lags,$hidden,$points),'}\\n');quit()"
        set task [new task $code]
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            set values [lindex $task::($task,lists) 0]
        } else {
            set values {}                                                                       ;# errors reported via trace feature
        }
        unset (processed,$task)
        delete $task
        return $values
    }

    proc neuralFitModel {this parameters values period longPeriod} {
        # parameters is the list of hidden neurons, lag, mid-lag, long lag, with the last 2 possibly empty
        set y c([join $values ,])
        foreach {hidden lag midLag longLag} $parameters {}
        set lags [neuralLags $period $longPeriod $hidden $lag $midLag $longLag]
        set code "$(neural,code)\n"
        append code "n=neural($y,$lags,$hidden);cat('{',length(n\$wts),'} {',n\$residuals,'}\\n');quit()"
        set task [new task $code]
        switched::configure $task -command "predictor::trace $task; set predictor::(processed,$task) 1"
        vwait predictor::(processed,$task)
        if {[info exists task::($task,lists)] && ([llength $task::($task,lists)] > 0)} {
            foreach {weights values} $task::($task,lists) break                     ;# weights is number of parameters for the model
            set squares 0
            foreach value $values {                                                                     ;# compute squared residuals
                set squares [expr {$squares + pow($value, 2)}]
            }
            set number [expr {double([llength $values])}]                                       ;# force floating-point calculations
            # Schwarz-Bayesian information criterion:
            set criterion [expr {(($weights / $number) * log($number)) + log($squares / $number)}]
        } else {
            set values {}                                                                       ;# errors reported via trace feature
            set criterion 0
        }
        unset (processed,$task)
        delete $task
        return [list $values $criterion]
    }

    proc neuralLags {period longPeriod hidden lag midLag longLag} {                          ;# note: mid and long lags may be empty
        for {set value 1} {$value <= $lag} {incr value} {                                         ;# short lag obviously cannot be 0
            if {[info exists lags]} {append lags ,} else {set lags c(}
            append lags $value
        }
        if {$midLag ne ""} {                                                                                             ;# periodic
            for {set value 0} {$value < $midLag} {incr value} {
                append lags ,[expr {$period + $value}]                                                  ;# lag from period, included
            }
            if {$longLag ne ""} {                                                                                     ;# long period
                for {set value 0} {$value < $longLag} {incr value} {                                    ;# lag from period, included
                    append lags ,[expr {$longPeriod + $value}]
                }
            }
        }
        append lags )
        return $lags
    }

    proc trace {task} {
        if {[info exists task::($task,communication,error)]} {
if {$global::withGUI} { ;# >8
            residentTraceModule 1
            modules::trace {} moodss(predictor) "system error: $task::($task,communication,error)"
} ;# >8
        }
        if {[info exists task::($task,errors)]} {
if {$global::withGUI} { ;# >8
            residentTraceModule 1
            modules::trace {} moodss(predictor) "engine errors: $task::($task,errors)"
} ;# >8
        }
        if {[info exists task::($task,warnings)]} {
if {$global::withGUI} { ;# >8
            residentTraceModule 1
            modules::trace {} moodss(predictor) "engine warnings: $task::($task,warnings)"
} ;# >8
        }
        if {[info exists task::($task,information)]} {
if {$global::withGUI} { ;# >8
            residentTraceModule 1
            modules::trace {} moodss(predictor) "engine information: $task::($task,information)"
} ;# >8
        }
    }

}
