# HG changeset patch # User Guido Berhoerster # Date 1413744279 -7200 # Node ID 8c5330f6e9e4a1f5da94cc3222fa7e738fbcc214 Initial revision diff -r 000000000000 -r 8c5330f6e9e4 relmon.tcl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/relmon.tcl Sun Oct 19 20:44:39 2014 +0200 @@ -0,0 +1,1664 @@ +#!/usr/bin/tclsh +# +# Copyright (C) 2011 Guido Berhoerster +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +package require Tcl 8.5 +package require http +package require tls +package require tdom +package require try +package require cmdline +package require control +package require html +package require htmlparse +package require json +package require json::write +package require logger +package require logger::utils +package require textutil::split +package require uri +package require uri::urn + + +namespace eval ::relmon { + # version + variable VERSION 1 +} + + +namespace eval ::relmon::common { + namespace export cmpVersions isUrlValid urlGetHost parseStateFile +} + +# implementation of the Debian version comparison algorithm described at +# http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version +proc ::relmon::common::cmpVersion {v1 v2} { + set v1Len [string length $v1] + set v2Len [string length $v2] + set v1Pos 0 + set v2Pos 0 + while {($v1Pos < $v1Len) || ($v2Pos < $v2Len)} { + set firstNumDiff 0 + # until reaching ASCII digits in both version strings compare character + # values which are modified as so they are sorted in the following + # order: + # - "~" + # - missing character or ASCII digits + # - ASCII alphabet + # - everything else in the order of their unicode value + while {(($v1Pos < $v1Len) && + ![string match {[0123456789]} [string index $v1 $v1Pos]]) || + (($v2Pos < $v2Len) && + ![string match {[0123456789]} [string index $v2 $v2Pos]])} { + foreach char [list [string index $v1 $v1Pos] \ + [string index $v2 $v2Pos]] charValueName \ + {v1CharValue v2CharValue} { + if {$char eq "~"} { + set $charValueName -1 + } elseif {$char eq ""} { + set $charValueName 0 + } elseif {[string match {[0123456789]} $char]} { + set $charValueName 0 + } elseif {[string match -nocase {[abcdefghijklmnopqrstuvwxyz]} \ + $char]} { + set $charValueName [scan $char "%c"] + } else { + set $charValueName [expr {[scan $char "%c"] + 0x7f + 1}] + } + } + if {$v1CharValue != $v2CharValue} { + return [expr {$v1CharValue - $v2CharValue}] + } + incr v1Pos + incr v2Pos + } + + # strip leading zeros + while {[string index $v1 $v1Pos] eq "0"} { + incr v1Pos + } + while {[string index $v2 $v2Pos] eq "0"} { + incr v2Pos + } + + # process digits until reaching a non-digit + while {[string match {[0123456789]} [string index $v1 $v1Pos]] && + [string match {[0123456789]} [string index $v2 $v2Pos]]} { + # record the first difference between the two numbers + if {$firstNumDiff == 0} { + set firstNumDiff [expr {[string index $v1 $v1Pos] - + [string index $v2 $v2Pos]}] + } + incr v1Pos + incr v2Pos + } + + # return if the number of one version has more digits than the other + # since the one with more digits is the larger number + if {[string match {[0123456789]} [string index $v1 $v1Pos]]} { + return 1 + } elseif {[string match {[0123456789]} [string index $v2 $v2Pos]]} { + return -1 + } + + # return the difference if the digits differed above + if {$firstNumDiff != 0} { + return $firstNumDiff + } + } + + return 0 +} + +proc ::relmon::common::isUrlValid {url} { + return [expr {![catch {dict create {*}[uri::split $url]} urlParts] && + ([dict get $urlParts "scheme"] in {"http" "https"}) && + ([dict get $urlParts "host"] ne "")}] +} + +proc ::relmon::common::urlGetHost {url} { + return [expr {(![catch {dict create {*}[uri::split $url]} urlParts]) ? + [dict get $urlParts "host"] : ""}] +} + +proc ::relmon::common::parseStateFile {stateFile} { + try { + set f [open $stateFile "r"] + } trap {POSIX} {errorMsg errorOptions} { + return -options $errorOptions \ + "failed to open state file \"$stateFile\": $errorMsg" + } + try { + set state [json::json2dict [chan read $f]] + } trap {POSIX} {errorMsg errorOptions} { + return -options $errorOptions \ + "failed to read from state file \"$stateFile\": $errorMsg" + } on error {errorMsg errorOptions} { + # the json package does not set an error code + dict set errorOptions "-errorcode" {RELMON JSON_PARSE_ERROR} + return -options $errorOptions \ + "failed to parse state file \"$stateFile\": $errorMsg" + } finally { + close $f + } + + return $state +} + + +namespace eval ::relmon::update { + # commandline option help text + variable usage "usage: relmon update \[-dev\] \[-c max_connections\] \[-C\ + ca_dir\] \[-D delay\]\n\ + \ \[-H max_host_connections\] \[-i\ + item\[,...\]\] \[-l logfile\]\n\ + \ \[-r retries\] \[-t min_time\] watchlist\ + statefile" + + # configuration options + variable Config [dict create \ + "log_file" "" \ + "log_level" "notice" \ + "history_limit" 20 \ + "connection_limit" 16 \ + "host_connection_limit" 4 \ + "transfer_time_limit" 60000 \ + "retry_limit" 3 \ + "host_delay" 0 \ + "timestamp_filter" 0 \ + "error_filter" 0 \ + "item_filter" {} \ + "ca_dir" "" \ + "state_file" "" \ + "watchlist_file" ""] + + # exit status + variable ExitStatus + + # transfer statistics + variable Statistics [dict create \ + "start_time" 0 \ + "end_time" 0 \ + "requests" 0 \ + "items" 0] + + # watchlist + variable Watchlist + + # ID of a delayed run of ManageTransfers + variable ManageTransfersId "" + + # queue of pending transfers + variable Queue + + # number of active connections per host + variable HostConnections + + # delays before opening a new connection to a host + variable HostDelays + + # active transfers + variable ActiveTransfers + + # buffer for tracking the state of unfinished items + variable StateBuffer + + # buffer needed by htmlparse::parse for constructing the preprocessed HTML + # document + variable PreprocessedHtmlBuffer + + # logger handle + variable Log + + # logfile handle + variable Lf +} + +proc ::relmon::update::OnError {message returnOptions} { + # internal error, abort + puts stderr [dict get $returnOptions "-errorinfo"] + + exit 1 +} + +proc ::relmon::update::CleanupBeforeExit {commandString operation} { + variable Lf + + # close logfile + if {($Lf ne "") && ($Lf ni {stdin stderr})} { + close $Lf + set Lf "" + } + + return +} + +proc ::relmon::update::ParseWatchlist {watchlistFilename} { + variable Watchlist + + set lineno 0 + set f [open $watchlistFilename "r"] + try { + while {[chan gets $f line] != -1} { + set fields [textutil::split::splitx [string trim $line] {[\t ]+}] + incr lineno + + if {([llength $fields] == 0) || + ([string index [lindex $fields 0] 0] eq "#")} { + # skip empty lines and comments + continue + } elseif {[llength $fields] < 3} { + # a line consists of a name, base URL and at least one + # version-matching pattern + return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ + "syntax error in \"$watchlistFilename\" line $lineno" + } + + set patterns [lassign $fields name baseUrl] + + # validate URL + if {![::relmon::common::isUrlValid $baseUrl]} { + return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ + "syntax error in \"$watchlistFilename\" line $lineno:\ + invalid base URL" + } + + # process patterns + set processedPatterns {} + set patternIndex 0 + foreach pattern $patterns { + incr patternIndex + + # make trailing slashes optional except in the last + # version-matching pattern + if {($patternIndex != [llength $patterns]) && + ([string index $pattern end] eq "/")} { + append pattern {?} + } + + # ensure patterns are anchored to the end of the line + if {[string index $pattern end] ne "$"} { + append pattern {$} + } + + # actually validate the regular expression + try { + set reInfo [regexp -about -- $pattern ""] + } on error {errorMsg} { + return -code error \ + -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ + "error in \"$watchlistFilename\" line $lineno:\ + $errorMsg" + } + lappend processedPatterns $pattern + } + if {[lindex $reInfo 0] < 1} { + return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ + "syntax error in \"$watchlistFilename\" line $lineno:\ + the last regular expression must contain at least one + capturing group" + } + + dict set Watchlist $name "base_url" $baseUrl + dict set Watchlist $name "patterns" $processedPatterns + } + } finally { + close $f + } + + return +} + +proc ::relmon::update::ProcessHtmlElement {tag slash param textBehindTheTag} { + variable PreprocessedHtmlBuffer + + # copy every "" element into PreprocessedHtmlBuffer + if {($slash eq "") && ([string tolower $tag] eq "a")} { + append PreprocessedHtmlBuffer "<$tag $param>" + } + + return +} + +proc ::relmon::update::PreprocessHtml {bodyDataName} { + upvar 1 $bodyDataName bodyData + variable PreprocessedHtmlBuffer + + # preprocess the document with htmlparse by constructing a new document + # consisting only of found "" elements which then can be fed into tdom + # again; this is useful if parsing via tdom fails; however, htmlparse + # should only be used as a last resort because it is just too limited, it + # gets easily confused within "