Mercurial > projects > relmon
view relmon.tcl @ 1:cba4887feb2c
Check the content type of documents that are being downloaded
Abort if the content type of the document being downloaded cannot be handled.
This is to primarily to prevent accidental downloads of potentially large files.
author | Guido Berhoerster <guido+relmon@berhoerster.name> |
---|---|
date | Sun, 19 Oct 2014 20:56:27 +0200 |
parents | 8c5330f6e9e4 |
children |
line wrap: on
line source
#!/usr/bin/tclsh # # Copyright (C) 2014 Guido Berhoerster <guido+relmon@berhoerster.name> # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. package require Tcl 8.5 package require http package require tls package require tdom package require try package require cmdline package require control package require html package require htmlparse package require json package require json::write package require logger package require logger::utils package require textutil::split package require uri package require uri::urn namespace eval ::relmon { # version variable VERSION 1 } namespace eval ::relmon::common { namespace export cmpVersions isUrlValid urlGetHost normalizeHttpHeaders \ parseStateFile } # implementation of the Debian version comparison algorithm described at # http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version proc ::relmon::common::cmpVersion {v1 v2} { set v1Len [string length $v1] set v2Len [string length $v2] set v1Pos 0 set v2Pos 0 while {($v1Pos < $v1Len) || ($v2Pos < $v2Len)} { set firstNumDiff 0 # until reaching ASCII digits in both version strings compare character # values which are modified as so they are sorted in the following # order: # - "~" # - missing character or ASCII digits # - ASCII alphabet # - everything else in the order of their unicode value while {(($v1Pos < $v1Len) && ![string match {[0123456789]} [string index $v1 $v1Pos]]) || (($v2Pos < $v2Len) && ![string match {[0123456789]} [string index $v2 $v2Pos]])} { foreach char [list [string index $v1 $v1Pos] \ [string index $v2 $v2Pos]] charValueName \ {v1CharValue v2CharValue} { if {$char eq "~"} { set $charValueName -1 } elseif {$char eq ""} { set $charValueName 0 } elseif {[string match {[0123456789]} $char]} { set $charValueName 0 } elseif {[string match -nocase {[abcdefghijklmnopqrstuvwxyz]} \ $char]} { set $charValueName [scan $char "%c"] } else { set $charValueName [expr {[scan $char "%c"] + 0x7f + 1}] } } if {$v1CharValue != $v2CharValue} { return [expr {$v1CharValue - $v2CharValue}] } incr v1Pos incr v2Pos } # strip leading zeros while {[string index $v1 $v1Pos] eq "0"} { incr v1Pos } while {[string index $v2 $v2Pos] eq "0"} { incr v2Pos } # process digits until reaching a non-digit while {[string match {[0123456789]} [string index $v1 $v1Pos]] && [string match {[0123456789]} [string index $v2 $v2Pos]]} { # record the first difference between the two numbers if {$firstNumDiff == 0} { set firstNumDiff [expr {[string index $v1 $v1Pos] - [string index $v2 $v2Pos]}] } incr v1Pos incr v2Pos } # return if the number of one version has more digits than the other # since the one with more digits is the larger number if {[string match {[0123456789]} [string index $v1 $v1Pos]]} { return 1 } elseif {[string match {[0123456789]} [string index $v2 $v2Pos]]} { return -1 } # return the difference if the digits differed above if {$firstNumDiff != 0} { return $firstNumDiff } } return 0 } proc ::relmon::common::isUrlValid {url} { return [expr {![catch {dict create {*}[uri::split $url]} urlParts] && ([dict get $urlParts "scheme"] in {"http" "https"}) && ([dict get $urlParts "host"] ne "")}] } proc ::relmon::common::urlGetHost {url} { return [expr {(![catch {dict create {*}[uri::split $url]} urlParts]) ? [dict get $urlParts "host"] : ""}] } proc ::relmon::common::normalizeHttpHeaders {headers} { set httpHeaders [dict create] foreach {header value} $headers { set words {} foreach word [split $header "-"] { lappend words [string totitle $word] } dict set httpHeaders [join $words "-"] $value } return $httpHeaders } proc ::relmon::common::parseStateFile {stateFile} { try { set f [open $stateFile "r"] } trap {POSIX} {errorMsg errorOptions} { return -options $errorOptions \ "failed to open state file \"$stateFile\": $errorMsg" } try { set state [json::json2dict [chan read $f]] } trap {POSIX} {errorMsg errorOptions} { return -options $errorOptions \ "failed to read from state file \"$stateFile\": $errorMsg" } on error {errorMsg errorOptions} { # the json package does not set an error code dict set errorOptions "-errorcode" {RELMON JSON_PARSE_ERROR} return -options $errorOptions \ "failed to parse state file \"$stateFile\": $errorMsg" } finally { close $f } return $state } namespace eval ::relmon::update { # commandline option help text variable usage "usage: relmon update \[-dev\] \[-c max_connections\] \[-C\ ca_dir\] \[-D delay\]\n\ \ \[-H max_host_connections\] \[-i\ item\[,...\]\] \[-l logfile\]\n\ \ \[-r retries\] \[-t min_time\] watchlist\ statefile" # configuration options variable Config [dict create \ "log_file" "" \ "log_level" "notice" \ "history_limit" 20 \ "connection_limit" 16 \ "host_connection_limit" 4 \ "transfer_time_limit" 60000 \ "retry_limit" 3 \ "host_delay" 0 \ "timestamp_filter" 0 \ "error_filter" 0 \ "item_filter" {} \ "ca_dir" "" \ "state_file" "" \ "watchlist_file" ""] # exit status variable ExitStatus # transfer statistics variable Statistics [dict create \ "start_time" 0 \ "end_time" 0 \ "requests" 0 \ "items" 0] # watchlist variable Watchlist # ID of a delayed run of ManageTransfers variable ManageTransfersId "" # queue of pending transfers variable Queue # number of active connections per host variable HostConnections # delays before opening a new connection to a host variable HostDelays # active transfers variable ActiveTransfers # buffer for tracking the state of unfinished items variable StateBuffer # buffer needed by htmlparse::parse for constructing the preprocessed HTML # document variable PreprocessedHtmlBuffer # logger handle variable Log # logfile handle variable Lf } proc ::relmon::update::OnError {message returnOptions} { # internal error, abort puts stderr [dict get $returnOptions "-errorinfo"] exit 1 } proc ::relmon::update::CleanupBeforeExit {commandString operation} { variable Lf # close logfile if {($Lf ne "") && ($Lf ni {stdin stderr})} { close $Lf set Lf "" } return } proc ::relmon::update::ParseWatchlist {watchlistFilename} { variable Watchlist set lineno 0 set f [open $watchlistFilename "r"] try { while {[chan gets $f line] != -1} { set fields [textutil::split::splitx [string trim $line] {[\t ]+}] incr lineno if {([llength $fields] == 0) || ([string index [lindex $fields 0] 0] eq "#")} { # skip empty lines and comments continue } elseif {[llength $fields] < 3} { # a line consists of a name, base URL and at least one # version-matching pattern return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ "syntax error in \"$watchlistFilename\" line $lineno" } set patterns [lassign $fields name baseUrl] # validate URL if {![::relmon::common::isUrlValid $baseUrl]} { return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ "syntax error in \"$watchlistFilename\" line $lineno:\ invalid base URL" } # process patterns set processedPatterns {} set patternIndex 0 foreach pattern $patterns { incr patternIndex # make trailing slashes optional except in the last # version-matching pattern if {($patternIndex != [llength $patterns]) && ([string index $pattern end] eq "/")} { append pattern {?} } # ensure patterns are anchored to the end of the line if {[string index $pattern end] ne "$"} { append pattern {$} } # actually validate the regular expression try { set reInfo [regexp -about -- $pattern ""] } on error {errorMsg} { return -code error \ -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ "error in \"$watchlistFilename\" line $lineno:\ $errorMsg" } lappend processedPatterns $pattern } if {[lindex $reInfo 0] < 1} { return -code error -errorcode {RELMON WATCHLIST_PARSE_ERRROR} \ "syntax error in \"$watchlistFilename\" line $lineno:\ the last regular expression must contain at least one capturing group" } dict set Watchlist $name "base_url" $baseUrl dict set Watchlist $name "patterns" $processedPatterns } } finally { close $f } return } proc ::relmon::update::ProcessHtmlElement {tag slash param textBehindTheTag} { variable PreprocessedHtmlBuffer # copy every "<a>" element into PreprocessedHtmlBuffer if {($slash eq "") && ([string tolower $tag] eq "a")} { append PreprocessedHtmlBuffer "<$tag $param></$tag>" } return } proc ::relmon::update::PreprocessHtml {bodyDataName} { upvar 1 $bodyDataName bodyData variable PreprocessedHtmlBuffer # preprocess the document with htmlparse by constructing a new document # consisting only of found "<a>" elements which then can be fed into tdom # again; this is useful if parsing via tdom fails; however, htmlparse # should only be used as a last resort because it is just too limited, it # gets easily confused within "<script>" elements and lacks attribute # parsing set PreprocessedHtmlBuffer "<html><body>" htmlparse::parse -cmd [namespace code ProcessHtmlElement] $bodyData append PreprocessedHtmlBuffer "</body></html>" } proc ::relmon::update::ExtractUrls {bodyDataName contentType baseUrl rePattern} { upvar 1 $bodyDataName bodyData set extractedUrls {} set resultUrls [dict create] # extract all URLs or URL fragments switch -- $contentType { {text/html} - {application/xhtml+xml} { # HTML/XHTML # if tdom parsing has failed or not found any "<a>" element, # preprocess the document with htmlparse and try again if {[catch {dom parse -html $bodyData} doc] || ([set rootElement [$doc documentElement]] eq "") || ([llength [set aElements \ [$rootElement selectNodes {descendant::a}]]] == 0)} { try { set doc [dom parse -html [PreprocessHtml bodyData]] } on error {errorMsg errorOptions} { dict set errorOptions "-errorcode" \ {RELMON TDOM_PARSE_ERROR} return -options $errorOptions $errorMsg } set rootElement [$doc documentElement] set aElements [$rootElement selectNodes {descendant::a}] } foreach node $aElements { set href [$node getAttribute "href" ""] if {$href ne ""} { lappend extractedUrls $href } } $doc delete } {application/rss+xml} { # RSS 2.0 try { set doc [dom parse $bodyData] } on error {errorMsg errorOptions} { dict set errorOptions "-errorcode" {RELMON TDOM_PARSE_ERROR} return -options $errorOptions $errorMsg } set rootElement [$doc documentElement] if {$rootElement ne ""} { foreach node [$rootElement selectNodes {descendant::link}] { set linkText [$node text] if {$linkText ne ""} { lappend extractedUrls $linkText } } } $doc delete } {application/atom+xml} { # Atom 1.0 try { set doc [dom parse $bodyData] } on error {errorMsg errorOptions} { dict set errorOptions "-errorcode" {RELMON TDOM_PARSE_ERROR} return -options $errorOptions $errorMsg } set rootElement [$doc documentElement] if {$rootElement ne ""} { foreach node [$rootElement selectNodes {descendant::link}] { set href [$node getAttribute "href" ""] if {$href ne ""} { lappend extractedUrls $href } } } $doc delete } {text/plain} { # plain text foreach line [split $bodyData "\n"] { if {$line ne ""} { lappend extractedUrls $line } } } default { return -code error \ -errorcode {RELMON UNSUPPORTED_CONTENT_TYPE_ERROR} \ "unsupported content type \"$contentType\"" } } foreach url $extractedUrls { set normalizedUrl [uri::canonicalize [uri::resolve $baseUrl $url]] dict set resultUrls $normalizedUrl \ [expr {[regexp -line -- $rePattern $normalizedUrl] ? 1 : 0}] } return $resultUrls } proc ::relmon::update::StateItemAppendError {name logMsg} { variable StateBuffer dict update StateBuffer $name stateItem { dict lappend stateItem "errors" $logMsg } return } proc ::relmon::update::HandleSuccessfulTransfer {item httpBodyName} { upvar 1 $httpBodyName httpBody variable Log variable StateBuffer variable Queue variable Watchlist set name [dict get $item "name"] set url [dict get $item "url"] set patternIndex [dict get $item "pattern_index"] set pattern [lindex [dict get $Watchlist $name "patterns"] $patternIndex] ${Log}::info "\"$name\": \"$url\": transfer finished" # parse data try { set urls [ExtractUrls httpBody [dict get $item "content_type"] $url \ $pattern] } trap {RELMON} {errorMsg} { # continue on tdom parsing errors or when receiving documents with an # unsupported content type set urls [dict create] set warningMsg "\"$name\": \"$url\": failed to parse data: $errorMsg" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg } if {$patternIndex < ([llength \ [dict get $Watchlist $name "patterns"]] - 1)} { # if this is not the last, version-matching pattern, queue matched URLs dict for {newUrl matched} $urls { if {$matched} { if {![::relmon::common::isUrlValid $newUrl]} { ${Log}::debug "\"$name\": \"$url\": ignoring matched but\ invalid URL \"$newUrl\"" continue } ${Log}::debug "\"$name\": \"$url\": queuing \"$newUrl\"" dict lappend Queue [::relmon::common::urlGetHost $newUrl] \ [dict create "name" $name "url" $newUrl \ "pattern_index" [expr {$patternIndex + 1}] \ "content_type" "" "num_redirects" 0 "num_retries" 0] } else { ${Log}::debug "\"$name\": \"$url\": ignoring \"$newUrl\"" } } } else { # otherwise this branch has finished, try to extract the versions and # store them in the buffer dict for {finalUrl matched} $urls { if {$matched} { regexp -line -- $pattern $finalUrl -> version if {$version ne ""} { ${Log}::debug "\"$name\": \"$url\": extracted version\ \"$version\" from \"$finalUrl\" found on\ \"$url\"" dict set StateBuffer $name "versions" $version $finalUrl } else { ${og}::debug "\"$name\": \"$url\": could not extract a\ version from \"$finalUrl\"" } } else { ${Log}::debug "\"$name\": \"$url\": ignoring \"$finalUrl\"" } } } return } proc ::relmon::update::HandleRedirect {item httpCode httpHeaders} { variable Log variable Queue set name [dict get $item "name"] set url [dict get $item "url"] if {![dict exists $httpHeaders "Location"]} { # bail out in case of an invalid HTTP response set warningMsg "\"$name\": \"$url\": transfer failed: invalid HTTP\ response" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg return } set location [dict get $httpHeaders "Location"] # sanitize URL from Location header if {[uri::isrelative $location]} { set redirectUrl [uri::canonicalize [uri::resolve \ $url $location]] } else { if {![::relmon::common::isUrlValid $location]} { # bail out in case of an invalid redirect URL set warningMsg "\"$name\": \"$url\": received invalid redirect URL\ \"$location\"" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg return } set redirectUrl [uri::canonicalize $location] } ${Log}::notice "\"$name\": \"$url\": received redirect to \"$redirectUrl\"" # handle up to 10 redirects by re-queuing the target URL if {[dict get $item "num_redirects"] < 10} { ${Log}::debug "\"$name\": \"$url\": queuing \"$redirectUrl\" due to\ redirect" dict lappend Queue [::relmon::common::urlGetHost $redirectUrl] \ [dict replace $item "url" $redirectUrl "content_type" "" \ "num_redirects" [expr {[dict get $item "num_redirects"] + 1}] \ "num_retries" 0] } else { set warningMsg "\"$name\": \"$url\": exceeded maximum number of\ redirects" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg } return } proc ::relmon::update::HandleProtocolError {item httpCode} { variable Log set name [dict get $item "name"] set url [dict get $item "url"] set warningMsg "\"$name\": \"$url\": transfer failed: $httpCode" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg return } proc ::relmon::update::HandleTimeoutReset {item} { variable Log variable Config variable Queue set name [dict get $item "name"] set url [dict get $item "url"] # retry by re-queuing the target URL until reaching the limit if {[dict get $item "num_retries"] < [dict get $Config "retry_limit"]} { ${Log}::warn "\"$name\": \"$url\": connection timed out or was reset,\ retrying" dict lappend Queue [::relmon::common::urlGetHost $url] \ [dict replace $item \ "num_retries" [expr {[dict get $item "num_retries"] + 1}]] } else { set warningMsg "\"$name\": \"$url\": exceeded maximum number of\ retries" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg } return } proc ::relmon::update::HandleConnectionError {item errorMsg} { variable Log set name [dict get $item "name"] set url [dict get $item "url"] set warningMsg "\"$name\": \"$url\": transfer failed: $errorMsg" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg return } proc ::relmon::update::TransferCallbackWrapper {callbackCmd args} { # ensure that exceptions get raised, by default http catches all errors and # silently ignores them, see https://core.tcl.tk/tcl/tktview?name=1414262 if {[catch {eval $callbackCmd $args} -> errorOptions]} { OnError [dict get $errorOptions "-errorinfo"] $errorOptions } return } proc ::relmon::update::ManageTransfers {} { variable Config variable ManageTransfersId variable Queue variable HostConnections variable HostDelays variable ActiveTransfers variable ExitStatus variable Log after cancel $ManageTransfersId # try to initiate new transfers while {([dict size $ActiveTransfers] < [dict get $Config "connection_limit"]) && ([dict size $Queue] > 0)} { # find URLs in the queue with a host for which we have not reached the # per-host connection limit yet and for which no delay is in effect set item {} dict for {host items} $Queue { set now [clock milliseconds] if {![dict exists $HostConnections $host]} { dict set HostConnections $host 0 } if {![dict exists $HostDelays $host]} { dict set HostDelays $host $now } if {([dict get $HostConnections $host] < [dict get $Config "host_connection_limit"]) && ([dict get $HostDelays $host] <= $now)} { # pop item from the queue set items [lassign $items item] if {[llength $items] > 0} { dict set Queue $host $items } else { dict unset Queue $host } dict incr HostConnections $host # set a random delay before the next connection to this host # can be made dict set HostDelays $host \ [expr {[clock milliseconds] + int((rand() + 0.5) * [dict get $Config "host_delay"])}] break } } # if no item could be found, the per-host connection limit for all # queued URLs has been reached and no new transfers may be started # at this point if {$item eq {}} { break } # otherwise start a new transfer set url [dict get $item "url"] set name [dict get $item "name"] try { set token [http::geturl $url \ -timeout [dict get $Config "transfer_time_limit"] \ -progress [namespace code {TransferCallbackWrapper \ OnTransferProgress}] \ -command [namespace code {TransferCallbackWrapper \ OnTransferFinished}]] } on ok {} { dict set ActiveTransfers $token $item ${Log}::info "\"$name\": \"$url\": starting transfer" } on error {errorMsg} { # an error occured during socket setup, e.g. a DNS lookup failure set warningMsg "\"$name\": \"$url\": transfer failed: $errorMsg" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg } } # terminate the event loop if there are no remaining transfers if {([dict size $ActiveTransfers]) == 0 && ([dict size $Queue] == 0)} { set ExitStatus 0 return } # due to per-host connection limits and per-host delays the maximum number # of connections may not be reached although there are still items in the # queue, in this case schedule ManageTransfers again after the smallest of # the current per-host delays set delay 0 if {([dict size $ActiveTransfers] < [dict get $Config "connection_limit"]) && ([dict size $Queue] > 0)} { dict for {host items} $Queue { if {(![dict exists $HostConnections $host] || ([dict get $HostConnections $host] < [dict get $Config "host_connection_limit"])) && ([dict exists $HostDelays $host] && ([dict get $HostDelays $host] > $now))} { set hostDelay [expr {[dict get $HostDelays $host] - $now + 1}] if {(($delay == 0) || ($hostDelay < $delay))} { set delay $hostDelay } } } if {$delay > 0} { set ManageTransfersId \ [after $delay [namespace code ManageTransfers]] } } return } proc ::relmon::update::OnTransferProgress {token total current} { upvar #0 $token httpState variable ActiveTransfers variable Log # try to determine content type and abort transfer if content type is not # one that can be parsed, this is primarily to prevent accidental downloads if {[dict get $ActiveTransfers $token "content_type"] eq ""} { set httpHeaders [relmon::common::normalizeHttpHeaders \ $httpState(meta)] if {[dict exists $httpHeaders "Content-Type"]} { set contentType [string trim [lindex [split \ [dict get $httpHeaders "Content-Type"] ";"] 0]] dict set ActiveTransfers $token "content_type" $contentType if {$contentType ni {"text/html" "application/xhtml+xml" "application/atom+xml" "application/rss+xml" "text/plain"}} { ${Log}::warn "\"[dict get $ActiveTransfers $token "name"]\":\ \"[dict get $ActiveTransfers $token "url"]\": content\ type \"$contentType\" is not acceptable" http::reset $token } } } } proc ::relmon::update::OnTransferFinished {token} { upvar #0 $token httpState variable Config variable HostConnections variable Queue variable ActiveTransfers variable Statistics variable StateBuffer variable State variable Log set item [dict get $ActiveTransfers $token] set name [dict get $item "name"] set host [relmon::common::urlGetHost [dict get $item "url"]] # update list of per-host connections, and number of remaining transfers # for this item dict unset ActiveTransfers $token dict incr HostConnections $host -1 switch -- $httpState(status) { {ok} { # normalize headers set httpHeaders [relmon::common::normalizeHttpHeaders \ $httpState(meta)] # try to determine content type if {([dict get $item "content_type"] eq "") && [dict exists $httpHeaders "Content-Type"]} { dict set item "content_type" [string trim [lindex [split \ [dict get $httpHeaders "Content-Type"] ";"] 0]] } # dispatch based on HTTP status code set httpCode [http::ncode $token] switch -glob -- $httpCode { {30[12378]} { HandleRedirect $item $httpCode $httpHeaders } {200} { HandleSuccessfulTransfer $item httpState(body) } default { HandleProtocolError $item $httpState(http) } } } {reset} { # aborted due to wrong content type } {eof} - {timeout} { # timeout or connection reset HandleTimeoutReset $item } {error} { # connection may have failed or been refused HandleConnectionError $item [lindex $httpState(error) 0] } } # check if all transfers of this item are finished set itemFinished 1 dict for {queueHost queueItems} $Queue { foreach queueItem $queueItems { if {[dict get $queueItem "name"] eq $name} { set itemFinished 0 } } } dict for {activeToken activeItem} $ActiveTransfers { if {[dict get $activeItem "name"] eq $name} { set itemFinished 0 } } if {$itemFinished} { set timestamp [clock milliseconds] # create httpState item if it does not exist yet if {![dict exists $State $name]} { dict set State $name [dict create "versions" [dict create] \ "history" [list] "timestamp" 0 "errors" [list]] } # if there are no versions, log an error message since something must # be wrong if {[llength [dict get $StateBuffer $name "versions"]] == 0} { set warningMsg "\"$name\": no versions found" ${Log}::warn $warningMsg StateItemAppendError $name $warningMsg } # update httpState item dict set State $name "errors" [dict get $StateBuffer $name "errors"] dict set State $name "timestamp" $timestamp if {[llength [dict get $StateBuffer $name "errors"]] == 0} { # expire old history entries set history [lrange [dict get $State $name "history"] \ [expr {[llength [dict get $State $name "history"]] - [dict get $Config "history_limit"] + 1}] end] # add currently latest available version to history if it is either # newer than the previous one or if the previous one is no longer # available (e.g. if it has been removed or the watchlist pattern # has been changed) set prevLatestVersion [lindex $history end 0] set curLatestVersion [lindex \ [lsort -command ::relmon::common::cmpVersion \ [dict keys [dict get $StateBuffer $name "versions"]]] end] if {([::relmon::common::cmpVersion $curLatestVersion \ $prevLatestVersion] > 0) || ![dict exists $StateBuffer $name "versions" \ $prevLatestVersion]} { lappend history [list $curLatestVersion $timestamp] dict set State $name "history" $history } dict set State $name "versions" [dict get $StateBuffer $name \ "versions"] } dict unset StateBuffer $name ${Log}::notice "progress: [dict size $StateBuffer]/[dict get \ $Statistics "items"] items left" } http::cleanup $token ManageTransfers return } # control certificate verification and log errors during TLS handshake proc ::relmon::update::OnTlsHandshake {type args} { variable Config variable Log switch -- ${type} { {error} { lassign $args {} tlsErrorMsg ${Log}::error "TLS connection error: $tlsErrorMsg" } {verify} { lassign $args {} {} {} status tlsErrorMsg array set cert [lindex $args 2] if {$status == 0} { if {[dict get $Config "ca_dir"] eq ""} { # do not verify certificates is ca-dir was not set return 1 } else { set errorMsg "$tlsErrorMsg\nCertificate details:" foreach {key description} {"serial" "Serial Number" "issuer" "Issuer" "notBefore" "Not Valid Before" "notAfter" "Not Valid After" "subject" "Subject" "sha1_hash" "SHA1 Hash"} { append errorMsg "\n$description: $cert($key)" } ${Log}::error "TLS connection error: $errorMsg" return 0 } } } } } proc ::relmon::update::main {args} { variable Config variable usage variable Statistics variable Watchlist [dict create] variable Queue [dict create] variable HostConnections [dict create] variable HostDelays [dict create] variable ActiveTransfers [dict create] variable State variable StateBuffer [dict create] variable PreprocessedHtmlBuffer variable Log variable Lf "" variable ExitStatus # parse commandline while {[set GetoptRet [cmdline::getopt args \ {c.arg C.arg d D.arg e H.arg i.arg l.arg r.arg t.arg T.arg v} \ OptArg OptVal]] == 1} { switch -glob -- $OptArg { {c} { if {![string is digit -strict $OptVal] || ($OptVal <= 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "host_connection_limit" $OptVal } {C} { if {![file isdirectory $OptVal]} { puts stderr "directory \"$OptVal\" is not a directory" exit 1 } elseif {![file readable $OptVal] || ![file executable $OptVal]} { puts stderr "directory \"$OptVal\" is not readable" exit 1 } dict set Config "ca_dir" $OptVal } {d} { dict set Config "log_level" "debug" } {D} { if {![string is digit -strict $OptVal] || ($OptVal < 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "host_delay" [expr {$OptVal * 1000}] } {e} { dict set Config "error_filter" 1 } {H} { if {![string is digit -strict $OptVal] || ($OptVal <= 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "connection_limit" $OptVal } {i} { foreach item [split $OptVal " "] { set item [string trim $item] if {$item ne ""} { dict lappend Config "item_filter" $item } } } {l} { dict set Config "log_file" $OptVal set LogDir [file dirname $OptVal] if {![file writable $LogDir] || ![file executable $LogDir]} { puts stderr "directory \"$LogDir\" is not writable" exit 1 } } {r} { if {![string is digit -strict $OptVal] || ($OptVal < 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "retry_limit" $OptVal } {t} { if {![string is digit -strict $OptVal] || ($OptVal < 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "timestamp_filter" [expr {$OptVal * 1000}] } {T} { if {![string is digit -strict $OptVal] || ($OptVal <= 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "transfer_time_limit" [expr {$OptVal * 1000}] } {v} { if {[dict get $Config "log_level"] ne "debug"} { dict set Config "log_level" "info" } } } } set argc [llength $args] if {$GetoptRet == -1} { puts stderr "unknown command line option \"-$OptArg\"" puts stderr $usage exit 1 } if {$argc != 2} { puts stderr $usage exit 1 } dict set Config "watchlist_file" [lindex $args 0] if {![file readable [dict get $Config "watchlist_file"]]} { puts stderr "watchlist file \"[dict get $Config "watchlist_file"]\"\ could not be read" exit 1 } set stateFile [lindex $args 1] dict set Config "state_file" $stateFile set StateDir [file dirname $stateFile] if {![file writable $StateDir]} { puts stderr "directory \"$StateDir\" is not writable" exit 1 } # install exit handler for closing the logfile, open the logfile and # initialize logger trace add execution exit enter CleanupBeforeExit if {[dict get $Config "log_file"] ne ""} { try { set Lf [open [dict get $Config "log_file"] "w"] } trap {POSIX} {errorMsg errorOptions} { puts stderr "failed to open logfile\ \"[dict get $Config "log_file"]\": $errorMsg" exit 1 } } else { set Lf stderr } set Log [logger::init global] if {[dict get $Config "log_level"] eq "debug"} { set logFormat {%d \[%p\] \[%M\] %m} } else { set logFormat {%d \[%p\] %m} } logger::utils::applyAppender -appender fileAppend -appenderArgs \ [list -outputChannel $Lf -conversionPattern $logFormat] \ -serviceCmd $Log # set default logging level ${Log}::setlevel [dict get $Config "log_level"] ${Log}::notice "relmon.tcl starting up" # parse the watchlist try { ParseWatchlist [dict get $Config "watchlist_file"] } trap {POSIX} {errorMsg errorOptions} - \ trap {RELMON} {errorMsg errorOptions} { ${Log}::error $errorMsg exit 1 } # read the state file try { set State [::relmon::common::parseStateFile $stateFile] } trap {POSIX ENOENT} {errorMsg} { ${Log}::debug "state file \"$stateFile\" does not exist" set State [dict create] } trap {POSIX} {errorMsg} - \ trap {RELMON} {errorMsg} { ${Log}::error $errorMsg exit 1 } # initialize queue and state buffer from the watchlist dict set Statistics "start_time" [clock milliseconds] dict for {name watchlistItem} $Watchlist { # apply filters specified on the command line to watchlist items if {([llength [dict get $Config "item_filter"]] > 0) && ($name ni [dict get $Config "item_filter"])} { continue } if {[dict get $Config "error_filter"] && [dict exists $State $name "errors"] && ([llength [dict get $State $name "errors"]] == 0)} { continue } if {[dict exists $State $name "timestamp"] && ([dict get $State $name "timestamp"] > [dict get $Statistics "start_time"] - [dict get $Config "timestamp_filter"])} { continue } dict lappend Queue [::relmon::common::urlGetHost \ [dict get $watchlistItem "base_url"]] \ [dict create \ "name" $name \ "url" [dict get $watchlistItem "base_url"] \ "pattern_index" 0 \ "content_type" "" \ "num_redirects" 0 \ "num_retries" 0] dict incr Statistics "items" dict set StateBuffer $name [dict create "versions" [dict create] \ "errors" [list]] } # configure http and tls http::register https 443 [list tls::socket \ -cadir [dict get $Config "ca_dir"] -request 1 -require 1 \ -command [namespace code OnTlsHandshake] -ssl2 0 -ssl3 1 -tls1 1] http::config -useragent "Mozilla/5.0 (Windows NT 6.1; rv:16.0)\ Gecko/20100101 Firefox/16.0 relmon/$relmon::VERSION" # handle errors while in the event loop interp bgerror {} [namespace code OnError] # enter the main loop after idle [namespace code ManageTransfers] vwait [namespace which -variable ExitStatus] dict set Statistics "end_time" [clock milliseconds] # display statistics ${Log}::notice "items checked: [dict get $Statistics "items"]" ${Log}::notice "time elapsed: [expr {([dict get $Statistics "end_time"] - [dict get $Statistics "start_time"]) / 1000}]s" # serialize the new state set JsonStateItems {} dict for {item data} $State { set versions {} dict for {version url} [dict get $data "versions"] { lappend versions $version [json::write string $url] } set history {} foreach historyItem [dict get $data "history"] { lassign $historyItem version timestamp lappend history [json::write array [json::write string $version] \ $timestamp] } set errors {} foreach errorItem [dict get $data "errors"] { lappend errors [json::write string $errorItem] } lappend JsonStateItems $item [json::write object \ "versions" [json::write object {*}$versions] \ "history" [json::write array {*}$history] \ "timestamp" [dict get $data "timestamp"] \ "errors" [json::write array {*}$errors]] } set JsonState [json::write object {*}$JsonStateItems] # try to preserve permissions and ownership try { set stateFileAttributes [file attributes $stateFile] } trap {POSIX ENOENT} {} { set stateFileAttributes {} } trap {POSIX} {errorMsg errorOptions} { ${Log}::error "failed to stat \"$stateFile\": $errorMsg" } # write the new state to a temporary file set tmpFile "$stateFile.[pid].tmp" try { set f [open $tmpFile {RDWR CREAT EXCL TRUNC} 0600] } trap {POSIX} {errorMsg errorOptions} { ${Log}::error "failed to open \"$tmpFile\": $errorMsg" exit 1 } try { chan puts -nonewline $f $JsonState } trap {POSIX} {errorMsg errorOptions} { catch {file delete $tmpFile} ${Log}::error "failed to write to \"$tmpFile\": $errorMsg" exit 1 } finally { close $f } # make a backup of the previous state file try { file copy -force $stateFile "$stateFile~" } trap {POSIX ENOENT} {} { # ignore non-existing file } trap {POSIX} {errorMsg errorOptions} { ${Log}::error "failed to create a backup of \"$statFile\":\ $errorMsg" } # rename the temporary file to the state file name try { file rename -force $tmpFile $stateFile } trap {POSIX} {errorMsg errorOptions} { catch {file delete $tmpFile} ${Log}::error "failed to rename \"$tmpFile\" to \"$stateFile\":\ $errorMsg" exit 1 } # restore ownership and permissions try { file attributes $stateFile {*}$stateFileAttributes } trap {POSIX} {errorMsg errorOptions} { ${Log}::error "failed to set permissions and ownership on\ \"$stateFile\": $errorMsg" exit 1 } # clean up ${Log}::delete exit $ExitStatus } namespace eval ::relmon::show { # commandline option help text variable usage "usage: relmon show statefile name..." } proc ::relmon::show::GetItem {stateName name} { upvar 1 $stateName state set item [dict get $state $name] # format state data as plain-text set output "" append output "Name: $name\n" append output "Latest Version:\ [lindex [lindex [dict get $item "history"] end] 0]\n" append output "Refreshed: [clock format \ [expr {[dict get $item "timestamp"] / 1000}] \ -format {%Y-%m-%dT%H:%M:%S%z}]\n" append output "Versions:\n" dict for {version url} [dict get $item "versions"] { append output "\t$version $url\n" } append output "Errors:\n" if {[dict get $item "errors"] eq ""} { append output "\tNone\n" } else { foreach errorMsg [dict get $item "errors"] { append output "\t[string map {\n \n\t} [string trim $errorMsg]]\n" } } append output "History:\n" foreach historyItem [dict get $item "history"] { append output "\t[lindex $historyItem 0] [clock format \ [expr {[lindex $historyItem 1] / 1000}] \ -format {%Y-%m-%dT%H:%M:%S%z}]\n" } return $output } proc ::relmon::show::main {args} { variable usage # parse commandline if {[cmdline::getopt args {} OptArg OptVal] == -1} { puts stderr "unknown command line option \"-$OptArg\"" puts stderr $usage exit 1 } if {[llength $args] < 2} { puts stderr $usage exit 1 } set stateFile [lindex $args 0] set names [lrange $args 1 end] try { set state [::relmon::common::parseStateFile $stateFile] } trap {POSIX} {errorMsg} - trap {RELMON} {errorMsg} { puts stderr $errorMsg exit 1 } # show each item foreach name $names { puts -nonewline [GetItem state $name] } exit 0 } namespace eval ::relmon::list { # commandline option help text variable usage "usage: relmon list \[-H\] \[-f html|parseable|text\]\ \[-F url\]\n\ \ \[-n number_items\] statefile\n\ \ relmon list -f atom -F url \[-n number_items\] statefile" # configuration options variable Config [dict create \ "format" "text" \ "show_history" 0 \ "history_limit" 100 \ "feed_url" ""] } proc ::relmon::list::FormatText {stateName includeHistory historyLimit} { upvar 1 $stateName state set output "" append output [format "%-35s %-15s %-24s %-3s\n" "Project" "Version" \ "Refreshed" "St."] append output [string repeat "-" 80] append output "\n" set history {} dict for {name item} $state { foreach historyItem [dict get $item "history"] { lappend history [list [lindex $historyItem 1] $name \ [lindex $historyItem 0]] } set latestVersion [lindex [lindex [dict get $item "history"] end] 0] set timestamp [clock format [expr {[dict get $item "timestamp"] / 1000}] -format {%Y-%m-%dT%H:%M:%S%z}] set status [expr {[llength [dict get $item "errors"]] > 0 ? "E" : ""}] append output [format "%-35s %15s %-24s %-1s\n" $name $latestVersion \ $timestamp $status] } if {$includeHistory} { append output "\nHistory\n" append output [string repeat "-" 80] append output "\n" set history [lsort -decreasing -integer -index 0 $history] foreach historyItem [lrange $history 0 $historyLimit] { append output [format "%-24s %-35s %15s\n" \ [clock format [expr {[lindex $historyItem 0] / 1000}] \ -format {%Y-%m-%dT%H:%M:%S%z}] [lindex $historyItem 1] \ [lindex $historyItem 2]] } } return $output } proc ::relmon::list::FormatParseable {stateName includeHistory historyLimit} { upvar 1 $stateName state set output "" set history {} dict for {name item} $state { foreach historyItem [dict get $item "history"] { lappend history [list [lindex $historyItem 1] $name \ [lindex $historyItem 0]] } set latestVersion [lindex [lindex [dict get $item "history"] end] 0] if {$latestVersion eq ""} { set latestVersion - } set timestamp [clock format [expr {[dict get $item "timestamp"] / 1000}] -timezone :UTC -format {%Y-%m-%dT%H:%M:%SZ}] set status [expr {[llength [dict get $item "errors"]] > 0 ? "ERROR" : "OK"}] append output [format "%s\t%s\t%s\t%s\n" $name $latestVersion \ $timestamp $status] } if {$includeHistory} { append output "\n" set history [lsort -decreasing -integer -index 0 $history] foreach historyItem [lrange $history 0 $historyLimit] { append output [format "%s\t%s\t%s\n" [clock format \ [expr {[lindex $historyItem 0] / 1000}] -timezone :UTC \ -format {%Y-%m-%dT%H:%M:%SZ}] [lindex $historyItem 1] \ [lindex $historyItem 2]] } } return $output } proc ::relmon::list::FormatHtml {stateName includeHistory historyLimit feedUrl} { upvar 1 $stateName state set output "<html>\n" append output "<head>\n" append output "<title>Current Releases</title>\n" if {$feedUrl ne ""} { append output "<link type=\"application/atom+xml\" rel=\"alternate\"\ title=\"Release History\"\ href=\"[html::html_entities $feedUrl]\"/>\n" } append output "</head>\n" append output "<body>\n" append output "<h1>Current Releases</h1>\n<table>\n<tr>\n<th>Project</th>\ \n<th>Version</th>\n<th>Refreshed</th>\n<th>Status</th>\n</tr>\n" set history {} dict for {name item} $state { foreach historyItem [dict get $item "history"] { lappend history [list [lindex $historyItem 1] $name \ [lindex $historyItem 0]] } set latestVersion [lindex [lindex [dict get $item "history"] end] 0] set timestamp [clock format [expr {[dict get $item "timestamp"] / 1000}] -format {%Y-%m-%dT%H:%M:%S%z}] set status [expr {[llength [dict get $item "errors"]] > 0 ? "Error" : "OK"}] append output "<tr>\n<td>[html::html_entities $name]</td>\n" if {$latestVersion ne ""} { if {[dict exists $item "versions" $latestVersion]} { set url [dict get $item "versions" $latestVersion] append output "<td><a\ href=\"[html::html_entities $url]\"\ title=\"[html::html_entities\ "$name $latestVersion"]\">[html::html_entities \ $latestVersion]</a></td>\n" } else { append output "<td>[html::html_entities \ $latestVersion]</td>\n" } } else { append output "<td></td>\n" } append output "<td>$timestamp</td>\n" append output "<td>[html::html_entities $status]</td>\n</tr>\n" } append output "</table>\n" if {$includeHistory} { set history [lsort -decreasing -integer -index 0 $history] append output "<h1>Release History</h1>\n<table>\n" append output "<tr><th>Time</th><th>Project</th><th>Version</th></tr>\n" foreach historyItem [lrange $history 0 $historyLimit] { set timestamp [clock format [expr {[lindex $historyItem 0] / 1000}] -format {%Y-%m-%dT%H:%M:%S%z}] set name [lindex $historyItem 1] set version [lindex $historyItem 2] append output "<tr>\n<td>$timestamp</td>\n" append output "<td>[html::html_entities $name]</td>\n" append output "<td>[html::html_entities $version]</td></tr>\n" } append output "</table>\n" } append output "</body>\n</html>\n" return $output } proc ::relmon::list::FormatAtom {stateName historyLimit feedUrl} { upvar 1 $stateName state set host [::relmon::common::urlGetHost $feedUrl] set output "<?xml version=\"1.0\" encoding=\"utf-8\"?>\ \n<feed xmlns=\"http://www.w3.org/2005/Atom\">\n" append output "<author><name>relmon</name></author>\n" append output "<title>Release History</title>\n" append output "<id>[html::html_entities $feedUrl]</id>\n" set history {} dict for {name item} $state { foreach historyItem [dict get $item "history"] { lappend history [list [lindex $historyItem 1] $name \ [lindex $historyItem 0]] } } set history [lsort -decreasing -integer -index 0 $history] set updated [lindex [lindex $history end] 0] if {$updated eq ""} { set updated [clock seconds] } append output "<updated>[clock format $updated \ -format {%Y-%m-%dT%H:%M:%S%z}]</updated>\n" foreach historyItem [lrange $history 0 $historyLimit] { set name [lindex $historyItem 1] set version [lindex $historyItem 2] set timestamp [clock format [expr {[lindex $historyItem 0] / 1000}] \ -format {%Y-%m-%dT%H:%M:%S%z}] set id "tag:$host,[clock format [lindex $historyItem 0] \ -format {%Y-%m-%d}]:[uri::urn::quote $name-$version]" append output "<entry>\n" append output "<id>[html::html_entities $id]</id>\n" append output "<updated>$timestamp</updated>\n" append output "<title>[html::html_entities "$name $version"]</title>" append output "<content>[html::html_entities \ "$name $version"]</content>\n" append output "</entry>\n" } append output "</feed>\n" return $output } proc ::relmon::list::main {args} { variable usage variable Config # parse commandline while {[set GetoptRet [cmdline::getopt args {f.arg F.arg H n.arg} OptArg \ OptVal]] == 1} { switch -glob -- $OptArg { {f} { if {$OptVal ni {atom html parseable text}} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "format" $OptVal } {F} { if {[catch {dict create {*}[uri::split $OptVal]} UrlParts] || ([dict get $UrlParts "host"] eq "")} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "feed_url" $OptVal } {H} { dict set Config "show_history" 1 } {n} { if {![string is digit -strict $OptVal] || ($OptVal <= 0)} { puts stderr "invalid value passed to \"-$OptArg\"" exit 1 } dict set Config "history_limit" [expr {$OptVal - 1}] } } } set argc [llength $args] if {$GetoptRet == -1} { puts stderr "unknown command line option \"-$OptArg\"" puts stderr $usage exit 1 } if {$argc != 1} { puts stderr $usage exit 1 } if {([dict get $Config "format"] eq "atom") && ([dict get $Config "feed_url"] eq "")} { puts stderr "mandatory \"-F\" option is missing" puts stderr $usage exit 1 } set StateFile [lindex $args 0] # read the state file try { set State [::relmon::common::parseStateFile $StateFile] } trap {POSIX} {errorMsg} - trap {RELMON} {errorMsg} { puts stderr $errorMsg exit 1 } # call formatter switch -- [dict get $Config "format"] { {atom} { puts -nonewline [FormatAtom State \ [dict get $Config "history_limit"] \ [dict get $Config "feed_url"]] } {html} { puts -nonewline [FormatHtml State \ [dict get $Config "show_history"] \ [dict get $Config "history_limit"] \ [dict get $Config "feed_url"]] } {parseable} { puts -nonewline [FormatParseable State \ [dict get $Config "show_history"] \ [dict get $Config "history_limit"]] } {default} { puts -nonewline [FormatText State \ [dict get $Config "show_history"] \ [dict get $Config "history_limit"]] } } exit 0 } namespace eval ::relmon::help { # commandline option help text variable usage "usage: relmon help \[subcommand\]" } proc ::relmon::help::main {args} { variable usage # parse commandline if {[cmdline::getopt args {} OptArg OptVal] == -1} { puts stderr "unknown command line option \"-$OptArg\"" puts stderr $usage exit 1 } set argc [llength $args] if {$argc > 1} { puts stderr $usage exit 1 } set subCommand [lindex $args 0] if {$subCommand ne ""} { if {[info procs ::relmon::${subCommand}::main] ne ""} { puts stderr [set ::relmon::${subCommand}::usage] } else { puts stderr "unknown subcommand \"$subCommand\"" puts stderr $usage exit 1 } } else { foreach subCommandNs [namespace children ::relmon] { if {[info procs ${subCommandNs}::main] ne ""} { puts stderr [set ${subCommandNs}::usage] } } } exit 0 } proc ::relmon::main {args} { variable usage set subArgs [lassign $args subCommand] # generate list of subcommands set subCommands {} foreach subCommandNs [namespace children ::relmon] { if {[info procs ${subCommandNs}::main] ne ""} { lappend subCommands [namespace tail $subCommandNs] } } if {$subCommand ni $subCommands} { if {$subCommand ne ""} { puts stderr "unknown subcommand \"$subCommand\"" } foreach command $subCommands { puts stderr [set relmon::${command}::usage] } exit 1 } # dispatch subcommand relmon::${subCommand}::main {*}$subArgs } relmon::main {*}$argv