################################################################################
# Copyleft ©2011 lee8oi@gmail.com                                      +-------+
#                                                                      + 0.3.0 +
#                                                                      +-------+
# Durby - https://github.com/lee8oi/durby
#
# This is a fork of the 'webby' script. See the original header below for more
# information.
#
# There's a short story here. Webby is a script project by speechles designed
# to handle the task of grabbing information from web links and testing regexp
# in channels. It was used to replace the problematic http information-grabbing
# code in other scripts such as the unofficial Incith google script. 'Durby'
# was originally a project called durltitle, which was intended to be a script
# that grabs urls from channel messages and returns the title content. After
# working with speechles on fixing the elusive utf-8 bugs in webby, I decided
# it would be smarter to base a title-grabber script on a proven system instead
# of reinventing the wheel. So durltitle merged with webby, and thus 'Durby'
# was created.
#
# Durby adds these new features & changes to webby:
#
# Urlwatch - for grabbing urls from channel messages and returning the
# information automatically.
#
# Pattern Ignore - allows you to configure the script to ignore urls that
# match predefined ignore patterns.
#
# Nick Ignore - allows you to configure the script to ignore requests & urls
# posted in channel by certain nicks. Useful for ignoring other bots.
#
# Verbose Mode - can be enabled by default or used on demand with the
# --verbose switch to append the url's type info and description to the
# results. Durby defaults to simply showing the title and a tiny url.
#
# Title Collection - enabled by default. Sets durby to collect titles and
# display the results at once, instead of posting each result individually,
# when urlwatch finds multiple links. (Verbose mode disables this feature.)
#
# Usage:
#   .chanset #channel +durby
#   !durby website.here.com [--html] [--header] [--xheader]
#          [--post] [--override] [--nostrip] [--swap]
#          [--regexp regexp-here--] [--verbose]
#
# Or simply post a url in channel if urlwatch is on (it is by default).
#
# Todo: store when urls were posted and who posted them. Allow the script to
# post that information along with the results when a url matches an already
# posted url.
################################################################################
#!!!!!!!!!!!!!!!!!!!!!!!!{ORIGINAL WEBBY HEADER}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
# webby - web link information script v1.6
#
# This script will display titles and other relevant information from links
# given in irc channels. Can also be used to test and construct regular
# expressions in channel.
#
# Usage:
#   .chanset #channel +webby
#   !webby website.here.com [--html] [--header] [--xheader]
#          [--post] [--override] [--nostrip] [--swap]
#          [--regexp regexp-here--]
#
# ChangeLog:
#   v1.6 - Corrected dynamic encoding conflict resolution. Able to identify
#          and correct 100% of header charset vs meta charset conflicts.
#   v1.5 - Added dynamic encoding conflict resolution. Work-in-progress ..
#          may be incorrect.. seems to work in a few tests... crosses fingers.
#   v1.4 - Added detection for http-package exploits, and cleaned up
#          messaging.
#   v1.3 - Added both automatic and forced character set recognition.
#          Added Http-package character set misdetection: now you can know
#          when a flaw within http-package could render your results useless,
#          and realize it isn't a fault of this script but a flaw within the
#          package Http.
#          Added the --swap parameter for hassle-free encoding conflict
#          resolutions.
#   v1.2 - Added multiple url shortening sites via their api query. Can also
#          be randomized or cycled.
#   v1.1 - Added post and regexp support for those learning regexp for the
#          very first time.
#   v1.0 - First release, enjoy.. :)
#
# TODO:
#   - Add support for regexp templates specific to each site.
#   - Support multiple proxies (with the ability to choose).
#   - Suggestions/Thanks/Bugs: e-mail at the bottom of the header.
#
# LICENSE:
#   This code comes with ABSOLUTELY NO WARRANTY.
#
#   This program is free software; you can redistribute it and/or modify it
#   under the terms of the GNU General Public License as published by the
#   Free Software Foundation; either version 2 of the License, or (at your
#   option) any later version.
#
#   This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#   Public License for more details.
#   (http://www.gnu.org/copyleft/library.txt)
#
#   Copyleft (C) 2009-2011, speechles
#   imspeechless@gmail.com
#   October 27th, 2011
#
# Credit: lee8oi for the help ... to PsWii60 for nothing.. :P
#!!!!!!!!!!!!!!!!!!!!!!!!!{ORIGINAL WEBBY HEADER}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
################################################################################

# ---> Start of config; setting begins

# do you want to display header attributes always?
# --- [ 0 no / 1 yes ]
variable webbyheader 0

# do you want to display x-header attributes always?
# --- [ 0 no / 1 yes ]
variable webbyXheader 0

# do you want to display html attributes always?
# --- [ 0 no / 1 yes ]
variable webbydoc 0

# if using the regexp (regular expression) engine do
# you want to still show title and description of webpage?
# --- [ 0 no / 1 yes ]
variable webbyRegShow 0

# max length of each line of your regexp capture?
# --- [ integer ]
variable webbySplit 403

# how many regexp captures is the maximum to issue?
# presently going above 9, will only result in 9...
# --- [ 1 - 9 ]
variable webbyMaxRegexp 9

# which method should be used when shortening the url?
# (0-3) will only use the one you've chosen.
# (4-5) will use them all.
# 0 --> http://tinyurl.com
# 1 --> http://u.nu
# 2 --> http://is.gd
# 3 --> http://cli.gs
# 4 --> randomly select one of the four above ( 2,0,0,3,1..etc )
# 5 --> cycle through the four above ( 0,1,2,3,0,1..etc )
# --- [ 0 - 5 ]
variable webbyShortType 5

# regexp capture limit
# this is how wide each regexp capture can be, prior to it
# being cleaned up for html elements. this can take a very
# long time to cleanup if the entire html was taken. so this
# variable is there to protect your bot lagging forever and
# people giving replies with tons of html to lag it.
# --- [ integer ]
variable regexp_limit 3000

# Adjusts time in last-modified which gives you durations ago
# Set this in seconds. Most people should leave this set to 0.
# variable webbyTimeAdjust 3600

# how should we treat encodings?
# 0 - do nothing, use eggdrops internal encoding whatever that may be.
# 1 - use the encoding the website is telling us to use.
#     This is the option everyone should use primarily.
# 2 - Force a static encoding for everything. If you use option 2,
#     please specify the encoding in the setting below this one.
# --- [ 0 off-internal / 1 automatic / 2 forced ]
variable webbyDynEnc 1

# option 2, force encoding
# if you've chosen this above then you must define what
# encoding we are going to force for all queries...
# --- [ encoding ]
variable webbyEnc "iso8859-1"

# fix Http-Package conflicts?
# this will disregard http-packages detected charset if it appears
# different from the character set detected within the meta's of
# the html and vice versa, you can change this behavior
# on-the-fly using the --swap parameter.
# --- [ 0 no / 1 use http-package / 2 use html meta's ]
variable webbyFixDetection 2

# report Http-package conflicts?
# this will display Http-package conflict errors to the
# channel, and will show corrections made to any encodings.
# --- [ 0 never / 1 when the conflict happens / 2 always ]
variable webbyShowMisdetection 0

# ignore urls matching ignore pattern?
# enabling this option will set webby to ignore urls that match
# any pattern in the patterns list.
variable durbyPatternIgnore 1

# What patterns should be ignored?
# patterns can be any valid string match (glob) pattern. Urls matching
# any of these patterns will be ignored if webbyPatternIgnore option is
# enabled.
# NOTE(review): inside a braced Tcl list, "#" does NOT start a comment —
# the "#*porn.com*" entry below is a live glob pattern element, not a
# commented-out one. Remove it from the list if it was meant to be disabled.
variable patterns {
  *.jpg *.png *.pdf *.gif *.mov *.zip *.exe *.tar *.gz *.rar *.bmp
  #*porn.com*
  *youtube.com* *youtu.be* *imgur*
}

# Should we ignore nicks?
# set this to 0 to disable.
# 1 to enable
variable durbyIgnoreNicks 1

# Which nicks should we ignore?
# nicks added to this list will be ignored.
variable ignorenicks {
  dukelovett dukelovett2
}

# Show urls in output messages?
# set this to 1 to enable urls
variable durbyShowUrls 0

# Use verbose mode by default?
# if enabled durby will always show type information and description of url
# along with the standard output. Otherwise use --verbose switch to request
# the extra output. Verbose mode disables title collection.
# --- [ 0 off / 1 on ]
variable durbyVerbose 0

# grab urls from channel messages?
# this will set Durby to watch for urls in channel and
# post url information when one is found.
variable durbyUrlWatch 1

# Collect titles in one message?
# if this option is enabled urlwatch will collect titles in one message and
# post them instead of posting individually.
# --- [ 0 off / 1 on ]
variable durbyCollectTitles 1

# <--- end of config; script begins

package require http

# register an https handler only when the tls package is available;
# otherwise https urls simply won't resolve (best-effort, no error).
if {![catch {package require tls}]} { ::http::register https 443 ::tls::socket }

# probe for gzip support: prefer the zlib command/package; fall back to
# Trf's [zip] command (webbyTrf set); with neither available, gzipped url
# queries are disabled via webbyNoGzip (checked later before inflating).
if {([lsearch [info commands] zlib] == -1) && ([catch {package require zlib} error] !=0)} {
  if {([catch {package require Trf} error] == 0) || ([lsearch [info commands] zip] != -1)} {
    putlog "durby: Found trf package. Fast lane activated!"
    set webbyTrf 1
  } else {
    putlog "durby: Cannot find zlib or trf package! Gzipped url queries will not be used. Enjoy the slow lane! :P"
    set webbyNoGzip 1
  }
} else {
  putlog "durby: Found zlib package. Fast lane activated!"
}

# urlwatch state array (fields as used by [weburlwatch] below):
#   delay  - minimum number of seconds between automatic grabs
#   last   - unixtime of the most recent automatic grab
#   length - minimum word length considered as a possible url
#   titlegrab / pubmflags / watch - set here but not read in this section;
#     presumably consumed later in the file — verify before changing.
set weburlwatch(titlegrab) 0
set weburlwatch(pubmflags) "-|-"
set weburlwatch(delay) 1
set weburlwatch(last) 111
set weburlwatch(length) 5
set weburlwatch(watch) 1
variable urlwatchtoken 0

# per-channel enable flag (.chanset #channel +durby) and command/trigger binds
setudef flag durby
bind pub - !webby webby
bind pub - !durby webby
bind pubm - {*://*} weburlwatch

# weburlwatch --
#   pubm handler: watches channel messages for web urls and feeds each match
#   to [webby] for title/info lookup. Rate-limited by weburlwatch(delay),
#   gated on the channel's +durby flag and the durbyUrlWatch config toggle.
#   Returns 0 when a url was explicitly requested or ignored; returns 1 so
#   the pubm trigger is not logged (see trailing comment).
proc weburlwatch {nick host user chan text} {
  # watch for web urls in channel
  variable weburlwatch
  set result ""
  # only act on +durby channels, when urlwatch is on, and when at least
  # weburlwatch(delay) seconds have passed since the last automatic grab
  if {([channel get $chan durby]) && ([expr {[unixtime] - $weburlwatch(delay)}] > $weburlwatch(last))\
    && ($::durbyUrlWatch > 0)} {
    foreach word [split $text] {
      # an explicit !webby/!durby request is handled by the pub bind instead
      if {($word == "!webby") || ($word == "!durby")} {
        return 0
      } else {
        # candidate url: long enough, starts with http(s)/ftp(s)://, and does
        # not embed credentials/ports or dot-path tricks (exploit guard)
        if {[string length $word] >= $weburlwatch(length) && \
          [regexp {^(f|ht)tp(s|)://} $word] && ![regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $word]} {
          #check ignore list
          if {$::durbyPatternIgnore > 0} {
            foreach pattern $::patterns {
              if {[string match -nocase $pattern $word]} { return 0 }
            }
          }
          # skip urls posted by ignored nicks (e.g. other bots)
          if {$::durbyIgnoreNicks > 0} {
            foreach nicks $::ignorenicks {
              if {[string match -nocase $nicks $nick]} { return 0 }
            }
          }
          set ::urlwatchtoken 1
          set weburlwatch(last) [unixtime]
          set weburlwatch(titlegrab) 1
          # collect each result; [webby] presumably returns the title text
          # when called via urlwatch — verify against the rest of the file
          lappend result "~ [webby $nick $host $user $chan $word] "
        }
      }
    }
    set ::urlwatchtoken 0
    # title collection: post all gathered titles in a single privmsg
    if {($::durbyCollectTitles > 0)} {putserv "privmsg $chan :[join $result]"}
  }
  # change to return 0 if you want the pubm trigger logged additionally..
return 1 } proc webby {nick uhost handle chan site} { if {![channel get $chan durby]} { return } if {[regsub -nocase -all -- {--verbose} $site "" site]} { set vf 0 } if {[regsub -nocase -all -- {--header} $site "" site]} { set w1 0 } if {[regsub -nocase -all -- {--validate} $site "" site]} { set w1 0 ; set w2 0 ; set w3 0 ; set w10 0 } if {[regsub -nocase -all -- {--xheader} $site "" site]} { set w2 0 } if {[regsub -nocase -all -- {--html} $site "" site]} { set w3 0 } if {[regsub -nocase -all -- {--post} $site "" site]} { set w4 0 } if {[regsub -nocase -all -- {--override} $site "" site]} { set w6 0 } if {[regsub -nocase -all -- {--nostrip} $site "" site]} { set w7 0 } if {[regsub -nocase -all -- {--swap} $site "" site]} { set w8 0 } if {[regsub -nocase -all -- {--gz} $site "" site]} { set w9 0 } if {[regexp -nocase -- {--regexp (.*?)--} $site - reggy]} { if {[catch {set varnum [lindex [regexp -about -- $reggy] 0]} error]} { putserv "privmsg $chan :\002regexp\002 [set error]" return 0 } elseif {$varnum > $::webbyMaxRegexp} { putserv "privmsg $chan :\002regexp\002 too many captures ($varnum), reducing to ($::webbyMaxRegexp)" set varnum $::webbyMaxRegexp } set w5 0 regsub -nocase -- {--regexp .*?--} $site "" site } #check ignore lists if {$::durbyPatternIgnore > 0} { foreach pattern $::patterns { if {[string match -nocase $pattern $site]} { return 0 } } } #check nick ignore list if {$::durbyIgnoreNicks > 0} { foreach nicks $::ignorenicks { if {[string match -nocase $nicks $nick]} { return 0 } } } if {[string match "*://*" $site]} { set site [join [lrange [split $site "/"] 2 end] "/"] } if {[string equal "-" [string index [set site [string map {" " " "} [string trim $site]]] 0]]} { putserv "privmsg $chan :\002durby\002: you've used an improper flag which may exploit http-package!" 
return } foreach e [split $site "."] { lappend newsite [idna::domain_toascii $e] } set fullquery "http://[join $newsite "."]" set ua "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5" set http [::http::config -useragent $ua] if {[info exists w4]} { putserv "privmsg $chan :Deconstructing post query..." if {![regexp -- {\|} $fullquery] && ![regexp -- {\?} $fullquery]} { putserv "privmsg $chan :\002durby\002: $fullquery does not appear to be a post query, using simple query..." unset w4 } else { if {[regexp -- {\|} $fullquery]} { set url [lindex [split $fullquery "|"] 0] set query [lindex [split $fullquery "|"] 1] if {[regexp -- {\?} $query]} { set suburl "/[lindex [split $query "?"] 0]?" set query [lindex [split $query "?"] 1] } else { set suburl "" } putserv "privmsg $chan :\002durby\002: detected \002url\002 $url \002query\002 $query" } else { set suburl "" set url [lindex [split $fullquery "?"] 0] set query [lindex [split $fullquery "?"] 1] putserv "privmsg $chan :\002durby\002: detected \002url\002 $url \002query\002 $query" } set post [list] foreach {entry} [split $query "&"] { if {![regexp {\=} $entry]} { putserv "privmsg $chan :\002durby\002: broken post structure ($entry). Is not post query, using simple query..." unset w4 break } else { set name [lindex [split $entry "="] 0] set value [lindex [split $entry "="] 1] set post [concat $post "$name=$value"] } } } if {[info exists w4]} { putserv "privmsg $chan :Posting..." 
} } set value "" if {[info exists w9]} { set gzp "Accept-Encoding gzip,deflate" } else { set gzp "" } if {[info exists w4]} { if {[info exists w10]} { catch {set http [::http::geturl "$url$suburl" -query "[join $post "&"]" -headers $gzp -validate 1 -timeout 10000]} error } else { catch {set http [::http::geturl "$url$suburl" -query "[join $post "&"]" -headers $gzp -timeout 10000]} error } } else { if {[info exists w10]} { catch {set http [::http::geturl "$fullquery" -headers $gzp -validate 1 -timeout 10000]} error } else { catch {set http [::http::geturl "$fullquery" -headers $gzp -timeout 10000]} error } set url $fullquery ; set query "" } if {![string match -nocase "::http::*" $error]} { set cleanerr [string totitle [string map {"\n" " | "} $error]] putserv "privmsg $chan :\002durby\002: $cleanerr \( $fullquery \)" return 0 } if {![string equal -nocase [::http::status $http] "ok"]} { putserv "privmsg $chan :\002durby\002: [string totitle [::http::status $http]] \( $fullquery \)" return 0 } upvar #0 $http state if {![info exists state(meta)]} { putserv "privmsg $chan :\002durby\002: unsupported URL error \( $fullquery \)" ; return 0 } set redir [::http::ncode $http] # iterate through the meta array foreach {name value} $state(meta) { # do we have cookies? 
if {[string equal -nocase $name "Set-Cookie"]} { # yes, add them to cookie list lappend webbyCookies [lindex [split $value {;}] 0] } } if {[info exists webbyCookies] && [llength $webbyCookies]} { set cookies "[join $webbyCookies {;}]" } else { set cookies "" } set html [::http::data $http] if {[string match *t.co* $fullquery] && [regexp {} $html - value]} { if {[info exists w10]} { if {[string length $cookies]} { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"] Cookie $cookies" -validate 1 -timeout [expr 1000 * 10]]} error } else { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"]" -validate 1 -timeout [expr 1000 * 10]]} error } } else { if {[string length $cookies]} { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"] Cookie $cookies" -timeout [expr 1000 * 10]]} error } else { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"]" -timeout [expr 1000 * 10]]} error } } if {![string match -nocase "::http::*" $error]} { putserv "privmsg $chan :\002durby\002: [string totitle $error] \( $value \)" return 0 } if {![string equal -nocase [::http::status $http] "ok"]} { putserv "privmsg $chan :\002durby\002: [string totitle [::http::status $http]] \( $value \)" return 0 } upvar #0 $http state if {![info exists state(meta)]} { putserv "privmsg $chan :\002durby\002: unsupported URL error \( $fullquery \)" ; return 0 } set html [::http::data $http] set url [string map {" " "%20"} $value] set redir [::http::ncode $http] # iterate through the meta array foreach {name value} $state(meta) { # do we have cookies? 
if {[string equal -nocase $name "Set-Cookie"]} { # yes, add them to cookie list lappend webbyCookies [lindex [split $value {;}] 0] } } if {[info exists webbyCookies] && [llength $webbyCookies]} { set cookies "[join $webbyCookies {;}]" } else { set cookies "" } } # REDIRECT ? set r 0 while {[string match "*${redir}*" "307|303|302|301" ]} { foreach {name value} $state(meta) { if {[regexp -nocase ^location$ $name]} { if {![string match "http*" $value]} { if {![string match "/" [string index $value 0]]} { set value "[join [lrange [split $url "/"] 0 2] "/"]/$value" } else { set value "[join [lrange [split $url "/"] 0 2] "/"]$value" } } if {[string match [string map {" " "%20"} $value] $url]} { if {![info exists poison]} { set poison 1 } else { incr poison if {$poison > 2} { putserv "privmsg $chan :\002durby\002: redirect error (self to self)\($url\) - ($cookies) are no help..." return } } } if {[info exists w10]} { if {[string length $cookies]} { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"] Cookie $cookies" -validate 1 -timeout [expr 1000 * 10]]} error } else { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"]" -validate 1 -timeout [expr 1000 * 10]]} error } } else { if {[string length $cookies]} { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"] Cookie $cookies" -timeout [expr 1000 * 10]]} error } else { catch {set http [::http::geturl "[string map {" " "%20"} $value]" -headers "[string trim "Referer $url $gzp"]" -timeout [expr 1000 * 10]]} error } } if {![string match -nocase "::http::*" $error]} { putserv "privmsg $chan :\002durby\002: [string totitle $error] \( $value \)" return 0 } if {![string equal -nocase [::http::status $http] "ok"]} { putserv "privmsg $chan :\002durby\002: [string totitle [::http::status $http]] \( $value \)" return 0 } set redir [::http::ncode $http] set html 
[::http::data $http] set url [string map {" " "%20"} $value] upvar #0 $http state if {[incr r] > 10} { putserv "privmsg $chan :\002durby\002: redirect error (>10 too deep) \( $url \)" ; return } # iterate through the meta array #if {![string length cookies]} { foreach {name value} $state(meta) { # do we have cookies? if {[string equal -nocase $name "Set-Cookie"]} { # yes, add them to cookie list lappend webbyCookies [lindex [split $value {;}] 0] } } if {[info exists webbyCookies] && [llength $webbyCookies]} { set cookies "[join $webbyCookies {;}]" } else { set cookies "" } #} } } } set bl [string bytelength $html] set nc [::http::ncode $http] ; set flaw "" if {![info exists webbyNoGzip]} { foreach {name value} $state(meta) { if {[regexp -nocase ^Content-Encoding$ $name]} { if {[string equal -nocase "gzip" $value] && [string length $html]} { if {![info exists webbyTrf]} { catch { set bl "$bl bytes (gzip); [string bytelength [set html [zlib inflate [string range $html 10 [expr { [string length $html] - 8 } ]]]]]" } } else { catch { set bl "$bl bytes (gzip); [string bytelength [set html [zip -mode decompress -nowrap 1 [string range $html 10 [expr { [string length $html] - 8 } ]]]]]" } } break } } } } if {[regexp -nocase {"Content-Type" content=".*?; charset=(.*?)".*?>} $html - char]} { set char [string trim [string trim $char "\"' /"] {;}] regexp {^(.*?)"} $char - char set mset $char if {![string length $char]} { set char "None Given" ; set char2 "None Given" } set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]] } else { if {[regexp -nocase {} $html - char]} { set char [string trim $char "\"' /"] regexp {^(.*?)"} $char - char set mset $char if {![string length $char]} { set char "None Given" ; set char2 "None Given" } set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]] } elseif {[regexp -nocase {encoding="(.*?)"} $html - char]} { 
set mset $char ; set char [string trim $char] if {![string length $char]} { set char "None Given" ; set char2 "None Given" } set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]] } else { set char "None Given" ; set char2 "None Given" ; set mset "None Given" } } set char3 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $state(charset)]] if {[string equal $char $state(charset)] && [string equal $char $char2] && ![string equal -nocase "none given" $char]} { set char [string trim $state(charset) {;}] set flaw "" } else { if {![string equal -nocase $char2 $char3] && ![string equal -nocase "none given" $char2] && $::webbyFixDetection > 0} { if {$::webbyFixDetection > 0} { switch $::webbyFixDetection { 1 { if {![info exists w8]} { if {$::webbyShowMisdetection > 0 } { set flaw "\002durby\002: conflict! html meta tagging reports: $char2 .. using charset detected from http-package: $char3 to avoid conflict."} { set flaw "" } set html [webbyConflict $html $char3 $char2 2 [info exists w9]] set char [string trim $char3 {;}] } else { if {$::webbyShowMisdetection > 0 } { set flaw "\002durby\002: conflict! http-package reports: $char3 .. using charset detected from html meta tagging: $char2 to avoid conflict." } { set flaw "" } set html [webbyConflict $html $char3 $char2 1 [info exists w9]] set char [string trim $char2 {;}] } } 2 { if {![info exists w8]} { if {$::webbyShowMisdetection > 0 } { set flaw "\002durby\002: conflict! http-package reports: $char3 .. using charset detected from html meta tagging: $char2 to avoid conflict." } { set flaw "" } set html [webbyConflict $html $char3 $char2 1 [info exists w9]] set char [string trim $char2 {;}] } else { if {$::webbyShowMisdetection > 0 } { set flaw "\002durby\002: conflict! html meta tagging reports: $char2 .. using charset detected from http-package: $char3 to avoid conflict." 
} { set flaw "" } set html [webbyConflict $html $char3 $char2 2 [info exists w9]] set char [string trim $char3 {;}] } } } } else { set flaw "" set char [string trim $char3 {;}] } } else { set char [string trim $char3 {;}] if {[catch {package present http 2.7}]} { # assume http package 2.5.3 if {[string equal -nocase "utf-8" [encoding system]]} { if {![string equal -nocase "utf-8" $char3]} { set html [encoding convertfrom $char3 $html] } else { #set html [encoding convertto $char3 $html] } } elseif {![string equal -nocase "utf-8" [encoding system]]} { if {[string equal "utf-8" $char3]} { if {![info exists w9]} { set html [encoding convertto $char3 $html] } } else { set html [encoding convertto "utf-8" [encoding convertfrom $char3 $html]] } } set flaw "" } else { # we have http package 2.7 if {![string equal -nocase "utf-8" [encoding system]]} { set html [encoding convertto $char3 $html] } } } } set s [list] ; set sx [list] ; set type "\( $nc" ; set metas [list] if {[string equal -nocase "none given" $char]} { set char [string trim $state(charset) {;}] } set cset $state(charset) switch $::webbyDynEnc { 2 { set html [incithencode $html [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $::webbyEnc]] } } set red ""; if {$r > 0} { set red "; $r redirects" } ; set end "" foreach {name value} $state(meta) { if {[string match -nocase "content-type" $name]} { append type "; [lindex [split $value ";"] 0]; $char; ${bl} bytes$red" } if {[string match -nocase "last-modified" $name]} { set end " \)\( [webbyTime $value]" } if {[string match -nocase "set-cookie" $name]} { continue } if {[string match -nocase "x-*" $name]} { lappend sx "\002$name\002=$value" ; continue } lappend s "\002$name\002=[string trim $value {;}]" } append type "$end \)" ; set e "" ::http::cleanup $http regsub -all {(?:\n|\t|\v|\r|\x01)} $html " " html # DEBUG DEBUG set junk [open "webby.txt" w] puts $junk $html set ::delTemp $html close $junk if {![regexp -nocase {