diff options
author | Luke Bratch <luke@bratch.co.uk> | 2015-08-07 10:53:10 +0100 |
---|---|---|
committer | Luke Bratch <luke@bratch.co.uk> | 2015-08-07 10:53:10 +0100 |
commit | 492ad1f94d602285e9719791dd502e5df0712fc5 (patch) | |
tree | b63bfdf194d2a7fc96c6b98c37c524af7152510c | |
parent | 7b5636f6183b326bad20af84395cbe754a189efb (diff) |
Switch to a cleaner awk script to detect titles
-rwxr-xr-x | title.bash | 5 |
1 files changed, 3 insertions, 2 deletions
@@ -16,6 +16,7 @@
 # v0.9.1: fix support for servers that don't support HTTP HEAD
 # v0.9.2: tidy up temporary files afterwards
 # v0.9.3: advertise text/html acceptance as some servers require it
+# v0.9.4: switch to a cleaner awk script to detect titles
 
 OPTS='--location --insecure --silent --max-filesize 1048576 --max-time 10 -A "Mozilla/5.0 (X11; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0" -H "Accept-Encoding: gzip" -H "Accept: text/html"'
 
@@ -34,10 +35,10 @@ then
 echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
 elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
 then
-echo "blatitle version 0.9.3"
+echo "blatitle version 0.9.4"
 else
 eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
 grep -q "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
-sed -n -e 'H;${x;s!.*<head[^>]*>\(.*\)</head>.*!\1!I;tnext};b;:next;s!.*<title>\(.*\)</title>.*!\1!pI' /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' | sed -e 's/^[ \t]*//'
+awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' | sed -e 's/^[ \t]*//'
 rm /tmp/$TIME.header /tmp/$TIME.body
 fi