From e4447324a41e18e3a0d770f8dac0a558463ae1f8 Mon Sep 17 00:00:00 2001
From: Luke Bratch
Date: Sat, 21 Nov 2015 22:35:47 +0000
Subject: Decode HTML in the title using recode (adds recode as a dependency)

---
 title.bash | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'title.bash')

diff --git a/title.bash b/title.bash
index df06d52..f0065fd 100755
--- a/title.bash
+++ b/title.bash
@@ -18,6 +18,7 @@
 # v0.9.3: advertise text/html acceptance as some servers require it
 # v0.9.4: switch to a cleaner awk script to detect titles
 # v1.0 : if Content-Type is text/plain, just use the first line as the title
+# v1.1 : decode HTML in the title using recode (adds recode as a dependency)
 
 OPTS='--location --insecure --silent --max-filesize 1048576 --max-time 10 -A "Mozilla/5.0 (X11; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0" -H "Accept-Encoding: gzip" -H "Accept: text/html"'
 
@@ -36,7 +37,7 @@ then
     echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
 elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
 then
-    echo "blatitle version 1.0"
+    echo "blatitle version 1.1"
 else
     eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
     grep -q "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
@@ -45,7 +46,7 @@ else
     then
         head -n1 /tmp/$TIME.body
     else
-        awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' | sed -e 's/^[ \t]*//'
+        awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' | sed -e 's/^[ \t]*//' | recode html..ascii
     fi
     rm /tmp/$TIME.header /tmp/$TIME.body
 fi
-- 
cgit v1.2.3