summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Bratch <luke@bratch.co.uk> 2015-11-30 11:48:58 +0000
committer Luke Bratch <luke@bratch.co.uk> 2015-11-30 11:48:58 +0000
commit 931e650436d30f1b187d0ac5639be608a41807e0 (patch)
tree 07e400973f723be189451ff56acb1cda570380a8
parent 521ceac5adc13ab333786dfa903c94909ba74eed (diff)
Decode HTML using PHP instead of recode (adds PHP as a dependency, removes recode)
-rwxr-xr-x title.bash 6
1 files changed, 4 insertions, 2 deletions
diff --git a/title.bash b/title.bash
index cc0f83c..7161950 100755
--- a/title.bash
+++ b/title.bash
@@ -20,6 +20,7 @@
# v1.0 : if Content-Type is text/plain, just use the first line as the title
# v1.1 : decode HTML in the title using recode (adds recode as a dependency)
# v1.2 : deal with some cURL exit codes
+# v1.2.1: decode HTML using PHP instead of recode (adds PHP as a dependency, removes recode)
OPTS='--location --insecure --silent --max-filesize 1048576 --max-time 10 -A "Mozilla/5.0 (X11; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0" -H "Accept-Encoding: gzip" -H "Accept: text/html"'
@@ -38,7 +39,7 @@ then
echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
then
- echo "blatitle version 1.2"
+ echo "blatitle version 1.2.1"
else
eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
ret=$?
@@ -57,7 +58,8 @@ else
then
head -n1 /tmp/$TIME.body
else
- awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' | sed -e 's/^[ \t]*//' | recode html..ascii
+ awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' \
+ | sed -e 's/^[ \t]*//' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'
fi
rm /tmp/$TIME.header /tmp/$TIME.body
fi