summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Bratch <luke@bratch.co.uk> 2020-10-19 11:38:36 +0100
committer Luke Bratch <luke@bratch.co.uk> 2020-10-19 11:38:36 +0100
commit 82df67f9f1d39198c2b6386fa513b4f3bf7298d7 (patch)
tree 3760ef9790cace76493d66a0edf8740b945a0c0a
parent ebb69db21c2fcd8939b5149999a8b03c24db0015 (diff)
Add support for Open Graph titles
-rwxr-xr-x title.bash 12
1 files changed, 10 insertions, 2 deletions
diff --git a/title.bash b/title.bash
index 5faf3ad..1d933c8 100755
--- a/title.bash
+++ b/title.bash
@@ -32,6 +32,7 @@
# v1.6 : handle multiple title tags, only spoof IP if in quirks mode, add www.ispreview.co.uk to quirks mode
# v1.6.1: change user agent string to latest Firefox
# : add Twitter to quirks mode since Twitter no longer sends a <title> to even the latest Firefox user agent string
+# v1.7 : add support for Open Graph titles
# Disable globbing
set -f
@@ -58,7 +59,7 @@ then
echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
then
- echo "blatitle version 1.6.1"
+ echo "blatitle version 1.7"
else
eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
ret=$?
@@ -75,9 +76,16 @@ else
grep -qi "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain"
if [[ "$TYPE" == "text/plain" ]] || [[ "$url" =~ ^gopher:// ]]
then
+ # text/plain or gopher:// URL, just use the first line
grep -m1 . /tmp/$TIME.body
else
- cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title[^>]*>([^<]+)<\/title>' | head -1 | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'
+ # Probably HTML
+ # Check for Open Graph og:title first
+ xmllint --xpath 'string(/meta/@content)' <(grep -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null
+ # None found, look for HTML <title>
+ if [ "$?" -ne 0 ] ; then
+ cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title[^>]*>([^<]+)<\/title>' | head -1 | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'
+ fi
fi
rm /tmp/$TIME.header /tmp/$TIME.body
fi