summary | refs | log | tree | commit | diff
path: root/title.bash
diff options
context:
space:
mode:
author Luke Bratch <luke@bratch.co.uk> 2020-10-21 15:14:36 +0100
committer Luke Bratch <luke@bratch.co.uk> 2020-10-21 15:14:36 +0100
commit a84ed1a96c6df7f103fde765c023c6f459d4a2f4 (patch)
tree 47702ad5f9c58da04e2a9336a5baade560e2ffef /title.bash
parent 82df67f9f1d39198c2b6386fa513b4f3bf7298d7 (diff)
Add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way
Diffstat (limited to 'title.bash')
-rwxr-xr-x title.bash 14
1 file changed, 13 insertions, 1 deletion
diff --git a/title.bash b/title.bash
index 1d933c8..36ee3f6 100755
--- a/title.bash
+++ b/title.bash
@@ -33,6 +33,7 @@
# v1.6.1: change user agent string to latest Firefox
# : add Twitter to quirks mode since Twitter no longer sends a <title> to even the latest Firefox user agent string
# v1.7 : add support for Open Graph titles
+# v1.8 : add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way
# Disable globbing
set -f
@@ -42,9 +43,16 @@ OPTS='--interface 178.32.55.206 --location --insecure --silent --max-filesize 10
read url
# Quirks
+# Claim to be cURL
if echo "$url" | grep -Eq "^http(s|)://(m.|www.|)(youtube\.com|youtu\.be)/|^http(s|)://www\.ispreview\.co\.uk/|^http(s|)://(www.|)twitter\.com/" ; then
OPTS='--interface 192.168.122.10 --location --insecure --silent --max-filesize 1048576 --max-time 10 -H "Accept-Encoding: gzip" -H "Accept: text/html" -H "Accept-Language: en-GB,en;q=0.5"'
fi
+# Use Twitter mobile site
+if echo "$url" | grep -Eq "^http(s|)://(www.|)twitter\.com/" ; then
+ url=$(echo "$url" | sed 's|/www\.twitter\.com/|/mobile.twitter.com/|')
+ url=$(echo "$url" | sed 's|/twitter\.com/|/mobile.twitter.com/|')
+ quirk="twitter"
+fi
TIME=$(date +%s%N)
@@ -59,7 +67,7 @@ then
echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
then
- echo "blatitle version 1.7"
+ echo "blatitle version 1.8"
else
eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
ret=$?
@@ -78,6 +86,10 @@ else
then
# text/plain or gopher:// URL, just use the first line
grep -m1 . /tmp/$TIME.body
+ elif [[ "$quirk" = "twitter" ]]
+ then
+ # Extract the Twitter title
+ grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
else
# Probably HTML
# Check for Open Graph og:title first