Add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way

author: Luke Bratch <luke@bratch.co.uk> 2020-10-21 15:14:36 +0100
committer: Luke Bratch <luke@bratch.co.uk> 2020-10-21 15:14:36 +0100
commit: a84ed1a96c6df7f103fde765c023c6f459d4a2f4 (patch)
tree: 47702ad5f9c58da04e2a9336a5baade560e2ffef /title.bash
parent: 82df67f9f1d39198c2b6386fa513b4f3bf7298d7 (diff)
1 file changed, 13 insertions, 1 deletion
diff --git a/title.bash b/title.bash
index 1d933c8..36ee3f6 100755
--- a/title.bash
+++ b/title.bash
@@ -33,6 +33,7 @@
 # v1.6.1: change user agent string to latest Firefox
 #       : add Twitter to quirks mode since Twitter no longer sends a <title> to even the latest Firefox user agent string
 # v1.7  : add support for Open Graph titles
+# v1.8  : add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way
 
 # Disable globbing
 set -f
@@ -42,9 +43,16 @@ OPTS='--interface 178.32.55.206 --location --insecure --silent --max-filesize 10
 read url
 
 # Quirks
+# Claim to be cURL
 if echo "$url" | grep -Eq "^http(s|)://(m.|www.|)(youtube\.com|youtu\.be)/|^http(s|)://www\.ispreview\.co\.uk/|^http(s|)://(www.|)twitter\.com/" ; then
   OPTS='--interface 192.168.122.10 --location --insecure --silent --max-filesize 1048576 --max-time 10 -H "Accept-Encoding: gzip" -H "Accept: text/html" -H "Accept-Language: en-GB,en;q=0.5"'
 fi
+# Use Twitter mobile site
+if echo "$url" | grep -Eq "^http(s|)://(www.|)twitter\.com/" ; then
+  url=$(echo "$url" | sed 's|/www\.twitter\.com/|/mobile.twitter.com/|')
+  url=$(echo "$url" | sed 's|/twitter\.com/|/mobile.twitter.com/|')
+  quirk="twitter"
+fi
 
 TIME=$(date +%s%N)
 
@@ -59,7 +67,7 @@ then
   echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
 elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
 then
-  echo "blatitle version 1.7"
+  echo "blatitle version 1.8"
 else
   eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
   ret=$?
@@ -78,6 +86,10 @@ else
   then
     # text/plain or gopher:// URL, just use the first line
     grep -m1 . /tmp/$TIME.body
+  elif [[ "$quirk" = "twitter" ]]
+  then
+    # Extract the Twitter title
+    grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
   else
     # Probably HTML
     # Check for Open Graph og:title first
author	Luke Bratch <luke@bratch.co.uk>	2020-10-21 15:14:36 +0100
committer	Luke Bratch <luke@bratch.co.uk>	2020-10-21 15:14:36 +0100
commit	a84ed1a96c6df7f103fde765c023c6f459d4a2f4 (patch)
tree	47702ad5f9c58da04e2a9336a5baade560e2ffef /title.bash
parent	82df67f9f1d39198c2b6386fa513b4f3bf7298d7 (diff)