From a84ed1a96c6df7f103fde765c023c6f459d4a2f4 Mon Sep 17 00:00:00 2001 From: Luke Bratch Date: Wed, 21 Oct 2020 15:14:36 +0100 Subject: Add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way --- title.bash | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/title.bash b/title.bash index 1d933c8..36ee3f6 100755 --- a/title.bash +++ b/title.bash @@ -33,6 +33,7 @@ # v1.6.1: change user agent string to latest Firefox # : add Twitter to quirks mode since Twitter no longer sends a <title> to even the latest Firefox user agent string # v1.7 : add support for Open Graph titles +# v1.8 : add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way # Disable globbing set -f @@ -42,9 +43,16 @@ OPTS='--interface 178.32.55.206 --location --insecure --silent --max-filesize 10 read url # Quirks +# Claim to be cURL if echo "$url" | grep -Eq "^http(s|)://(m.|www.|)(youtube\.com|youtu\.be)/|^http(s|)://www\.ispreview\.co\.uk/|^http(s|)://(www.|)twitter\.com/" ; then OPTS='--interface 192.168.122.10 --location --insecure --silent --max-filesize 1048576 --max-time 10 -H "Accept-Encoding: gzip" -H "Accept: text/html" -H "Accept-Language: en-GB,en;q=0.5"' fi +# Use Twitter mobile site +if echo "$url" | grep -Eq "^http(s|)://(www.|)twitter\.com/" ; then + url=$(echo "$url" | sed 's|/www\.twitter\.com/|/mobile.twitter.com/|') + url=$(echo "$url" | sed 's|/twitter\.com/|/mobile.twitter.com/|') + quirk="twitter" +fi TIME=$(date +%s%N) @@ -59,7 +67,7 @@ then echo "Usage: '!title <url>' where <url> is an http:// or https:// URL" elif [ "$url" == "-v" ] || [ "$url" == "--version" ] then - echo "blatitle version 1.7" + echo "blatitle version 1.8" else eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz ret=$? @@ -78,6 +86,10 @@ else then # text/plain or gopher:// URL, just use the first line grep -m1 . 
/tmp/$TIME.body + elif [[ "$quirk" = "twitter" ]] + then + # Extract the Twitter title + grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//' else # Probably HTML # Check for Open Graph og:title first -- cgit v1.2.3