diff options
author | Luke Bratch <luke@bratch.co.uk> | 2020-10-21 15:14:36 +0100 |
---|---|---|
committer | Luke Bratch <luke@bratch.co.uk> | 2020-10-21 15:14:36 +0100 |
commit | a84ed1a96c6df7f103fde765c023c6f459d4a2f4 (patch) | |
tree | 47702ad5f9c58da04e2a9336a5baade560e2ffef /title.bash | |
parent | 82df67f9f1d39198c2b6386fa513b4f3bf7298d7 (diff) |
Add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way
Diffstat (limited to 'title.bash')
-rwxr-xr-x | title.bash | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
@@ -33,6 +33,7 @@
 # v1.6.1: change user agent string to latest Firefox
 #       : add Twitter to quirks mode since Twitter no longer sends a <title> to even the latest Firefox user agent string
 # v1.7  : add support for Open Graph titles
+# v1.8  : add special extra quirks mode for Twitter because it doesn't seem to resemble a website in any way
 
 # Disable globbing
 set -f
@@ -42,9 +43,16 @@ OPTS='--interface 178.32.55.206 --location --insecure --silent --max-filesize 10
 read url
 
 # Quirks
+# Claim to be cURL
 if echo "$url" | grep -Eq "^http(s|)://(m.|www.|)(youtube\.com|youtu\.be)/|^http(s|)://www\.ispreview\.co\.uk/|^http(s|)://(www.|)twitter\.com/" ; then
   OPTS='--interface 192.168.122.10 --location --insecure --silent --max-filesize 1048576 --max-time 10 -H "Accept-Encoding: gzip" -H "Accept: text/html" -H "Accept-Language: en-GB,en;q=0.5"'
 fi
+# Use Twitter mobile site
+if echo "$url" | grep -Eq "^http(s|)://(www.|)twitter\.com/" ; then
+  url=$(echo "$url" | sed 's|/www\.twitter\.com/|/mobile.twitter.com/|')
+  url=$(echo "$url" | sed 's|/twitter\.com/|/mobile.twitter.com/|')
+  quirk="twitter"
+fi
 
 TIME=$(date +%s%N)
 
@@ -59,7 +67,7 @@ then
   echo "Usage: '!title <url>' where <url> is an http:// or https:// URL"
 elif [ "$url" == "-v" ] || [ "$url" == "--version" ]
 then
-  echo "blatitle version 1.7"
+  echo "blatitle version 1.8"
 else
   eval curl $OPTS -D /tmp/$TIME.header '$url' -o /tmp/$TIME.body.gz
   ret=$?
@@ -78,6 +86,10 @@ else
   then
     # text/plain or gopher:// URL, just use the first line
     grep -m1 . /tmp/$TIME.body
+  elif [[ "$quirk" = "twitter" ]]
+  then
+    # Extract the Twitter title
+    grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
   else
     # Probably HTML
     # Check for Open Graph og:title first
```