diff options
author | Luke Bratch <luke@bratch.co.uk> | 2024-04-17 20:57:57 +0100 |
---|---|---|
committer | Luke Bratch <luke@bratch.co.uk> | 2024-04-17 20:57:57 +0100 |
commit | 4a661879ffb04e0a00f3c7183e7e5b01f73101d7 (patch) | |
tree | 03048bde5f8412eafa964e698b3ecf0292167da5 /title.bash | |
parent | 1a715e928bf0c3fbfcef329f955e598477f7e8ad (diff) |
Diffstat (limited to 'title.bash')
-rwxr-xr-x | title.bash | 10 |
1 files changed, 5 insertions, 5 deletions
@@ -80,20 +80,20 @@ else
  echo "cURL error $ret when fetching the page."
  exit
  fi
- grep -qi "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
- grep -qi "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain"
+ grep -qia "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
+ grep -qia "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain"
  if [[ "$TYPE" == "text/plain" ]] || [[ "$url" =~ ^gopher:// ]]
  then
  # text/plain or gopher:// URL, just use the first line
- grep -m1 . /tmp/$TIME.body
+ grep -a -m1 . /tmp/$TIME.body
  elif [[ "$quirk" = "twitter" ]]
  then
  # Extract the Twitter title
- grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
+ grep -a -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
  else
  # Probably HTML
  # Check for Open Graph og:title first
- xmllint --xpath 'string(/meta/@content)' <(grep -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null
+ xmllint --xpath 'string(/meta/@content)' <(grep -a -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null
  # None found, look for HTML <title>
  if [ "$?" -ne 0 ] ; then
  cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title[^>]*>([^<]+)</title>' | head -1 | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' |