From 4a661879ffb04e0a00f3c7183e7e5b01f73101d7 Mon Sep 17 00:00:00 2001 From: Luke Bratch Date: Wed, 17 Apr 2024 20:57:57 +0100 Subject: Use grep -a (--text) when grepping files in case files have binary content in them --- title.bash | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/title.bash b/title.bash index 5736e0a..1929173 100755 --- a/title.bash +++ b/title.bash @@ -80,20 +80,20 @@ else echo "cURL error $ret when fetching the page." exit fi - grep -qi "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body - grep -qi "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain" + grep -qia "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body + grep -qia "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain" if [[ "$TYPE" == "text/plain" ]] || [[ "$url" =~ ^gopher:// ]] then # text/plain or gopher:// URL, just use the first line - grep -m1 . /tmp/$TIME.body + grep -a -m1 . /tmp/$TIME.body elif [[ "$quirk" = "twitter" ]] then # Extract the Twitter title - grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//' + grep -a -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//' else # Probably HTML # Check for Open Graph og:title first - xmllint --xpath 'string(/meta/@content)' <(grep -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null + xmllint --xpath 'string(/meta/@content)' <(grep -a -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null # None found, look for HTML if [ "$?" 
-ne 0 ] ; then cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title[^>]*>([^<]+)' | head -1 | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' -- cgit v1.2.3