summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Bratch <luke@bratch.co.uk> 2024-04-17 20:57:57 +0100
committer Luke Bratch <luke@bratch.co.uk> 2024-04-17 20:57:57 +0100
commit 4a661879ffb04e0a00f3c7183e7e5b01f73101d7 (patch)
tree 03048bde5f8412eafa964e698b3ecf0292167da5
parent 1a715e928bf0c3fbfcef329f955e598477f7e8ad (diff)
Use grep -a (--text) when grepping files in case files have binary content in them
-rwxr-xr-x title.bash 10
1 files changed, 5 insertions, 5 deletions
diff --git a/title.bash b/title.bash
index 5736e0a..1929173 100755
--- a/title.bash
+++ b/title.bash
@@ -80,20 +80,20 @@ else
echo "cURL error $ret when fetching the page."
exit
fi
- grep -qi "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
- grep -qi "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain"
+ grep -qia "Content-Encoding: gzip" /tmp/$TIME.header && gunzip /tmp/$TIME.body.gz || mv /tmp/$TIME.body.gz /tmp/$TIME.body
+ grep -qia "Content-Type: text/plain" /tmp/$TIME.header && TYPE="text/plain"
if [[ "$TYPE" == "text/plain" ]] || [[ "$url" =~ ^gopher:// ]]
then
# text/plain or gopher:// URL, just use the first line
- grep -m1 . /tmp/$TIME.body
+ grep -a -m1 . /tmp/$TIME.body
elif [[ "$quirk" = "twitter" ]]
then
# Extract the Twitter title
- grep -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
+ grep -a -A1 tweet-text /tmp/$TIME.body | head -n2 | tail -n1 | perl -pe 's|^.*?>||' | perl -pe 's|<.*?>||g' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' | sed -r 's/^\s+// ; s/\s+$//'
else
# Probably HTML
# Check for Open Graph og:title first
- xmllint --xpath 'string(/meta/@content)' <(grep -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null
+ xmllint --xpath 'string(/meta/@content)' <(grep -a -Em1 '<.*meta.*property.*og:title' /tmp/$TIME.body) 2> /dev/null
# None found, look for HTML <title>
if [ "$?" -ne 0 ] ; then
cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title[^>]*>([^<]+)</title>' | head -1 | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'