diff options
author | Luke Bratch <luke@bratch.co.uk> | 2019-02-15 16:06:09 +0000 |
---|---|---|
committer | Luke Bratch <luke@bratch.co.uk> | 2019-02-15 16:06:09 +0000 |
commit | b6f2ac5debac9e9900e4fcaf270701de62d3b335 (patch) | |
tree | a60af1a3096be4e4a0aae33c41a87b84b5292234 | |
parent | 4c4477561311cb03bc35ed960da4316599253416 (diff) |
Replace title header extraction method with one that doesn't care about attributes inside the opening tag
-rwxr-xr-x | title.bash | 3 |
1 file changed, 1 insertion, 2 deletions
@@ -74,8 +74,7 @@ else
 then
 grep -m1 . /tmp/$TIME.body
 else
- awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' \
- | sed -e 's/^[ \t]*//' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'
+ cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title.*>.*</title>' | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'
 fi
 rm /tmp/$TIME.header /tmp/$TIME.body
 fi