diff options
| author | Luke Bratch <luke@bratch.co.uk> | 2019-02-15 16:06:09 +0000 | 
|---|---|---|
| committer | Luke Bratch <luke@bratch.co.uk> | 2019-02-15 16:06:09 +0000 | 
| commit | b6f2ac5debac9e9900e4fcaf270701de62d3b335 (patch) | |
| tree | a60af1a3096be4e4a0aae33c41a87b84b5292234 | |
| parent | 4c4477561311cb03bc35ed960da4316599253416 (diff) | |
Replace title header extraction method with one that doesn't care about attributes inside the opening tag
| -rwxr-xr-x | title.bash | 3 | 
1 file changed, 1 insertion, 2 deletions
| @@ -74,8 +74,7 @@ else    then      grep -m1 . /tmp/$TIME.body    else -    awk -vRS="</title>" '/<title>/{gsub(/.*<title>|\n+/,"");print;exit}' IGNORECASE=1 /tmp/$TIME.body | sed ':a;N;$!ba;s/\n//g' \ -      | sed -e 's/^[ \t]*//' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);' +    cat /tmp/$TIME.body | tr '\n' ' ' | tr '\r' ' ' | grep -oiE '<title.*>.*</title>' | sed -r 's/<title[^>]*//I; s/^>//I; s/<\/title>$//I' | php -r 'while(($line=fgets(STDIN)) !== FALSE) echo html_entity_decode($line, ENT_QUOTES|ENT_HTML401);'    fi    rm /tmp/$TIME.header /tmp/$TIME.body  fi | 
