diff options
author | Phil Burton <phil@d3r.com> | 2019-06-11 14:17:29 +0100 |
---|---|---|
committer | Phil Burton <phil@d3r.com> | 2019-06-11 14:17:29 +0100 |
commit | d00f19145627312125c593f35193f04733b4df4e (patch) | |
tree | 3e4f2a6756bfb1c55d429f6d70bde8af272d5e36 /etym.php |
First commit
Diffstat (limited to 'etym.php')
-rw-r--r-- | etym.php | 85 |
1 files changed, 85 insertions, 0 deletions
<?php

use PHPHtmlParser\Dom;

// Pull in Composer's autoloader when the parser class is not already loadable
// and a vendor/ directory sits next to this script. Skipped silently otherwise
// (e.g. when an autoloader has been registered elsewhere).
if (!class_exists('PHPHtmlParser\Dom') && is_file(__DIR__ . '/vendor/autoload.php')) {
    require_once __DIR__ . '/vendor/autoload.php';
}

// Configuration lives in constants rather than file-scope variables: plain
// variables declared here are NOT visible inside the functions below, whereas
// constants are.

/** Prefix of every lookup URL; the (URL-encoded) word is appended to it. */
const ETYM_BASE_URL = "http://www.etymonline.com/word/";
/** CSS selector of the element holding the definition. The spelling is part of the selector; do not "fix" it. */
const ETYM_HTML_NODE = "section[class^='word__defination']";
/** Shell command that reads text on stdin and prints the resulting paste URL. */
const ETYM_PASTE_CMD = "pastebinit -b http://p.of.je 2>/dev/null";
/** Longest definition (in characters) that is printed without truncation. */
const ETYM_MAX_CHARACTERS = 350;

/**
 * Read the word to look up from the first line of standard input.
 *
 * Prints "No input supplied!" and exits with status 1 when stdin yields
 * nothing or only whitespace.
 *
 * @return string The first input line with surrounding whitespace removed.
 */
function readStdin()
{
    $input = fgets(STDIN);

    // fgets() returns false on EOF/error; a blank line is equally unusable.
    if ($input === false || trim($input) === '') {
        echo "No input supplied!\n";
        exit(1);
    }

    // trim() (rather than rtrim "\n") also drops a trailing "\r" from CRLF input.
    return trim($input);
}

/**
 * Turn an HTML fragment into a single plain-text string.
 *
 * @param string $input HTML fragment.
 *
 * @return string Text with tags removed, entities and percent-escapes decoded,
 *                runs of spaces collapsed, ends trimmed and the first
 *                character upper-cased.
 */
function cleanUpHtml($input)
{
    // Strip HTML tags.
    $clear = strip_tags($input);
    // Decode entities such as &amp; and &#039;. Flags and charset are explicit
    // because the defaults differ between PHP versions.
    $clear = html_entity_decode($clear, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    // Decode percent-escapes. rawurldecode() is used instead of urldecode()
    // because the latter would also turn every literal "+" into a space.
    $clear = rawurldecode($clear);
    // Replace multiple spaces with a single space.
    $clear = preg_replace('/ +/', ' ', $clear);
    // Trim leading/trailing whitespace.
    $clear = trim($clear);
    // Capitalise the first character.
    $clear = ucfirst($clear);

    return $clear;
}

/**
 * Shorten an over-long definition and point to a paste of the full text.
 *
 * Definitions of at most ETYM_MAX_CHARACTERS characters are returned
 * unchanged. Longer ones are piped, together with the word and source URL,
 * into ETYM_PASTE_CMD, then cut to ETYM_MAX_CHARACTERS characters and suffixed
 * with "..." plus the paste link (the link part is omitted if the paste
 * command printed nothing).
 *
 * @param string $input      The word that was looked up.
 * @param string $definition Plain-text definition.
 * @param string $url        URL the definition was fetched from.
 *
 * @return string The definition, possibly truncated.
 */
function handleTruncation($input, $definition, $url)
{
    // Count characters, not bytes, so multi-byte UTF-8 text is measured and
    // cut correctly. Strictly greater-than: a definition of exactly the
    // maximum length loses nothing and needs no ellipsis.
    if (mb_strlen($definition, 'UTF-8') <= ETYM_MAX_CHARACTERS) {
        return $definition;
    }

    // Build the full text for the paste, headed by the capitalised word.
    $input = ucfirst($input);
    $defAndUrl = "\"$input\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]";
    $safeDef = escapeshellarg($defAndUrl);

    // printf '%s\n' emits the text verbatim; some shells' echo would interpret
    // backslash sequences inside it.
    $pasteBinCmd = "printf '%s\\n' $safeDef | " . ETYM_PASTE_CMD;
    // exec() returns the last line of the command's output (false on failure).
    $pasteBinLink = exec($pasteBinCmd);

    $truncated = mb_substr($definition, 0, ETYM_MAX_CHARACTERS, 'UTF-8') . "...";

    // Only advertise a link when the paste command actually produced one.
    if (is_string($pasteBinLink) && trim($pasteBinLink) !== '') {
        $pasteBinLink = trim($pasteBinLink);
        $truncated .= " [More info at $pasteBinLink]";
    }

    return $truncated;
}

/**
 * Look up the word given on stdin and print its definition.
 *
 * Exits with status 1 when no word is supplied (via readStdin()) and with
 * status 2 when the fetched page contains no element matching ETYM_HTML_NODE.
 *
 * @return void
 */
function getDefinition()
{
    $input = readStdin();

    // Encode the word so spaces and reserved characters form a valid URL.
    $targetURL = ETYM_BASE_URL . rawurlencode($input);

    $dom = new Dom;
    // loadFromUrl() is used explicitly rather than the auto-detecting load(),
    // which is not provided by every release of the parser library.
    $dom->loadFromUrl($targetURL);

    // With an index as second argument, find() is expected to yield that
    // single match, or null when there is none (per the php-html-parser API).
    $node = $dom->find(ETYM_HTML_NODE, 0);

    if ($node === null) {
        echo "No entry found for '$input'!\n";
        exit(2);
    }

    $definition = cleanUpHtml($node->innerHtml());
    $definition = handleTruncation($input, $definition, $targetURL);

    echo $definition . "\n";
}

getDefinition();