diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Etym.php | 125 |
1 files changed, 125 insertions, 0 deletions
<?php

namespace App;

use PHPHtmlParser\Dom;

/**
 * Get the etymology of a word
 *
 * Reads a word from STDIN, loads "<baseURL><word>", takes the first node that
 * matches the configured selector and returns its content as plain text. Text
 * longer than MAX_CHARACTERS is uploaded through the configured paste command
 * and replaced by a shortened version that points at the paste.
 *
 * @author Phil Burton <phil@pgburton.com>
 */
class Etym
{
    /**
     * Maximum number of characters of a definition returned inline.
     *
     * Declared without a visibility keyword so the class keeps loading on
     * PHP 7.0, which the rest of the file is compatible with.
     */
    const MAX_CHARACTERS = 350;

    /** @var string URL prefix the looked-up word is appended to */
    protected $baseURL;

    /** @var string Selector handed to Dom::find() to locate the definition */
    protected $domSearch;

    /** @var string Shell command that receives the full text on its stdin */
    protected $pasteCmd;

    /**
     * Initialise with config options
     *
     * @param string $baseURL   URL prefix; the (URL-encoded) word is appended to it
     * @param string $domSearch Selector used to find the definition in the page
     * @param string $pasteCmd  Shell command the full definition is piped into;
     *                          the last line it prints is used as the paste link
     * @author Phil Burton <phil@pgburton.com>
     */
    public function __construct(string $baseURL, string $domSearch, string $pasteCmd)
    {
        $this->baseURL = $baseURL;
        $this->domSearch = $domSearch;
        $this->pasteCmd = $pasteCmd;
    }

    /**
     * Search for and return the etymology definition
     *
     * Side effects: reads one line from STDIN, performs the page load, and may
     * run the paste command. Prints a message and terminates the process with
     * exit code 2 when the selector matches nothing.
     *
     * @return string
     * @author Phil Burton <phil@d3r.com>
     */
    public function getDefinition(): string
    {
        $input = $this->readStdin();

        // Encode the word so spaces and reserved characters cannot break the URL.
        $targetURL = $this->baseURL . rawurlencode($input);

        $dom = new Dom;
        $dom->load($targetURL);
        $html = $dom->find($this->domSearch);

        if (!$html instanceof \PHPHtmlParser\Dom\Collection || count($html) <= 0) {
            echo "No entry found for '$input'!\n";
            exit(2);
        }

        $definition = $this->cleanUpHtml($html[0]->innerHtml());

        return $this->handleTruncation($input, $definition, $targetURL);
    }

    /**
     * Get and return the user input
     *
     * Reads a single line from STDIN and strips surrounding whitespace, which
     * also removes the "\r" left behind by CRLF line endings. Prints a message
     * and terminates the process with exit code 1 when nothing usable was
     * supplied (end of input or a blank line).
     *
     * @return string
     * @author Phil Burton <phil@pgburton.com>
     */
    protected function readStdin(): string
    {
        $line = fgets(STDIN);
        $word = $line === false ? '' : trim($line);

        if ($word === '') {
            echo "No input supplied!\n";
            exit(1);
        }

        return $word;
    }

    /**
     * Tidy and return the html as plain text
     *
     * @param string $input HTML fragment
     * @return string Plain text with tags removed, entities and percent-escapes
     *                decoded, runs of spaces collapsed, surrounding whitespace
     *                trimmed and the first character upper-cased
     * @author Phil Burton <phil@pgburton.com>
     */
    protected function cleanUpHtml(string $input)
    {
        // Strip HTML tags
        $clear = strip_tags($input);
        // Decode entities, including both quote styles. The flags are explicit
        // because the defaults differ between PHP versions.
        // NOTE(review): assumes the fetched page is UTF-8 - confirm.
        $clear = html_entity_decode($clear, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        // Decode percent-escapes. rawurldecode() is used instead of urldecode()
        // because the latter also turns every literal "+" into a space, which
        // destroys text such as "ad + hoc".
        $clear = rawurldecode($clear);
        // Replace multiple spaces with a single space
        $clear = preg_replace('/ +/', ' ', $clear);
        // Trim the string of leading/trailing whitespace
        $clear = trim($clear);
        // Capitalise the first char.
        $clear = ucfirst($clear);

        return $clear;
    }

    /**
     * Shorten an over-long definition and point at a paste of the full text
     *
     * A definition of at most MAX_CHARACTERS characters is returned untouched
     * and the paste command is not run. Otherwise the full text (headed by the
     * capitalised word and followed by the source URL) is piped into the paste
     * command, and the first MAX_CHARACTERS characters are returned followed by
     * "..." and, when the command printed something, a pointer to the paste.
     *
     * @param string $input      The word that was looked up
     * @param string $definition The cleaned-up definition text
     * @param string $url        The URL the definition was loaded from
     * @return string
     */
    protected function handleTruncation($input, $definition, $url)
    {
        // Count characters, not bytes, so a multi-byte character is never split.
        // NOTE(review): assumes $definition is UTF-8 - confirm.
        if (mb_strlen($definition, 'UTF-8') <= self::MAX_CHARACTERS) {
            return $definition;
        }

        // Build the document that goes to the paste service.
        $heading = ucfirst($input);
        $defAndUrl = "\"$heading\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]";

        $pasteBinCmd = 'echo ' . escapeshellarg($defAndUrl) . ' | ' . $this->pasteCmd;
        // exec() returns the last line of the command's output, or false on failure.
        $pasteBinLink = exec($pasteBinCmd);

        $truncated = mb_substr($definition, 0, self::MAX_CHARACTERS, 'UTF-8') . '...';

        // Without a link there is nothing to point at, so skip the suffix
        // instead of emitting an empty "[More info at ]".
        if (!is_string($pasteBinLink) || trim($pasteBinLink) === '') {
            return $truncated;
        }

        return $truncated . " [More info at $pasteBinLink]";
    }
}