summary | refs | log | tree | commit | diff
path: root/src/Etym.php
diff options
context:
space:
mode:
author Phil Burton <phil@d3r.com> 2019-06-11 14:17:29 +0100
committer Phil Burton <phil@d3r.com> 2019-06-11 14:17:29 +0100
commit d00f19145627312125c593f35193f04733b4df4e (patch)
tree 3e4f2a6756bfb1c55d429f6d70bde8af272d5e36 /src/Etym.php
First commit
Diffstat (limited to 'src/Etym.php')
-rw-r--r-- src/Etym.php 125
1 files changed, 125 insertions, 0 deletions
diff --git a/src/Etym.php b/src/Etym.php
new file mode 100644
index 0000000..b796a3a
--- /dev/null
+++ b/src/Etym.php
@@ -0,0 +1,125 @@
+<?php
+
+namespace App;
+
+use PHPHtmlParser\Dom;
+
+/**
+ * Get the etymology of a word
+ *
+ * The word is read from STDIN and appended to a configured base URL. That
+ * page is loaded and the first element matching a configured selector is
+ * reduced to plain text. Definitions of 350 characters or more are handed
+ * to a configured paste command and returned in shortened form.
+ *
+ * @author Phil Burton <phil@pgburton.com>
+ */
+class Etym
+{
+    protected $baseURL;
+    protected $domSearch;
+    protected $pasteCmd;
+
+    /**
+     * Initialise with config options
+     *
+     * @param string $baseURL   URL prefix the looked-up word is appended to
+     * @param string $domSearch Selector passed to Dom::find() to locate the definition
+     * @param string $pasteCmd  Shell command that receives the full text on STDIN;
+     *                          the last line it prints is used as the link
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    public function __construct(string $baseURL, string $domSearch, string $pasteCmd)
+    {
+        $this->baseURL = $baseURL;
+        $this->domSearch = $domSearch;
+        $this->pasteCmd = $pasteCmd;
+    }
+
+    /**
+     * Search for and return the etymology definition of the word given on STDIN
+     *
+     * Prints a message and exits with status 2 when nothing matches the selector.
+     *
+     * @return string
+     * @author Phil Burton <phil@d3r.com>
+     */
+    public function getDefinition(): string
+    {
+        $input = $this->readStdin();
+
+        // Encode the word so spaces and reserved characters cannot corrupt the URL
+        $targetURL = $this->baseURL . rawurlencode($input);
+
+        $dom = new Dom;
+        $dom->load($targetURL);
+        $html = $dom->find($this->domSearch);
+
+        if (!$html instanceof \PHPHtmlParser\Dom\Collection || count($html) <= 0) {
+            echo "No entry found for '$input'!\n";
+            exit(2);
+        }
+
+        $definition = $this->cleanUpHtml($html[0]->innerHtml());
+
+        return $this->handleTruncation($input, $definition, $targetURL);
+    }
+
+    /**
+     * Get and return the user input
+     *
+     * Prints a message and exits with status 1 when STDIN yields no line or
+     * the line is blank.
+     *
+     * @return string
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    protected function readStdin(): string
+    {
+        $input = fgets(STDIN);
+
+        // trim() also drops the "\r" of CRLF input and stray surrounding spaces
+        $input = $input === false ? '' : trim($input);
+
+        if ($input === '') {
+            echo "No input supplied!\n";
+            exit(1);
+        }
+
+        return $input;
+    }
+
+    /**
+     * Tidy and return the html
+     *
+     * @param string $input HTML fragment
+     * @return string Plain text with the first character capitalised
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    protected function cleanUpHtml(string $input)
+    {
+        // Strip HTML Tags
+        $clear = strip_tags($input);
+        // Clean up things like &amp; and &#039; (explicit flags: the defaults differ between PHP versions)
+        $clear = html_entity_decode($clear, ENT_QUOTES | ENT_HTML5, 'UTF-8');
+        // Decode %XX sequences; unlike urldecode() this keeps the literal "+" used between word parts
+        $clear = rawurldecode($clear);
+        // Replace Multiple spaces with single space
+        $clear = preg_replace('/ +/', ' ', $clear);
+        // Trim the string of leading/trailing space
+        $clear = trim($clear);
+
+        // Capitalise the first char.
+        return ucfirst($clear);
+    }
+
+    /**
+     * Shorten an over-long definition and point at the full text
+     *
+     * Definitions shorter than 350 characters are returned untouched. Longer
+     * ones are piped, together with the word and source URL, to the paste
+     * command; the result is the first 350 characters plus a "More info" link.
+     *
+     * @param string $input      Word that was looked up
+     * @param string $definition Cleaned-up definition text
+     * @param string $url        Page the definition was taken from
+     * @return string
+     */
+    protected function handleTruncation($input, $definition, $url)
+    {
+        $maxCharacters = 350;
+
+        // Count characters, not bytes: strlen() over-counts multi-byte UTF-8 text
+        if (mb_strlen($definition, 'UTF-8') < $maxCharacters) {
+            return $definition;
+        }
+
+        // Paste the whole entry, headed by the capitalised word
+        $defAndUrl = '"' . ucfirst($input) . "\"\n\n" . $definition . "\n\n[Original at: $url]";
+        $pasteBinCmd = 'echo ' . escapeshellarg($defAndUrl) . ' | ' . $this->pasteCmd;
+        // exec() may return false or an empty string when the command fails
+        $pasteBinLink = (string) exec($pasteBinCmd);
+
+        // Fall back to the source page when the paste command prints no link
+        $moreInfo = $pasteBinLink !== '' ? $pasteBinLink : $url;
+
+        // mb_substr() cuts on a character boundary, so no partial UTF-8 sequence is emitted
+        return mb_substr($definition, 0, $maxCharacters, 'UTF-8') . "... [More info at $moreInfo]";
+    }
+}