summary refs log tree commit diff
diff options
context:
space:
mode:
author Phil Burton <phil@d3r.com> 2019-06-11 14:17:29 +0100
committer Phil Burton <phil@d3r.com> 2019-06-11 14:17:29 +0100
commit d00f19145627312125c593f35193f04733b4df4e (patch)
tree 3e4f2a6756bfb1c55d429f6d70bde8af272d5e36
First commit
-rw-r--r-- .gitignore 1
-rw-r--r-- composer.json 17
-rw-r--r-- composer.lock 114
-rw-r--r-- etym.php 85
-rw-r--r-- scripts/run.php 14
-rw-r--r-- src/Etym.php 125
6 files changed, 356 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..57872d0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/vendor/
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..9b75c19
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,17 @@
+{
+ "name": "blatech/blaetym",
+ "description": "Find etymology of a word",
+ "type": "library",
+ "require": {
+ "paquettg/php-html-parser": "^2.0"
+ },
+ "autoload": {
+ "psr-4": {"App\\": "src/"}
+ },
+ "authors": [
+ {
+ "name": "Phil Burton",
+ "email": "phil@pgburton.com"
+ }
+ ]
+}
diff --git a/composer.lock b/composer.lock
new file mode 100644
index 0000000..b4ffb1e
--- /dev/null
+++ b/composer.lock
@@ -0,0 +1,114 @@
+{
+ "_readme": [
+ "This file locks the dependencies of your project to a known state",
+ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+ "This file is @generated automatically"
+ ],
+ "content-hash": "b6dc99abca93405035c63822eb275afd",
+ "packages": [
+ {
+ "name": "paquettg/php-html-parser",
+ "version": "2.0.2",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/paquettg/php-html-parser.git",
+ "reference": "77e4a44b0916690b4300fe9abf98fd05bbba48f0"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/paquettg/php-html-parser/zipball/77e4a44b0916690b4300fe9abf98fd05bbba48f0",
+ "reference": "77e4a44b0916690b4300fe9abf98fd05bbba48f0",
+ "shasum": ""
+ },
+ "require": {
+ "ext-mbstring": "*",
+ "paquettg/string-encode": "~1.0.0",
+ "php": ">=7.1"
+ },
+ "require-dev": {
+ "mockery/mockery": "^1.2",
+ "php-coveralls/php-coveralls": "^2.1",
+ "phpunit/phpunit": "^7.5.1"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-0": {
+ "PHPHtmlParser": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Gilles Paquette",
+ "email": "paquettg@gmail.com",
+ "homepage": "http://gillespaquette.ca"
+ }
+ ],
+ "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.",
+ "homepage": "https://github.com/paquettg/php-html-parser",
+ "keywords": [
+ "dom",
+ "html",
+ "parser"
+ ],
+ "time": "2019-02-10T01:35:49+00:00"
+ },
+ {
+ "name": "paquettg/string-encode",
+ "version": "1.0.1",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/paquettg/string-encoder.git",
+ "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/paquettg/string-encoder/zipball/a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee",
+ "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=7.1"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^7.5.1"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-0": {
+ "stringEncode": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Gilles Paquette",
+ "email": "paquettg@gmail.com",
+ "homepage": "http://gillespaquette.ca"
+ }
+ ],
+ "description": "Facilitating the process of altering string encoding in PHP.",
+ "homepage": "https://github.com/paquettg/string-encoder",
+ "keywords": [
+ "charset",
+ "encoding",
+ "string"
+ ],
+ "time": "2018-12-21T02:25:09+00:00"
+ }
+ ],
+ "packages-dev": [],
+ "aliases": [],
+ "minimum-stability": "stable",
+ "stability-flags": [],
+ "prefer-stable": false,
+ "prefer-lowest": false,
+ "platform": [],
+ "platform-dev": []
+}
diff --git a/etym.php b/etym.php
new file mode 100644
index 0000000..08059fd
--- /dev/null
+++ b/etym.php
@@ -0,0 +1,85 @@
+<?php
+
+use PHPHtmlParser\Dom;
+
+// Load Composer's autoloader so PHPHtmlParser\Dom can be resolved; this
+// script lives in the project root, next to the vendor/ directory.
+require __DIR__ . '/vendor/autoload.php';
+
+// URL prefix the looked-up word is appended to.
+$baseURL = "http://www.etymonline.com/word/";
+// Selector of the element holding the definition. The "defination" spelling
+// is kept as-is; presumably it mirrors the site's own class name - verify.
+$htmlNode = "section[class^='word__defination']";
+// Shell command that receives the full text on stdin; the last line it
+// prints is used as the link to the paste.
+$pasteCmd = "pastebinit -b http://p.of.je 2>/dev/null";
+
+/**
+ * Read the word to look up from the first line of standard input.
+ *
+ * Prints an error and exits with status 1 when no line can be read or when
+ * the line is empty once its line ending has been removed.
+ *
+ * @return string The input line without trailing "\r" / "\n" characters.
+ */
+function readStdin()
+{
+    $input = fgets(STDIN);
+
+    // Strip CRLF as well as LF so a stray "\r" never ends up in the URL.
+    $input = $input === false ? '' : rtrim($input, "\r\n");
+
+    if ($input === '') {
+        echo "No input supplied!\n";
+        exit(1);
+    }
+
+    return $input;
+}
+
+/**
+ * Turn an HTML fragment into tidy plain text.
+ *
+ * Tags are removed, HTML entities and %XX escapes are decoded, runs of
+ * spaces are collapsed, surrounding whitespace is trimmed and the first
+ * character is upper-cased.
+ *
+ * @param string $input HTML fragment.
+ * @return string The cleaned-up text.
+ */
+function cleanUpHtml($input)
+{
+    // Strip HTML tags.
+    $clear = strip_tags($input);
+    // Decode entities like &amp;. ENT_QUOTES is passed explicitly because the
+    // pre-8.1 default leaves single-quote entities such as &#039; untouched.
+    $clear = html_entity_decode($clear, ENT_QUOTES | ENT_HTML5, 'UTF-8');
+    // Decode %XX escapes. rawurldecode() is used instead of urldecode()
+    // because the latter also turns every literal "+" into a space.
+    $clear = rawurldecode($clear);
+    // Replace multiple spaces with a single space.
+    $clear = preg_replace('/ +/', ' ', $clear);
+    // Trim leading/trailing whitespace.
+    $clear = trim($clear);
+    // Capitalise the first character.
+    $clear = ucfirst($clear);
+
+    return $clear;
+}
+
+/**
+ * Shorten an over-long definition and point to a paste of the full text.
+ *
+ * Definitions of up to 350 bytes are returned untouched. Longer ones are
+ * piped, together with the word and the source URL, into the shell command
+ * held in the global $pasteCmd. The result is the leading 350 bytes followed
+ * by "..." and, when the command printed something, a "More info" link.
+ *
+ * @param string $input      The word that was looked up.
+ * @param string $definition Cleaned-up definition text.
+ * @param string $url        URL the definition was loaded from.
+ * @return string The definition, shortened if necessary.
+ */
+function handleTruncation($input, $definition, $url)
+{
+    // The paste command is configured at file scope; without importing it
+    // here it would be an undefined local and the pipe would have no target.
+    global $pasteCmd;
+
+    $maxBytes = 350;
+
+    // Text that fits exactly needs no truncation marker.
+    if (strlen($definition) <= $maxBytes) {
+        return $definition;
+    }
+
+    // Paste the complete entry, headed by the capitalised word.
+    $input = ucfirst($input);
+    $defAndUrl = "\"$input\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]";
+    $safeDef = escapeshellarg($defAndUrl);
+
+    $pasteBinLink = exec("echo $safeDef | " . $pasteCmd);
+
+    // mb_strcut() counts bytes like substr() but never cuts through a
+    // multi-byte character.
+    $truncated = mb_strcut($definition, 0, $maxBytes, 'UTF-8') . '...';
+
+    // exec() gives false or '' when the command fails or prints nothing;
+    // do not advertise an empty link in that case.
+    if (is_string($pasteBinLink) && $pasteBinLink !== '') {
+        $truncated .= " [More info at $pasteBinLink]";
+    }
+
+    return $truncated;
+}
+
+/**
+ * Look up the word read from standard input and print its definition.
+ *
+ * The word is appended to the global $baseURL, the page is loaded and the
+ * first element matching the global $htmlNode selector is cleaned up,
+ * shortened if needed and echoed. Exits with status 2 when the page has no
+ * matching element.
+ *
+ * @return void
+ */
+function getDefinition()
+{
+    // The lookup settings are defined at file scope.
+    global $baseURL, $htmlNode;
+
+    $input = readStdin();
+
+    // Encode the word so spaces, "/", "?" and the like stay inside a single
+    // path segment instead of changing the structure of the URL.
+    $targetURL = $baseURL . rawurlencode($input);
+
+    $dom = new Dom;
+    $dom->load($targetURL);
+    $matches = $dom->find($htmlNode);
+
+    if (!$matches instanceof \PHPHtmlParser\Dom\Collection || count($matches) <= 0) {
+        echo "No entry found for '$input'!\n";
+        exit(2);
+    }
+
+    // innerHtml() returns the markup as a string, which is what
+    // cleanUpHtml() works on.
+    $definition = cleanUpHtml($matches[0]->innerHtml());
+    $definition = handleTruncation($input, $definition, $targetURL);
+
+    echo $definition . "\n";
+}
+
+getDefinition(); // Script entry point: read a word from stdin and print its definition.
diff --git a/scripts/run.php b/scripts/run.php
new file mode 100644
index 0000000..b50bcc7
--- /dev/null
+++ b/scripts/run.php
@@ -0,0 +1,14 @@
+<?php
+
+require __DIR__ . '/../vendor/autoload.php';
+
+use App\Etym;
+
+// Constructor arguments for Etym, in order: lookup base URL, selector of
+// the definition element, paste command.
+$settings = [
+    "http://www.etymonline.com/word/",
+    "section[class^='word__defination']",
+    "pastebinit -b http://p.of.je 2>/dev/null",
+];
+
+// Fetch the definition and print it followed by a line break.
+$lookup = new Etym(...$settings);
+echo $lookup->getDefinition(), PHP_EOL;
diff --git a/src/Etym.php b/src/Etym.php
new file mode 100644
index 0000000..b796a3a
--- /dev/null
+++ b/src/Etym.php
@@ -0,0 +1,125 @@
+<?php
+
+namespace App;
+
+use PHPHtmlParser\Dom;
+
+/**
+ * Get the etymology of a word
+ *
+ * Reads the word from standard input, loads its page below the configured
+ * base URL and returns the text of the first element matching the
+ * configured selector, shortened when it is too long.
+ *
+ * @author Phil Burton <phil@pgburton.com>
+ */
+class Etym
+{
+    /** @var string URL prefix the looked-up word is appended to */
+    protected $baseURL;
+
+    /** @var string Selector passed to Dom::find() to locate the definition */
+    protected $domSearch;
+
+    /** @var string Shell command that receives the full text on stdin */
+    protected $pasteCmd;
+
+    /**
+     * Initialise with config options
+     *
+     * @param string $baseURL
+     * @param string $domSearch
+     * @param string $pasteCmd
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    public function __construct(string $baseURL, string $domSearch, string $pasteCmd)
+    {
+        $this->baseURL = $baseURL;
+        $this->domSearch = $domSearch;
+        $this->pasteCmd = $pasteCmd;
+    }
+
+    /**
+     * Search for and return the etymology definition
+     *
+     * Prints a message and exits with status 2 when the loaded page has no
+     * element matching the configured selector.
+     *
+     * @return string
+     * @author Phil Burton <phil@d3r.com>
+     */
+    public function getDefinition(): string
+    {
+        $input = $this->readStdin();
+
+        // Encode the word so spaces, "/", "?" and the like stay inside a
+        // single path segment instead of changing the structure of the URL.
+        $targetURL = $this->baseURL . rawurlencode($input);
+
+        $dom = new Dom;
+        $dom->load($targetURL);
+        $html = $dom->find($this->domSearch);
+
+        if (!$html instanceof \PHPHtmlParser\Dom\Collection || count($html) <= 0) {
+            echo "No entry found for '$input'!\n";
+            exit(2);
+        }
+
+        $definition = $this->cleanUpHtml($html[0]->innerHtml());
+
+        return $this->handleTruncation($input, $definition, $targetURL);
+    }
+
+    /**
+     * Get and return the user input
+     *
+     * Prints a message and exits with status 1 when no line can be read or
+     * the line is empty once its line ending has been removed.
+     *
+     * @return string
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    protected function readStdin(): string
+    {
+        $input = fgets(STDIN);
+
+        // Strip CRLF as well as LF so a stray "\r" never ends up in the URL.
+        $input = $input === false ? '' : rtrim($input, "\r\n");
+
+        if ($input === '') {
+            echo "No input supplied!\n";
+            exit(1);
+        }
+
+        return $input;
+    }
+
+    /**
+     * Tidy and return the html as plain text
+     *
+     * @param string $input
+     * @return string
+     * @author Phil Burton <phil@pgburton.com>
+     */
+    protected function cleanUpHtml(string $input)
+    {
+        // Strip HTML tags.
+        $clear = strip_tags($input);
+        // Decode entities like &amp;. ENT_QUOTES is passed explicitly because
+        // the pre-8.1 default leaves entities such as &#039; untouched.
+        $clear = html_entity_decode($clear, ENT_QUOTES | ENT_HTML5, 'UTF-8');
+        // Decode %XX escapes. rawurldecode() is used instead of urldecode()
+        // because the latter also turns every literal "+" into a space.
+        $clear = rawurldecode($clear);
+        // Replace multiple spaces with a single space.
+        $clear = preg_replace('/ +/', ' ', $clear);
+        // Trim leading/trailing whitespace.
+        $clear = trim($clear);
+        // Capitalise the first character.
+        $clear = ucfirst($clear);
+
+        return $clear;
+    }
+
+    /**
+     * Shorten an over-long definition and link to a paste of the full text
+     *
+     * Definitions of up to 350 bytes are returned untouched. Longer ones are
+     * piped, together with the word and the source URL, into the configured
+     * paste command. The result is the leading 350 bytes followed by "..."
+     * and, when the command printed something, a "More info" link.
+     *
+     * @param string $input The word that was looked up
+     * @param string $definition Cleaned-up definition text
+     * @param string $url URL the definition was loaded from
+     * @return string
+     */
+    protected function handleTruncation($input, $definition, $url)
+    {
+        $maxBytes = 350;
+
+        // Text that fits exactly needs no truncation marker.
+        if (strlen($definition) <= $maxBytes) {
+            return $definition;
+        }
+
+        // Paste the complete entry, headed by the capitalised word.
+        $input = ucfirst($input);
+        $defAndUrl = "\"$input\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]";
+        $safeDef = escapeshellarg($defAndUrl);
+
+        $pasteBinLink = exec("echo $safeDef | " . $this->pasteCmd);
+
+        // mb_strcut() counts bytes like substr() but never cuts through a
+        // multi-byte character.
+        $truncated = mb_strcut($definition, 0, $maxBytes, 'UTF-8') . '...';
+
+        // exec() gives false or '' when the command fails or prints nothing;
+        // do not advertise an empty link in that case.
+        if (is_string($pasteBinLink) && $pasteBinLink !== '') {
+            $truncated .= " [More info at $pasteBinLink]";
+        }
+
+        return $truncated;
+    }
+}