From d00f19145627312125c593f35193f04733b4df4e Mon Sep 17 00:00:00 2001 From: Phil Burton Date: Tue, 11 Jun 2019 14:17:29 +0100 Subject: First commit --- .gitignore | 1 + composer.json | 17 ++++++++ composer.lock | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++ etym.php | 85 ++++++++++++++++++++++++++++++++++++++ scripts/run.php | 14 +++++++ src/Etym.php | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 356 insertions(+) create mode 100644 .gitignore create mode 100644 composer.json create mode 100644 composer.lock create mode 100644 etym.php create mode 100644 scripts/run.php create mode 100644 src/Etym.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57872d0 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/vendor/ diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..9b75c19 --- /dev/null +++ b/composer.json @@ -0,0 +1,17 @@ +{ + "name": "blatech/blaetym", + "description": "Find etymology of a word", + "type": "library", + "require": { + "paquettg/php-html-parser": "^2.0" + }, + "autoload": { + "psr-4": {"App\\": "src/"} + }, + "authors": [ + { + "name": "Phil Burton", + "email": "phil@pgburton.com" + } + ] +} diff --git a/composer.lock b/composer.lock new file mode 100644 index 0000000..b4ffb1e --- /dev/null +++ b/composer.lock @@ -0,0 +1,114 @@ +{ + "_readme": [ + "This file locks the dependencies of your project to a known state", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", + "This file is @generated automatically" + ], + "content-hash": "b6dc99abca93405035c63822eb275afd", + "packages": [ + { + "name": "paquettg/php-html-parser", + "version": "2.0.2", + "source": { + "type": "git", + "url": "https://github.com/paquettg/php-html-parser.git", + "reference": "77e4a44b0916690b4300fe9abf98fd05bbba48f0" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/paquettg/php-html-parser/zipball/77e4a44b0916690b4300fe9abf98fd05bbba48f0", + "reference": "77e4a44b0916690b4300fe9abf98fd05bbba48f0", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "paquettg/string-encode": "~1.0.0", + "php": ">=7.1" + }, + "require-dev": { + "mockery/mockery": "^1.2", + "php-coveralls/php-coveralls": "^2.1", + "phpunit/phpunit": "^7.5.1" + }, + "type": "library", + "autoload": { + "psr-0": { + "PHPHtmlParser": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Gilles Paquette", + "email": "paquettg@gmail.com", + "homepage": "http://gillespaquette.ca" + } + ], + "description": "An HTML DOM parser. It allows you to manipulate HTML. Find tags on an HTML page with selectors just like jQuery.", + "homepage": "https://github.com/paquettg/php-html-parser", + "keywords": [ + "dom", + "html", + "parser" + ], + "time": "2019-02-10T01:35:49+00:00" + }, + { + "name": "paquettg/string-encode", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/paquettg/string-encoder.git", + "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/paquettg/string-encoder/zipball/a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", + "reference": "a8708e9fac9d5ddfc8fc2aac6004e2cd05d80fee", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "require-dev": { + "phpunit/phpunit": "^7.5.1" + }, + "type": "library", + "autoload": { + "psr-0": { + "stringEncode": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Gilles Paquette", + "email": "paquettg@gmail.com", + "homepage": "http://gillespaquette.ca" + } + ], + "description": "Facilitating the process of altering string encoding in PHP.", + "homepage": "https://github.com/paquettg/string-encoder", + "keywords": [ + "charset", + 
"encoding", + "string" + ], + "time": "2018-12-21T02:25:09+00:00" + } + ], + "packages-dev": [], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": [], + "platform-dev": [] +} diff --git a/etym.php b/etym.php new file mode 100644 index 0000000..08059fd --- /dev/null +++ b/etym.php @@ -0,0 +1,85 @@ +/dev/null"; + +function readStdin() +{ + $input = fgets(STDIN); + + if ($input === false) { + echo "No input supplied!\n"; + exit(1); + } + + $input = rtrim($input, "\n"); + + return $input; +} + +function cleanUpHtml($input) +{ + // Strip HTML Tags + $clear = strip_tags($input); + // Clean up things like & + $clear = html_entity_decode($clear); + // Strip out any url-encoded stuff + $clear = urldecode($clear); + // Replace Multiple spaces with single space + $clear = preg_replace('/ +/', ' ', $clear); + // Trim the string of leading/trailing space + $clear = trim($clear); + // Capitalise the first char. + $clear = ucfirst($clear); + + return $clear; +} + +function handleTruncation($input, $definition, $url) +{ + $truncated = $definition; + + $MAX_CHARACTERS = 350; + if (strlen($truncated) >= $MAX_CHARACTERS) { + // Create that povjee link. + // Capitalise the first char of the input. + $input = ucfirst($input); + $defAndUrl = "\"$input\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]"; + $safeDef = escapeshellarg($defAndUrl); + + $pasteBinCmd = "echo $safeDef | " . $pasteCmd; + $pasteBinLink = exec($pasteBinCmd); + + $truncated = substr($truncated, 0, $MAX_CHARACTERS) . "... [More info at $pasteBinLink]"; + } + + return $truncated; +} + +function getDefinition() +{ + + $input = readStdin(); + + $targetURL = $baseURL . 
$input; + + $dom = new Dom; + $dom->load($targetURL); + $html = $dom->find($htmlNode)[0]->innerHtml(); + $node = $html->find($htmlNode, 0); + + if ($node === null) { + echo "No entry found for '$input'!\n"; + exit(2); + } + + $definition = cleanUpHtml($node->innertext); + $definition = handleTruncation($input, $definition, $targetURL); + + echo $definition . "\n"; +} + +getDefinition(); diff --git a/scripts/run.php b/scripts/run.php new file mode 100644 index 0000000..b50bcc7 --- /dev/null +++ b/scripts/run.php @@ -0,0 +1,14 @@ +/dev/null"; + +$etym = new Etym($baseURL, $htmlNode, $pasteCmd); +$result = $etym->getDefinition(); + +echo $result . PHP_EOL; diff --git a/src/Etym.php b/src/Etym.php new file mode 100644 index 0000000..b796a3a --- /dev/null +++ b/src/Etym.php @@ -0,0 +1,125 @@ + + */ +class Etym +{ + protected $baseURL; + protected $domSearch; + protected $pasteCmd; + + /** + * Initialise with config options + * + * @param string $baseURL + * @param string $domSearch + * @param string $pasteCmd + * @author Phil Burton + */ + public function __construct(string $baseURL, string $domSearch, string $pasteCmd) + { + $this->baseURL = $baseURL; + $this->domSearch = $domSearch; + $this->pasteCmd = $pasteCmd; + } + + /** + * Search for and return etym definition + * + * @return string + * @author Phil Burton + */ + public function getDefinition(): string + { + $input = $this->readStdin(); + + $targetURL = $this->baseURL . $input; + + $dom = new Dom; + $dom->load($targetURL); + $html = $dom->find($this->domSearch); + + if (!$html instanceof \PHPHtmlParser\Dom\Collection || count($html) <= 0) { + echo "No entry found for '$input'!\n"; + exit(2); + } + + $definition = $this->cleanUpHtml($html[0]->innerHtml()); + $definition = $this->handleTruncation($input, $definition, $targetURL); + + // echo $definition . 
"\n"; + return $definition; + } + + /** + * Get and return the user input + * + * @return string + * @author Phil Burton + */ + protected function readStdin(): string + { + $input = fgets(STDIN); + + if ($input === false) { + echo "No input supplied!\n"; + exit(1); + } + + $input = rtrim($input, "\n"); + + return $input; + } + + /** + * Tidy and return the html + * + * @param string $input + * @return string + * @author Phil Burton + */ + protected function cleanUpHtml(string $input) + { + // Strip HTML Tags + $clear = strip_tags($input); + // Clean up things like &amp; + $clear = html_entity_decode($clear); + // Strip out any url-encoded stuff + $clear = urldecode($clear); + // Replace Multiple spaces with single space + $clear = preg_replace('/ +/', ' ', $clear); + // Trim the string of leading/trailing space + $clear = trim($clear); + // Capitalise the first char. + $clear = ucfirst($clear); + + return $clear; + } + + protected function handleTruncation($input, $definition, $url) + { + $truncated = $definition; + + $MAX_CHARACTERS = 350; + if (strlen($truncated) >= $MAX_CHARACTERS) { + // Create that povjee link. + // Capitalise the first char of the input. + $input = ucfirst($input); + $defAndUrl = "\"$input\"" . "\n\n" . $definition . "\n\n" . "[Original at: $url]"; + $safeDef = escapeshellarg($defAndUrl); + + $pasteBinCmd = "echo $safeDef | " . $this->pasteCmd; + $pasteBinLink = exec($pasteBinCmd); + $truncated = substr($truncated, 0, $MAX_CHARACTERS) . "... [More info at $pasteBinLink]"; + } + + return $truncated; + } +} -- cgit v1.2.3