From 9cd0234665ea66dff172d94b9c1b4cb61b1d25b1 Mon Sep 17 00:00:00 2001 From: Fbenas Date: Sun, 21 Jun 2020 17:58:23 +0100 Subject: Improve robustness of all scripts and add more to sync command --- app/Console/Commands/ScrapeFile.php | 31 +------- app/Console/Commands/ScrapeFiles.php | 33 +-------- app/Console/Commands/ScrapeUrl.php | 40 +---------- app/Console/Commands/ScrapeUrlFile.php | 48 +++---------- app/Console/Commands/ScrapeYoutube.php | 47 +++--------- app/Console/Commands/SyncVideos.php | 74 ++++++++----------- app/Console/Commands/TestTor.php | 23 ------ app/Rugby/Concerns/Matchable.php | 71 ++++++++++++++++-- app/Rugby/Factory/DataAdapter.php | 1 - app/Rugby/Factory/Service.php | 21 ++++++ app/Rugby/Factory/SixnationsrugbyAdapter.php | 1 - app/Rugby/Model/Match.php | 9 ++- app/Rugby/Model/Tournament.php | 5 +- app/Rugby/Model/Video.php | 5 +- app/Youtube/Service.php | 83 +++++++++++++++++----- .../2020_06_20_214459_add_url_to_videos.php | 36 ++++++++++ resources/views/index.blade.php | 4 +- 17 files changed, 254 insertions(+), 278 deletions(-) create mode 100644 database/migrations/2020_06_20_214459_add_url_to_videos.php diff --git a/app/Console/Commands/ScrapeFile.php b/app/Console/Commands/ScrapeFile.php index 45beda9..564cf00 100644 --- a/app/Console/Commands/ScrapeFile.php +++ b/app/Console/Commands/ScrapeFile.php @@ -9,35 +9,9 @@ use Illuminate\Support\Facades\Storage; class ScrapeFile extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'scrape:file { filename }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Scrape a file for data'; - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $filename = $this->argument('filename'); @@ -48,12 +22,11 @@ class ScrapeFile extends Command } $raw_data = Storage::disk('local')->get($filename); + $tournament_name = 'Six Nations ' . explode('-', explode('.txt', $filename)[0])[1]; - $service = new Service(new SixnationsrugbyAdapter($raw_data, 'Six Nations ' . explode('-', explode('.txt', $filename)[0])[1])); - + $service = new Service(new SixnationsrugbyAdapter($raw_data, $tournament_name)); $service->save(); return Command::SUCCESS; } - } diff --git a/app/Console/Commands/ScrapeFiles.php b/app/Console/Commands/ScrapeFiles.php index 05b7176..e80e9e2 100644 --- a/app/Console/Commands/ScrapeFiles.php +++ b/app/Console/Commands/ScrapeFiles.php @@ -9,35 +9,9 @@ use Illuminate\Support\Facades\Storage; class ScrapeFiles extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'scrape:files { directory }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Scrape a file for data'; - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $directory = $this->argument('directory'); @@ -48,17 +22,16 @@ class ScrapeFiles extends Command } $files = Storage::disk('local')->files($directory); - - foreach ($files as $file) { - $tournament = 'Six Nations ' . explode('-', explode('.txt', $file)[0])[1]; + $this->info('Starting file: ' . $file); $raw_data = Storage::disk('local')->get($file); + $tournament = 'Six Nations ' . explode('-', explode('.txt', $file)[0])[1]; + $service = new Service(new SixnationsrugbyAdapter($raw_data, $tournament)); $service->save(); } return Command::SUCCESS; } - } diff --git a/app/Console/Commands/ScrapeUrl.php b/app/Console/Commands/ScrapeUrl.php index c02080e..06091bf 100644 --- a/app/Console/Commands/ScrapeUrl.php +++ b/app/Console/Commands/ScrapeUrl.php @@ -7,55 +7,19 @@ use Goutte\Client; class ScrapeUrl extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'scrape:url { url }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Scrape a webpage for data'; - protected $client; - - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - $this->client = new Client(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $url = $this->argument('url'); - if ($url != 'https://www.sixnationsrugby.com/fixtures/') { - $this->error('Url not supported'); - return; - } - - $crawler = $this->client->request('GET', $this->argument('url')); + $crawler = (new Client())->request('GET', $this->argument('url')); - $crawler->filter('div.fixtures__top-tier')->each( + $crawler->filter('title')->each( function ($node) { print $node->text()."\n"; } ); - - } } diff --git a/app/Console/Commands/ScrapeUrlFile.php b/app/Console/Commands/ScrapeUrlFile.php index f4f114b..e9472d2 100644 --- a/app/Console/Commands/ScrapeUrlFile.php +++ b/app/Console/Commands/ScrapeUrlFile.php @@ -9,35 +9,9 @@ use Illuminate\Support\Facades\Storage; class ScrapeUrlFile extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'scrape:urls { filename } { format }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Scrape a youtube for videos'; - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $filename = $this->argument('filename'); @@ -48,23 +22,21 @@ class ScrapeUrlFile extends Command foreach ($urls as $url) { $service = new Service($url, $this->output); - if ($format == 'video') { - $service->downloadVideo('video'); - } elseif ($format == 'audio') { - $service->downloadAudio('audio'); + if ($service->modelExists($url)) { + $this->info("Skipping $url. Exists in db!"); + continue; } - $video_model = Video::create( - [ - 'path' => $service->getFullPath() - ] - ); + if ($service->createModel($format)) { + $this->info("Download {$service->getTitle()} from $url!"); + } else { + $this->info('Skipping ' . $url . '. ' . ucfirst($format) . ' file already exists!'); + continue; + } - $this->info('Download of ' . $service->getTitle() . ' complete!'); + $this->info("Download {$service->getTitle()} from $url!"); } - - return Command::SUCCESS; } diff --git a/app/Console/Commands/ScrapeYoutube.php b/app/Console/Commands/ScrapeYoutube.php index de620d4..b848ecb 100644 --- a/app/Console/Commands/ScrapeYoutube.php +++ b/app/Console/Commands/ScrapeYoutube.php @@ -2,41 +2,15 @@ namespace App\Console\Commands; -use App\Rugby\Model\Video; + use App\Youtube\Service; use Illuminate\Console\Command; class ScrapeYoutube extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'scrape:youtube { url } { format }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Scrape a youtube for videos'; - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $url = $this->argument('url'); @@ -44,19 +18,16 @@ class ScrapeYoutube extends Command $service = new Service($url, $this->output); - if ($format == 'video') { - $service->downloadVideo('video'); - } elseif ($format == 'audio') { - $service->downloadAudio('audio'); + if ($service->modelExists($url)) { + $this->info('Video already exists in db'); + return Command::FAILURE; } - $video_model = Video::create( - [ - 'path' => $service->getFullPath() - ] - ); - - $this->info('Download of ' . $service->getTitle() . ' complete!'); + if ($service->createModel($format)) { + $this->info('Download of ' . $service->getTitle() . ' complete!'); + } else { + $this->info(ucfirst($format) . ' file already exists'); + } return Command::SUCCESS; } diff --git a/app/Console/Commands/SyncVideos.php b/app/Console/Commands/SyncVideos.php index d29436d..4f9f800 100644 --- a/app/Console/Commands/SyncVideos.php +++ b/app/Console/Commands/SyncVideos.php @@ -9,48 +9,32 @@ use Illuminate\Support\Facades\Storage; class SyncVideos extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'sync:videos'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Sync downloaded videos with existing matches'; - /** - * Create a new command instance. - * - * @return void - */ - public function __construct() - { - parent::__construct(); - } - - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $videos = Model\Video::whereNull('match_id')->get(); + $this->info('Found ' . $videos->count() . ' videos!'); foreach ($videos as $video) { $filename = $video->getFilename(); + $this->info('Searching for match from filename: ' . $filename); + $tournaments = Model\Tournament::all()->filter( function ($object) use ($filename) { return $object->isMatch($filename); } ); + if (!$tournaments->count()) { + $this->info('Could not find tournament from filename: ' . $filename); + return; + } + + $this->info('Found Tournmament:' . $tournaments->first()->name); + $tournament_ids = $tournaments->pluck('id'); $teams = Model\Team::all()->filter( @@ -59,38 +43,40 @@ class SyncVideos extends Command } ); + if (!$teams->count()) { + $this->info('Could not find teams from filename: ' . $filename); + continue; + } + + if ($teams->count() == 1) { + $this->info('Could only find 1 team (' . $teams->first()->name . ') from filename: ' . $filename); + continue; + } + + $this->info('Found Teams:' . $teams->first()->name, $teams->slice(1, 1)->first()->name); + $team_ids = $teams->pluck('name')->toArray(); $matches = $tournaments->first()->matches; foreach ($matches as $match) { - if (in_array($match->homeTeam()->first()->name, $team_ids)) { - if (in_array($match->awayTeam()->first()->name, $team_ids)) { + if (in_array($match->getHomeTeam()->name, $team_ids)) { + if (in_array($match->getAwayTeam()->name, $team_ids)) { + + $this->info('Found Match for filename: ' . $filename); $match->videos()->save($video); Storage::disk('local')->move( 'youtube/video/' . $video->getFilename(), 'public/matches/' . $video->getFilename() ); + + continue 2; } } } - } - // $service = new Service($url, $this->output); - // - // if ($format == 'video') { - // $service->downloadVideo('video'); - // } elseif ($format == 'audio') { - // $service->downloadAudio('audio'); - // } - // - // $video_model = Video::create( - // [ - // 'path' => $service->getFullPath() - // ] - // ); - // - // $this->info('Download of ' . $service->getTitle() . ' complete!'); + $this->info('Failed to find Match for filename: ' . $filename); + } return Command::SUCCESS; } diff --git a/app/Console/Commands/TestTor.php b/app/Console/Commands/TestTor.php index 87f0050..5c4c296 100644 --- a/app/Console/Commands/TestTor.php +++ b/app/Console/Commands/TestTor.php @@ -7,43 +7,20 @@ use Illuminate\Support\Facades\Http; class TestTor extends Command { - /** - * The name and signature of the console command. - * - * @var string - */ protected $signature = 'test:tor { onion }'; - - /** - * The console command description. - * - * @var string - */ protected $description = 'Testing tor'; - /** - * Create a new command instance. - * - * @return void - */ public function __construct() { parent::__construct(); } - /** - * Execute the console command. - * - * @return mixed - */ public function handle() { $url = $this->argument('onion'); $response = Http::get($url); - dd($response->body()); - // $url = 'http://jhiwjjlqpyawmpjx.onion/'; // Note the addition of a semicolon. $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); diff --git a/app/Rugby/Concerns/Matchable.php b/app/Rugby/Concerns/Matchable.php index b0c512c..2d2f02b 100644 --- a/app/Rugby/Concerns/Matchable.php +++ b/app/Rugby/Concerns/Matchable.php @@ -6,7 +6,7 @@ trait Matchable { public function matchableFilters() { - + dd("You must implement matchableFilters!"); } public function isMatch(string $search): bool @@ -28,7 +28,7 @@ trait Matchable return true; } - public function matchString(string $needle, string $type, string $hay): bool + protected function matchString(string $needle, string $type, string $hay): bool { if ($type == 'date') { if (!$this->isDate($needle)) { @@ -40,12 +40,22 @@ trait Matchable }; return $this->getYear($hay) == $this->getYear($needle); + } elseif ($type == 'number') { + if (!$this->isNumeric($needle)) { + dd('Matchable `' . $hay . '` is not numeric'); + }; + + if (!$this->isNumeric($hay)) { + return false; + } + + return $this->getNumeric($hay) == $this->getNumeric($needle); } return $hay == $needle; } - public function matchArray(string $needle, array $haystack, string $type): bool + protected function matchArray(string $needle, array $haystack, string $type): bool { foreach ($haystack as $hay) { $result = $this->matchString($needle, $type, $hay); @@ -58,7 +68,7 @@ trait Matchable return false; } - public function isDate(string $value): bool + protected function isDate(string $value): bool { $patterns = [ "/\d{2}\-\d{2}\-\d{4}/", @@ -76,7 +86,58 @@ trait Matchable return false; } - public function getYear(string $value): string + protected function isNumeric(string $value): bool + { + $value = strtolower($value); + + if (is_numeric($value)) { + return true; + } + + + + if (array_key_exists($value, $this->getNumberLookup())) { + return true; + } + + return false; + } + + protected function getNumberLookup(): array + { + return [ + 'one' => 1, + 'two' => 2, + 'three' => 3, + 'four' => 4, + 'five' => 5, + 'six' => 6, + 'seven' => 7, + 'eight' => 8, + 'nine' => 9, + 'ten' => 10, + ]; + } + + protected function getNumeric(string $value): int + { + $value = strtolower($value); + + $numbers = $this->getNumberLookup(); + + if (is_numeric($value)) { + return $value; + } + + if (array_key_exists($value, $numbers)) { + return $numbers[$value]; + } + + return false; + + } + + protected function getYear(string $value): string { $patterns = [ "/\d{4}/" diff --git a/app/Rugby/Factory/DataAdapter.php b/app/Rugby/Factory/DataAdapter.php index b4971ce..d0434c9 100644 --- a/app/Rugby/Factory/DataAdapter.php +++ b/app/Rugby/Factory/DataAdapter.php @@ -4,5 +4,4 @@ namespace App\Rugby\Factory; interface DataAdapter { - } diff --git a/app/Rugby/Factory/Service.php b/app/Rugby/Factory/Service.php index fa8028f..e966ce6 100644 --- a/app/Rugby/Factory/Service.php +++ b/app/Rugby/Factory/Service.php @@ -52,6 +52,27 @@ class Service $away_team = Model\Team::Create(['name' => $data['team_away']]); } + $existing_match = Model\Match::whereHas( + 'teams', + function ($query) use ($home_team) { + $query->where('name', '=', $home_team->name); + } + )->whereHas( + 'teams', + function ($query) use ($away_team) { + $query->where('name', '=', $away_team->name); + } + )->whereHas( + 'tournaments', + function ($query) use ($tournament) { + $query->where('name', '=', $tournament->name); + } + )->get(); + + if ($existing_match->first()) { + return; + } + $match = Model\Match::create( [ 'date' => (new \Carbon\Carbon($data['match_date']))->format('Y-m-d H:i:s'), diff --git a/app/Rugby/Factory/SixnationsrugbyAdapter.php b/app/Rugby/Factory/SixnationsrugbyAdapter.php index 6b6edd7..4a36602 100644 --- a/app/Rugby/Factory/SixnationsrugbyAdapter.php +++ b/app/Rugby/Factory/SixnationsrugbyAdapter.php @@ -28,7 +28,6 @@ class SixnationsrugbyAdapter implements DataAdapter return $this->tournament_name; } - protected function cleanData($data) { // string diff --git a/app/Rugby/Model/Match.php b/app/Rugby/Model/Match.php index 58a3dd9..38d58d8 100644 --- a/app/Rugby/Model/Match.php +++ b/app/Rugby/Model/Match.php @@ -47,14 +47,14 @@ class Match extends Model return $video->getUrl(); } - public function homeTeam() + public function getHomeTeam() { - return $this->teams()->wherePivot('is_home', '=', true); + return $this->teams()->wherePivot('is_home', '=', true)->first(); } - public function awayTeam() + public function getAwayTeam() { - return $this->teams()->wherePivot('is_home', '=', false); + return $this->teams()->wherePivot('is_home', '=', false)->first(); } public function tournaments() @@ -66,5 +66,4 @@ class Match extends Model { return $this->hasMany(Video::class, 'match_id'); } - } diff --git a/app/Rugby/Model/Tournament.php b/app/Rugby/Model/Tournament.php index 3ab6352..5c81278 100644 --- a/app/Rugby/Model/Tournament.php +++ b/app/Rugby/Model/Tournament.php @@ -12,7 +12,6 @@ class Tournament extends Model use Matchable; protected $table = 'tournaments'; - protected $fillable = ['name']; public function matchableFilters() @@ -25,6 +24,10 @@ class Tournament extends Model return ['date', $value]; } + if ($this->isNumeric($value)) { + return ['number', $value]; + } + return ['string', $value]; } ); diff --git a/app/Rugby/Model/Video.php b/app/Rugby/Model/Video.php index 2182b65..48da15e 100644 --- a/app/Rugby/Model/Video.php +++ b/app/Rugby/Model/Video.php @@ -10,7 +10,7 @@ class Video extends Model { protected $table = 'videos'; protected $casts = ['date' => 'datetime:Y-m-d']; - protected $fillable = ['path']; + protected $fillable = ['path', 'url']; public function match() { @@ -27,8 +27,5 @@ class Video extends Model public function getUrl(): string { return asset('storage/matches/' . $this->getFilename()); - // return Storage::disk('local')->url( - // 'matches/' . $this->getFilename() - // ); } } diff --git a/app/Youtube/Service.php b/app/Youtube/Service.php index 862a27a..e55fae5 100644 --- a/app/Youtube/Service.php +++ b/app/Youtube/Service.php @@ -2,6 +2,8 @@ namespace App\Youtube; +use App\Rugby\Model\Video; +use Goutte\Client; use Illuminate\Support\Facades\Storage; use Illuminate\Support\Str; use Symfony\Component\Console\Helper\ProgressBar; @@ -13,6 +15,7 @@ use YoutubeDl\YoutubeDl; class Service { protected $url; + protected $path; protected $progressBar; protected $running = false; protected $video; @@ -20,6 +23,7 @@ class Service public function __construct(string $url, $output = null) { $this->url = $url; + if ($output) { $this->progressBar = new ProgressBar($output, 100); $this->output = $output; @@ -37,14 +41,68 @@ class Service public function downloadVideo(string $path) { - $this->path = $path; - $this->download($this->getVideoOptions()); + if (!$this->fileExists($this->url, 'video')) { + $this->path = $path; + $this->download($this->getVideoOptions()); + return true; + } + + return false; + } + + // public function downloadAudio(string $path) + // { + // if (!$this->fileExists($this->url, 'audio')) { + // $this->path = $path; + // + // $this->download($this->getAudioOptions()); + // return true; + // } + // + // return false; + // } + + public function modelExists(string $url): bool + { + $existing_match = Video::where('url', '=', $url)->get(); + + if ($existing_match->count()) { + return true; + } + + return false; + } + + public function createModel($url) + { + if ($this->downloadVideo('video')) { + $video_model = Video::create( + [ + 'path' => $this->getFullPath(), + 'url' => $url + ] + ); + }; + } + + public function getTitle(): string + { + return $this->video->getTitle(); } - public function downloadAudio(string $path) + protected function fileExists(string $url, string $directory): bool { - $this->path = $path; - $this->download($thjis->getAudioOptions()); + $url_parts = explode('=', $url); + + $all_files = Storage::disk('local')->files('youtube/'. $directory); + + $matching_files = preg_grep("/$url_parts[1]/i", $all_files); + + foreach ($matching_files as $path) { + return true; + } + + return false; } protected function download(array $options) @@ -74,15 +132,12 @@ class Service $this->progressBar->finish(); $this->running(false); - // $this->getWorkingPath() .'/' . $video->getFilename(), Storage::disk('local')->move( 'tmp/youtube/video/' . $video->getFilename(), 'youtube/video/' . $video->getFilename() ); $this->video = $video; - - // $video->getFile(); // \SplFileInfo instance of downloaded file } catch (NotFoundException $e) { dd($e); // Video not found @@ -98,16 +153,6 @@ class Service } } - public function getTitle(): string - { - return $this->video->getTitle(); - } - - public function getFullPath(): string - { - return $this->getStoragePath() . '/' . $this->video->getFilename(); - } - protected function formatBytes(string $bytes) { $units = [ @@ -143,7 +188,7 @@ class Service return [ 'prefer-free-formats' => true, 'no-overwrites' => true, - 'skip-download' => true + // 'skip-download' => true ]; } diff --git a/database/migrations/2020_06_20_214459_add_url_to_videos.php b/database/migrations/2020_06_20_214459_add_url_to_videos.php new file mode 100644 index 0000000..b55fb29 --- /dev/null +++ b/database/migrations/2020_06_20_214459_add_url_to_videos.php @@ -0,0 +1,36 @@ +string('url')->nullable(); + } + ); + } + + /** + * Reverse the migrations. + * + * @return void + */ + public function down() + { + Schema::table( + 'videos', function (Blueprint $table) { + // + } + ); + } +} diff --git a/resources/views/index.blade.php b/resources/views/index.blade.php index 0500e96..a322bc6 100644 --- a/resources/views/index.blade.php +++ b/resources/views/index.blade.php @@ -32,9 +32,9 @@ @foreach ($tournament->matches as $match) {{ $match->id }} - {{ $match->homeTeam()->first()->getName() }} + {{ $match->getHomeTeam()->getName() }} {{ $match->score }}
{{$match->half_score}} - {{ $match->awayTeam()->first()->getName() }} + {{ $match->getAwayTeam()->getName() }} {{ $match->getDisplayName() }} {{ $match->referee }} {{ $match->getDisplayDate() }} -- cgit v1.2.3