commit 17dfb22d37badae2fcd9fb5425b79eee00a1ffeb Author: Alex 'AdUser' Z Date: Wed May 27 16:06:31 2015 +1000 + initial diff --git a/twatch-lite b/twatch-lite new file mode 100755 index 0000000..406baf0 --- /dev/null +++ b/twatch-lite @@ -0,0 +1,222 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use feature qw(say); +use utf8; + +use Getopt::Long; +use YAML::Any qw(LoadFile); +use LWP::UserAgent; +use XML::Feed; + +##### variables +our $VERSION = '0.01'; +my %opts = ( + config => "$ENV{HOME}/.config/twatch-lite/config", + state => "$ENV{HOME}/.config/twatch-lite/state", + verbose => 0, +); + +##### functions +sub usage { + print <] + -c, --config=FILE Path to config file, mandatory + default: $opts{config} + -s, --state=FILE Path to file with feed positions, optional + default: $opts{state} + -v, --verbose Increase verbosity +USAGE + exit shift // 1; +} + +sub _log { + my ($level, $message) = @_; + return if ($level eq 'debug' and not $opts{verbose}); + print STDERR "[$level] ", $message, "\n"; + exit(1) if $level eq 'fatal'; +} + +sub load_config { + my ($path) = @_; + + _log(debug => "Loading config"); + + _log(fatal => "Can't find config: no such file") + unless -f $path; + + my $config; + eval { $config = LoadFile($path); 1 } + or do { _log(fatal => "Can't load config: $@"); }; + + _log(debug => "Sanity checks for config file"); + _log(fatal => "Missing section `defaults` in config file") + unless (exists $config->{defaults} and ref $config->{defaults} eq 'HASH'); + _log(fatal => "Missing section `feeds` in config file") + unless (exists $config->{feeds} and ref $config->{feeds} eq 'ARRAY'); + _log(warn => "'outdir' not set in config, using /tmp"), $config->{outdir} = '/tmp' + unless ($config->{outdir}); + + my $i = 0; + foreach my $feed (@{ $config->{feeds} }) { + $i++; + _log(info => "Processing feed #$i: $feed->{url}"); + _log(fatal => "Feed is not a hashref") + unless (ref $feed eq 'HASH'); + _log(fatal => "Missing 'url' in feed") + unless $feed->{url} and $feed->{url} =~ m{https?://}oi; + _log(fatal => "Missing 'matches' section in feed") + unless $feed->{matches} and ref $feed->{matches} eq 'ARRAY'; + _log(fatal => "Empty 'matches' section in feed") + unless (scalar @{ $feed->{matches} }); + + foreach my $key (qw(type use lookup)) { + $feed->{$key} //= $config->{defaults}->{$key}; + } + # TODO: more + ($feed->{hostname}) = ($feed->{url} =~ m{://([a-z0-9\.-]+)}oi); + } + + return $config; +} + +sub load_state { + my ($path) = @_; + my $state = {}; + + _log(debug => "Loading file with feed positions"); + + if (-f $path) { + open my $FH, '<', $path; + while (1) { + last if eof($FH); + my $line = <$FH>; + chomp $line; + next unless ($line =~ m{^([a-z0-9\.-]+),(\d+)}oi); + $state->{$1} = $2; # hostname => lastseen + } + close $FH; + } else { + _log(warn => "Missing state file, will create one at exit"); + } + + return $state; +} + +sub process_feed { + my ($ua, $feedconf) = @_; + my $urls = {}; + + my $resp = $ua->get($feedconf->{url}); + unless ($resp->is_success) { + _log(error => "Can't fetch feed"); + return $urls; + } + + my $feed = XML::Feed->parse(\$resp->decoded_content); + unless ($feed) { + _log(error => "Can't parse feed: " . XML::Feed->errstr); + return $urls; + } + + my @entries = reverse $feed->entries; + for my $entry (@entries) { + my $time = (ref $entry->issued) + ? $entry->issued->epoch + : $entry->modified->epoch; + if ($feedconf->{lastseen} >= $time) { + _log(debug => "Already seen before: $time"); + next; + } + my $text = ($feedconf->{lookup} eq 'title') + ? $entry->title + : $entry->content->body; + _log(debug => "Match text is: ". $text); + foreach my $match (@{ $feedconf->{matches} }) { + if ($match =~ m{^/(.*)/$}) { + _log(debug => "Trying regex '$1'"); + next unless $text =~ qr/$1/; + } else { + _log(debug => "Trying substr '$match'"); + next unless index($text, $match) >= 0; + } + # gotcha! + my $msg = sprintf "Got it! URL: %s (%s)", $entry->link, $entry->title; + _log(info => $msg); + $urls->{$entry->link} = $entry->title; + } + $feedconf->{lastseen} = $time; + } + + return $urls; +} + +sub fetch_torrents { + my ($ua, $outdir, $urls) = @_; + + while (my ($url, $title) = each(%{ $urls })) { + my $resp = $ua->get($url); + unless ($resp->is_success) { + _log(error => "Can't fetch torrent file: $url"); + next; + } + my $file = sprintf "%s/%s", $outdir, $resp->filename; + open my $T, '>', $file; + unless ($T) { + _log(error => "Can't save torrent to $file"); + next; + } + print $T $resp->decoded_content; + close $T; + } + + return 1; +} + +sub save_state { + my ($path, $state) = @_; + + open my $FH, '>', "$path.new"; + unless ($FH) { + _log(fatal => "Can't open state file for write"); + return; + } + while (my ($host, $time) = each(%{ $state })) { + printf $FH "%s,%s\n", $host, $time; + } + close $FH; + rename("$path.new", $path) + or _log(error => "Can't update state file: $!"); + + return 1; +} + +##### logic +GetOptions( + "config=s" => \$opts{config}, + "state=s" => \$opts{state}, + "verbose" => \$opts{verbose}, +) or usage(1); + +my $config = load_config($opts{config}); +my $state = load_state ($opts{state}); + +my $ua = LWP::UserAgent->new; +$ua->agent("twatch-lite/$VERSION"); +$ua->show_progress(1) if $opts{verbose}; +$ua->env_proxy; +# TODO: save cookies? + +foreach my $feed (@{ $config->{feeds} }) { + my $host = $feed->{hostname}; + $feed->{lastseen} = $state->{$host} // 0; + my $urls = process_feed($ua, $feed); + fetch_torrents($ua, $config->{outdir}, $urls) + if scalar keys $urls; + $state->{$host} = $feed->{lastseen}; +} +save_state($opts{state}, $state); + +# cleanup +exit 0;