You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
321 lines
8.0 KiB
321 lines
8.0 KiB
#!/usr/bin/perl |
|
|
|
use strict; |
|
use warnings; |
|
use feature qw(say); |
|
use utf8; |
|
|
|
use Getopt::Long; |
|
use YAML::Any qw(LoadFile); |
|
use LWP::UserAgent; |
|
use XML::Feed; |
|
|
|
##### variables |
|
our $VERSION = '0.01'; |
|
my %opts = ( |
|
config => "$ENV{HOME}/.config/twatch-lite/config", |
|
state => "$ENV{HOME}/.config/twatch-lite/state", |
|
verbose => 0, |
|
); |
|
|
|
##### functions |
|
sub usage { |
|
print <<USAGE; |
|
Usage: twatch-lite [-v] [-c <config>] |
|
-c, --config=FILE Path to config file, mandatory |
|
default: $opts{config} |
|
-s, --state=FILE Path to file with feed positions, optional |
|
default: $opts{state} |
|
-v, --verbose Increase verbosity |
|
USAGE |
|
exit shift // 1; |
|
} |
|
|
|
sub _log { |
|
my ($level, $message) = @_; |
|
return if ($level eq 'debug' and not $opts{verbose}); |
|
print STDERR "[$level] ", $message, "\n"; |
|
exit(1) if $level eq 'fatal'; |
|
} |
|
|
|
sub load_config { |
|
my ($path) = @_; |
|
|
|
_log(debug => "Loading config"); |
|
|
|
_log(fatal => "Can't find config: no such file") |
|
unless -f $path; |
|
|
|
my $config; |
|
eval { $config = LoadFile($path); 1 } |
|
or do { _log(fatal => "Can't load config: $@"); }; |
|
|
|
_log(debug => "Sanity checks for config file"); |
|
_log(fatal => "Missing section `defaults` in config file") |
|
unless (exists $config->{defaults} and ref $config->{defaults} eq 'HASH'); |
|
_log(fatal => "Missing section `feeds` in config file") |
|
unless (exists $config->{feeds} and ref $config->{feeds} eq 'ARRAY'); |
|
_log(warn => "'outdir' not set in config, using /tmp"), $config->{outdir} = '/tmp' |
|
unless ($config->{outdir}); |
|
|
|
my $i = 0; |
|
foreach my $feed (@{ $config->{feeds} }) { |
|
$i++; |
|
_log(info => "Processing feed #$i: $feed->{url}"); |
|
_log(fatal => "Feed is not a hashref") |
|
unless (ref $feed eq 'HASH'); |
|
_log(fatal => "Missing 'url' in feed") |
|
unless $feed->{url} and $feed->{url} =~ m{https?://}oi; |
|
_log(fatal => "Missing 'matches' section in feed") |
|
unless $feed->{matches} and ref $feed->{matches} eq 'ARRAY'; |
|
_log(fatal => "Empty 'matches' section in feed") |
|
unless (scalar @{ $feed->{matches} }); |
|
|
|
foreach my $key (qw(use lookup)) { |
|
$feed->{$key} //= $config->{defaults}->{$key}; |
|
} |
|
# TODO: more |
|
($feed->{hostname}) = ($feed->{url} =~ m{://([a-z0-9\.-]+)}oi); |
|
} |
|
|
|
return $config; |
|
} |
|
|
|
sub load_state { |
|
my ($path) = @_; |
|
my $state = {}; |
|
|
|
_log(debug => "Loading file with feed positions"); |
|
|
|
if (-f $path) { |
|
open my $FH, '<', $path; |
|
while (1) { |
|
last if eof($FH); |
|
my $line = <$FH>; |
|
chomp $line; |
|
next unless ($line =~ m{^([a-z0-9\.-]+),(\d+)}oi); |
|
$state->{$1} = $2; # hostname => lastseen |
|
} |
|
close $FH; |
|
} else { |
|
_log(warn => "Missing state file, will create one at exit"); |
|
} |
|
|
|
return $state; |
|
} |
|
|
|
sub process_feed { |
|
my ($ua, $feedconf) = @_; |
|
my $urls = {}; |
|
|
|
my $resp = $ua->get($feedconf->{url}); |
|
unless ($resp->is_success) { |
|
_log(error => "Can't fetch feed"); |
|
return $urls; |
|
} |
|
|
|
my $feed = XML::Feed->parse(\$resp->decoded_content); |
|
unless ($feed) { |
|
_log(error => "Can't parse feed: " . XML::Feed->errstr); |
|
return $urls; |
|
} |
|
|
|
my @entries = reverse $feed->entries; |
|
for my $entry (@entries) { |
|
my $time = (ref $entry->issued) |
|
? $entry->issued->epoch |
|
: $entry->modified->epoch; |
|
if ($feedconf->{lastseen} >= $time) { |
|
_log(debug => "Already seen before: $time"); |
|
next; |
|
} |
|
my $text = ($feedconf->{lookup} eq 'title') |
|
? $entry->title |
|
: $entry->content->body; |
|
_log(debug => "Match text is: ". $text); |
|
foreach my $match (@{ $feedconf->{matches} }) { |
|
if ($match =~ m{^/(.*)/$}) { |
|
_log(debug => "Trying regex '$1'"); |
|
next unless $text =~ qr/$1/; |
|
} else { |
|
_log(debug => "Trying substr '$match'"); |
|
next unless index($text, $match) >= 0; |
|
} |
|
# gotcha! |
|
my $msg = sprintf "Got it! URL: %s (%s)", $entry->link, $entry->title; |
|
_log(info => $msg); |
|
# TODO: handle 'use' feed option |
|
$urls->{$entry->link} = $entry->title; |
|
} |
|
$feedconf->{lastseen} = $time; |
|
} |
|
|
|
return $urls; |
|
} |
|
|
|
sub fetch_torrents { |
|
my ($ua, $outdir, $urls) = @_; |
|
|
|
while (my ($url, $title) = each(%{ $urls })) { |
|
my $resp = $ua->get($url); |
|
unless ($resp->is_success) { |
|
_log(error => "Can't fetch torrent file: $url"); |
|
next; |
|
} |
|
my $file = sprintf "%s/%s", $outdir, $resp->filename; |
|
open my $T, '>', $file; |
|
unless ($T) { |
|
_log(error => "Can't save torrent to $file"); |
|
next; |
|
} |
|
print $T $resp->decoded_content; |
|
close $T; |
|
} |
|
|
|
return 1; |
|
} |
|
|
|
sub save_state { |
|
my ($path, $state) = @_; |
|
|
|
open my $FH, '>', "$path.new"; |
|
unless ($FH) { |
|
_log(fatal => "Can't open state file for write"); |
|
return; |
|
} |
|
while (my ($host, $time) = each(%{ $state })) { |
|
printf $FH "%s,%s\n", $host, $time; |
|
} |
|
close $FH; |
|
rename("$path.new", $path) |
|
or _log(error => "Can't update state file: $!"); |
|
|
|
return 1; |
|
} |
|
|
|
##### logic |
|
GetOptions( |
|
"config=s" => \$opts{config}, |
|
"state=s" => \$opts{state}, |
|
"verbose" => \$opts{verbose}, |
|
) or usage(1); |
|
|
|
my $config = load_config($opts{config}); |
|
my $state = load_state ($opts{state}); |
|
|
|
my $ua = LWP::UserAgent->new; |
|
$ua->agent("twatch-lite/$VERSION"); |
|
$ua->show_progress(1) if $opts{verbose}; |
|
if ($config->{proxy}) { |
|
$ua->proxy(['http', 'https'], $config->{proxy}); |
|
} else { |
|
$ua->env_proxy; |
|
} |
|
if ($config->{cookies}) { |
|
$ua->cookie_jar({ file => "$ENV{HOME}/.config/twatch-lite/cookies.txt" }); |
|
} |
|
|
|
foreach my $feed (@{ $config->{feeds} }) { |
|
my $host = $feed->{hostname}; |
|
$feed->{lastseen} = $state->{$host} // 0; |
|
my $urls = process_feed($ua, $feed); |
|
fetch_torrents($ua, $config->{outdir}, $urls) |
|
if scalar keys $urls; |
|
$state->{$host} = $feed->{lastseen}; |
|
} |
|
save_state($opts{state}, $state); |
|
|
|
exit 0; |
|
|
|
__END__ |
|
|
|
=pod |
|
|
|
=head1 NAME |
|
|
|
twatch-lite -- periodically polls RSS/ATOM feeds and cherry-picks wanted torrent files |
|
|
|
=head1 SYNOPSIS |
|
|
|
man twatch-lite |
|
mkdir -p ~/.config/twatch-lite |
|
vim ~/.config/twatch-lite/config |
|
twatch-lite --verbose # 1st time, debug |
|
|
|
=head1 DESCRIPTION |
|
|
|
This is very simple and specialized alternative to full-featured software called L<Twatch>. It works only with RSS/ATOM feeds. |
|
|
|
=head1 Config file |
|
|
|
File format is simple: YAML. Real config example: |
|
|
|
--- |
|
outdir: /tmp |
|
proxy: http://192.168.1.1:3128 |
|
defaults: |
|
use: link |
|
lookup: title |
|
feeds: |
|
- url: http://torrent-tracker.org/rss.php |
|
matches: |
|
- 'match substring' |
|
- '/match regex/' |
|
|
|
Now let looks closer: we have two mandatory sections, 'defaults', 'feeds' and a lot of global parameters |
|
|
|
=head2 Global parameters |
|
|
|
=head3 C<outdir> |
|
|
|
Where to save downloaded torrent files. You may want configure your torrent-client to pickup files from this dir. If unset, you'll get warning, and '/tmp' will be used. |
|
|
|
=head3 C<proxy> |
|
|
|
If set - will use http proxy, when fetching rss and torrent-files. Otherwise will connect directly (default). |
|
|
|
=head3 C<cookies> |
|
|
|
If set to any non-empty value, will accept and send back cookies in http-requests. Default - unset. |
|
|
|
=head2 Section C<defaults> |
|
|
|
This is default values for each configured feed. |
|
|
|
=head3 C<use> |
|
|
|
Use this field in RSS/ATOM entry as link to torrent file. For now, only acceptable value is 'link'. |
|
|
|
=head3 C<lookup> |
|
|
|
Lookup this field in RSS/ATOM entry for wanted keywords. Acceptable values: 'body', 'title' (default). |
|
|
|
=head2 Section C<feeds> |
|
|
|
Configured feeds. |
|
|
|
=head3 C<url> |
|
|
|
URL with RSS/ATOM feed. This option must be set in each feed. |
|
|
|
=head3 C<matches> |
|
|
|
List of wanted 'keywords'. If keyword surrounded with slashes, like C</this/>, it will be handled as regex, and as substring otherwise. Regex syntax is PCRE and described in L<perlre(3)>. |
|
|
|
If feed entry matches ANY keyword, torrent file will be downloaded and stored to B<outdir>. |
|
|
|
Matches list must contain at least one keyword. If you want all torrents from this feed, use '/.*/' regex keyword. |
|
|
|
=head3 C<lookup>, C<use> |
|
|
|
Normally this options uses values from C<defaults> section, but you may override them for each feed you want. |
|
|
|
=head1 SEE ALSO |
|
|
|
L<Twatch> |
|
|
|
=head1 AUTHORS |
|
|
|
Alex 'AdUser' Z <aduser@cpan.org> |
|
|
|
=cut
|
|
|