You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

314 lines
7.7 KiB

#!/usr/bin/perl
use strict;
use warnings;
use feature qw(say);
use utf8;
use Getopt::Long;
use YAML::Any qw(LoadFile);
use LWP::UserAgent;
use XML::Feed;
use HTML::Entities;
##### variables
our $VERSION = '0.02';
my %opts = (
config => "$ENV{HOME}/.config/twatch-lite/config",
state => "$ENV{HOME}/.config/twatch-lite/state",
verbose => 0,
);
##### functions
sub usage {
print <<USAGE;
Usage: twatch-lite [-v] [-c <config>]
-c, --config=FILE Path to config file, mandatory
default: $opts{config}
-s, --state=FILE Path to file with feed positions, optional
default: $opts{state}
-v, --verbose Increase verbosity
USAGE
exit shift // 1;
}
sub _log {
my ($level, $message) = @_;
return if ($level eq 'debug' and not $opts{verbose});
print STDERR "[$level] ", $message, "\n";
exit(1) if $level eq 'fatal';
}
sub load_config {
my ($path) = @_;
_log(debug => "Loading config");
_log(fatal => "Can't find config: no such file")
unless -f $path;
my $config;
eval { $config = LoadFile($path); 1 }
or do { _log(fatal => "Can't load config: $@"); };
_log(debug => "Sanity checks for config file");
_log(fatal => "Missing section `feeds` in config file")
unless (exists $config->{feeds} and ref $config->{feeds} eq 'ARRAY');
_log(warn => "'outdir' not set in config, using /tmp"), $config->{outdir} = '/tmp'
unless ($config->{outdir});
my $i = 0;
foreach my $feed (@{ $config->{feeds} }) {
$i++;
_log(debug => "Processing feed #$i: $feed->{url}");
_log(fatal => "Feed is not a hashref")
unless (ref $feed eq 'HASH');
_log(fatal => "Missing 'url' in feed")
unless $feed->{url} and $feed->{url} =~ m{https?://}oi;
_log(fatal => "Missing 'matches' section in feed")
unless $feed->{matches} and ref $feed->{matches} eq 'ARRAY';
_log(fatal => "Empty 'matches' section in feed")
unless (scalar @{ $feed->{matches} });
$feed->{linktype} //= 'direct';
$feed->{lookup} //= 'title';
# TODO: more
($feed->{hostname}) = ($feed->{url} =~ m{://([a-z0-9\.-]+)}oi);
}
return $config;
}
sub load_state {
my ($path) = @_;
my $state = {};
_log(debug => "Loading file with feed positions");
if (-f $path) {
open my $FH, '<', $path;
while (1) {
last if eof($FH);
my $line = <$FH>;
chomp $line;
next unless ($line =~ m{^([a-z0-9\.-]+),(\d+)}oi);
$state->{$1} = $2; # hostname => lastseen
}
close $FH;
} else {
_log(warn => "Missing state file, will create one at exit");
}
return $state;
}
sub process_feed {
my ($ua, $feedconf) = @_;
my $urls = {};
my $resp = $ua->get($feedconf->{url});
unless ($resp->is_success) {
_log(error => "Can't fetch feed");
return $urls;
}
my $data = decode_entities($resp->decoded_content);
my $feed = XML::Feed->parse(\$data);
unless ($feed) {
_log(error => "Can't parse feed: " . XML::Feed->errstr);
return $urls;
}
my @entries = reverse $feed->entries;
for my $entry (@entries) {
my $time = (ref $entry->issued)
? $entry->issued->epoch
: $entry->modified->epoch;
if ($feedconf->{lastseen} >= $time) {
_log(debug => "Already seen before: $time");
next;
}
my $text = ($feedconf->{lookup} eq 'body')
? $entry->content->body
: $entry->title;
_log(info => "Processing: " . $entry->title);
_log(debug => "Match text is: ". $text);
foreach my $match (@{ $feedconf->{matches} }) {
if ($match =~ m{^/(.*)/$}) {
_log(debug => "Trying regex '$1'");
next unless $text =~ qr/$1/i;
} else {
_log(debug => "Trying substr '$match'");
next unless index(lc($text), lc($match)) >= 0;
}
# gotcha!
my $msg = sprintf "Got it! URL: %s (%s)", $entry->link, $entry->title;
_log(info => $msg);
$urls->{$entry->link} = $entry->title;
}
$feedconf->{lastseen} = $time;
}
return $urls;
}
sub fetch_torrents {
my ($ua, $outdir, $urls) = @_;
while (my ($url, $title) = each(%{ $urls })) {
my $resp = $ua->get($url);
unless ($resp->is_success) {
_log(error => "Can't fetch torrent file: $url");
next;
}
my $file = sprintf "%s/%s", $outdir, $resp->filename;
open my $T, '>', $file;
unless ($T) {
_log(error => "Can't save torrent to $file");
next;
}
print $T $resp->decoded_content;
close $T;
}
return 1;
}
sub save_state {
my ($path, $state) = @_;
open my $FH, '>', "$path.new";
unless ($FH) {
_log(fatal => "Can't open state file for write");
return;
}
while (my ($host, $time) = each(%{ $state })) {
printf $FH "%s,%s\n", $host, $time;
}
close $FH;
rename("$path.new", $path)
or _log(error => "Can't update state file: $!");
return 1;
}
##### logic
GetOptions(
"config=s" => \$opts{config},
"state=s" => \$opts{state},
"verbose" => \$opts{verbose},
) or usage(1);
my $config = load_config($opts{config});
my $state = load_state ($opts{state});
my $ua = LWP::UserAgent->new;
$ua->agent("twatch-lite/$VERSION");
$ua->show_progress(1) if $opts{verbose};
if ($config->{proxy}) {
$ua->proxy(['http', 'https'], $config->{proxy});
} else {
$ua->env_proxy;
}
if ($config->{cookies}) {
$ua->cookie_jar({ file => "$ENV{HOME}/.config/twatch-lite/cookies.txt" });
}
foreach my $feed (@{ $config->{feeds} }) {
my $host = $feed->{hostname};
$feed->{lastseen} = $state->{$host} // 0;
my $urls = process_feed($ua, $feed);
fetch_torrents($ua, $config->{outdir}, $urls)
if scalar keys $urls;
$state->{$host} = $feed->{lastseen};
}
save_state($opts{state}, $state);
exit 0;
__END__
=pod
=head1 NAME
twatch-lite -- periodically polls RSS/ATOM feeds and cherry-picks wanted torrent files
=head1 SYNOPSIS
man twatch-lite
mkdir -p ~/.config/twatch-lite
vim ~/.config/twatch-lite/config
twatch-lite --verbose # 1st time, debug
=head1 DESCRIPTION
This is very simple and specialized alternative to full-featured software called L<Twatch>. It works only with RSS/ATOM feeds.
=head1 Config file
File format is simple: YAML. Real config example:
---
outdir: /tmp
proxy: http://192.168.1.1:3128
feeds:
- url: http://torrent-tracker.org/rss.php
linktype: direct
lookup: title
matches:
- 'match substring'
- '/match regex/'
Now let looks closer: we have a lot of global parameters and section 'feeds'
=head2 Global parameters
=head3 C<outdir>
Where to save downloaded torrent files. You may want configure your torrent-client to pickup files from this dir. If unset, you'll get warning, and '/tmp' will be used.
=head3 C<proxy>
If set - will use http proxy, when fetching rss and torrent-files. Otherwise will connect directly (default).
=head3 C<cookies>
If set to any non-empty value, will accept and send back cookies in http-requests. Default - unset.
=head2 Section C<feeds>
Configured feeds.
=head3 C<url>
URL with RSS/ATOM feed. This option must be set in each feed.
=head3 C<matches>
List of wanted 'keywords'. If keyword surrounded with slashes, like C</this/>, it will be handled as regex, and as substring otherwise. Regex syntax is PCRE and described in L<perlre(3)>.
If feed entry matches ANY keyword, torrent file will be downloaded and stored to B<outdir>.
Matches list must contain at least one keyword. If you want all torrents from this feed, use '/.*/' regex keyword.
=head3 C<lookup>
Lookup this field in RSS/ATOM entry for wanted keywords. Acceptable values: 'body', 'title' (default).
=head3 C<linktype>
Type of link in RSS/ATOM entry. Possible values:
* direct -- link points directly to torrent file (default)
* page -- link points to page with link to torrent file (not implemented yet)
=head1 SEE ALSO
L<Twatch>
=head1 AUTHORS
Alex 'AdUser' Z <aduser@cpan.org>
=cut