commit 6c812dbf454879eaaf478ad4a9c7c7f7996331ef
Author: Alex 'AdUser' Z
Date:   Fri May 31 17:01:35 2019 +1000

    + initial

diff --git a/qrss-export b/qrss-export
new file mode 100755
index 0000000..5bb781b
--- /dev/null
+++ b/qrss-export
@@ -0,0 +1,151 @@
+#!/usr/bin/env perl
+
+# Interactive exporter: reads the "feeds" and "news" tables of an SQLite
+# database and writes each requested feed, with its news entries, to a YAML
+# file in the current directory.
+
+use strict;
+use warnings;
+use utf8;
+
+use Getopt::Std;
+use IO::File;
+use Date::Parse;
+use DBIx::Simple;
+use SQL::Abstract;
+use Term::Prompt;
+use YAML;
+
+# -f is mandatory (the database file); -F optionally presets the URL filter.
+my $usage = "Usage: $0 -f DATABASE [-F URL_FILTER]\n";
+
+my $opts = { f => undef, F => undef };
+getopts('f:F:', $opts)
+    or die $usage;
+
+die $usage
+    unless $opts->{f} and -f $opts->{f};
+
+my $dbopts = { sqlite_unicode => 1, AutoCommit => 1, RaiseError => 1 };
+my $dsn    = sprintf 'dbi:SQLite:dbname=%s', $opts->{f};
+my $db     = DBIx::Simple->new($dsn, undef, undef, $dbopts);
+
+# Print a table of feeds ordered by title.  With a filter set (-F or the "f"
+# command) only feeds whose xmlUrl contains it are shown; otherwise every feed
+# with a non-empty xmlUrl is shown.
+sub list_feeds {
+    my $filter = $opts->{F}
+        ? { xmlUrl => { -like => "%$opts->{F}%" } }
+        : { xmlUrl => { '!=' => '' } };
+    my @feeds = $db->select('feeds', [qw(id title text xmlUrl)], $filter, 'title')->hashes;
+    print "  ID | Feed name (URL)\n";
+    print "-----+-----------------------------------------------------------\n";
+    foreach my $f (@feeds) {
+        # DBIx::Simple lower-cases result keys by default, hence "xmlurl".
+        printf " %3d | %s (%s)\n", $f->{id}, $f->{title} || $f->{text} || '', $f->{xmlurl};
+    }
+    print "\n";
+}
+
+# Export feed $fid and all of its news rows to "ID - NAME.yml" as a stream of
+# YAML documents: one of type "feed", then one of type "entry" per news row.
+# Warns and returns early when the feed does not exist or the output file
+# cannot be created.
+sub export_feed {
+    my $fid  = shift;
+    my $feed = $db->select('feeds', '*', { id => $fid })->hash
+        or return warn "no such feed: $fid\n";
+    my $name  = $feed->{title} || $feed->{text} || '';
+    my $fname = sprintf "%d - %s", $fid, substr($name, 0, 128);
+    $fname =~ tr[/][]d;    # a slash would be taken as a directory separator
+    $fname .= '.yml';
+    my $file = IO::File->new($fname, 'w')
+        or return warn "can't create '$fname': $!\n";
+    my $yml = YAML::Dump({
+        type => 'feed',
+        url  => $feed->{xmlurl} || $feed->{htmlurl},
+        name => $feed->{title} || $feed->{text},
+        note => $feed->{description},
+    });
+    # The handle has no encoding layer: turn character strings into UTF-8 bytes.
+    if (utf8::is_utf8($yml)) { utf8::encode($yml); }
+    $file->print($yml);
+    my @news = $db->select('news', '*', { feedId => $fid })->hashes;
+    my $exported = 0;
+    print "exporting: ";
+    while (my $n = shift @news) {
+        # Lower-case letter = flag off, upper-case = flag on (U: unread, S: starred).
+        $n->{flags} = '';
+        $n->{flags} .= ($n->{read})    ? 'u' : 'U';
+        $n->{flags} .= ($n->{starred}) ? 'S' : 's';
+        # Skip str2time() when there is no timestamp; it would only warn.
+        my $stamp = $n->{published} || $n->{modified};
+        $n->{ctime} = $stamp ? str2time($stamp) : undef;
+        # Only a description present: export it as the content, not the summary.
+        if ($n->{description} and not $n->{content}) {
+            $n->{content} = $n->{description};
+            undef $n->{description};
+        }
+        # The entry URL is the first of guid / link_href that contains "://".
+        foreach my $attr (qw(guid link_href)) {
+            next unless $n->{$attr} and index($n->{$attr}, '://') >= 0;
+            $n->{url} = $n->{$attr};
+            last;
+        }
+        $yml = YAML::Dump({
+            type     => 'entry',
+            url      => $n->{url},
+            ctime    => $n->{ctime},
+            flags    => $n->{flags},
+            author   => $n->{author_name},
+            category => $n->{category},
+            title    => $n->{title},
+            summary  => $n->{description},
+            content  => $n->{content},
+        });
+        if (utf8::is_utf8($yml)) { utf8::encode($yml); }
+        $file->print($yml);
+        $exported++;
+        if ($exported % 100 == 0) { print '.'; }    # one dot per 100 entries
+    }
+    # Buffered write errors may only surface on close.
+    $file->close
+        or warn "error writing '$fname': $!\n";
+    print "\n";
+    printf "exported: %s (%d entries)\n", $fname, $exported;
+}
+
+# Print the list of interactive commands.
+sub show_help {
+    print <<"HELP";
+ l\tList feeds
+ f\tSet list filter
+ e\tExport
+ h\tHelp
+ q\tQuit
+HELP
+}
+
+binmode STDOUT => ':utf8';
+$| = 1;
+
+# Command loop; it is left only through the "q" command.
+while (1) {
+    # Named $cmd rather than $a, which is reserved for sort comparators.
+    my $cmd = prompt('c', 'command', 'h for help', '', qw(l f e h q));
+    if ($cmd eq 'l') {
+        list_feeds();
+    } elsif ($cmd eq 'f') {
+        $opts->{F} = prompt('a', 'Filter by URL', '', 'example.com', '');
+    } elsif ($cmd eq 'e') {
+        my $fid = prompt('n', 'feed id:', '', '');
+        export_feed($fid);
+    } elsif ($cmd eq 'q') {
+        print "exiting...\n";
+        exit 0;
+    } else {
+        show_help();
+    }
+}
+
+exit 0;
diff --git a/qrss-stats b/qrss-stats
new file mode 100755
index 0000000..c9d5cb2
--- /dev/null
+++ b/qrss-stats
@@ -0,0 +1,57 @@
+#!/usr/bin/env perl
+
+# Print per-feed statistics for an SQLite database: number of news rows and
+# the summed LENGTH() of their description and content columns, feeds with
+# the most news first, followed by a totals row.
+
+use strict;
+use warnings;
+use utf8;
+
+use Getopt::Std;
+use DBIx::Simple;
+use SQL::Abstract;
+
+# -f is mandatory: the path of the database file.
+my $usage = "Usage: $0 -f DATABASE\n";
+
+my $opts = { f => undef };
+getopts('f:', $opts)
+    or die $usage;
+
+die $usage
+    unless $opts->{f} and -f $opts->{f};
+
+my $dbopts = { sqlite_unicode => 1, AutoCommit => 1, RaiseError => 1 };
+my $dsn    = sprintf 'dbi:SQLite:dbname=%s', $opts->{f};
+my $db     = DBIx::Simple->new($dsn, undef, undef, $dbopts);
+
+# Inner join: feeds without any news row do not appear in the result.
+my $query = 'SELECT f.id, f.title, f.text, COUNT(*) AS cnt, '
+    . ' SUM(LENGTH(n.description)) AS len1, SUM(LENGTH(n.content)) AS len2 FROM feeds f'
+    . ' JOIN news n ON (f.id = feedId) GROUP BY f.id ORDER BY cnt DESC';
+my $stats = $db->query($query)->hashes;
+
+# Header columns are padded to the widths used by the printf formats below.
+print "   ID |   News |   Summary |   Content | Title\n";
+print "---------------" x 4, "\n";
+my $total = { cnt => 0, len1 => 0, len2 => 0 };
+foreach my $s (@{ $stats }) {
+    # Same name fallback as qrss-export (title, then text); the final '' keeps
+    # an unnamed feed from raising "uninitialized value" warnings.
+    my $title = $s->{title} || $s->{text} || '';
+    utf8::encode($title);    # STDOUT has no encoding layer here
+    # SUM() yields NULL when all of its inputs are NULL.
+    $s->{len1} //= 0;
+    $s->{len2} //= 0;
+    printf " %4d | %6d | %8.1fK | %8.1fK | %s\n", $s->{id}, $s->{cnt},
+        $s->{len1} / 1024, $s->{len2} / 1024, $title;
+    $total->{cnt}  += $s->{cnt};
+    $total->{len1} += $s->{len1};
+    $total->{len2} += $s->{len2};
+}
+print "---------------" x 4, "\n";
+printf "%5s | %6d | %8.1fK | %8.1fK | %s\n", 'Total', $total->{cnt},
+    $total->{len1} / 1024, $total->{len2} / 1024, '';
+
+exit 0;
diff --git a/qrss.sql b/qrss.sql
new file mode 100644
index 0000000..1fce956
--- /dev/null
+++ b/qrss.sql
@@ -0,0 +1,121 @@
+-- Schema of the "feeds" and "news" tables read by qrss-export and qrss-stats.
+-- NOTE(review): unusual spellings (cloud_protocal, maximumAgoOfNewEnable) are
+-- kept as is; presumably they must match existing databases -- confirm
+-- before renaming anything.
+
+CREATE TABLE feeds (
+  id integer primary key,
+  text varchar,
+  title varchar,
+  description varchar,
+  xmlUrl varchar,
+  htmlUrl varchar,
+  language varchar,
+  copyrights varchar,
+  author_name varchar,
+  author_email varchar,
+  author_uri varchar,
+  webMaster varchar,
+  pubdate varchar,
+  lastBuildDate varchar,
+  category varchar,
+  contributor varchar,
+  generator varchar,
+  docs varchar,
+  cloud_domain varchar,
+  cloud_port varchar,
+  cloud_path varchar,
+  cloud_procedure varchar,
+  cloud_protocal varchar,
+  ttl integer,
+  skipHours varchar,
+  skipDays varchar,
+  image blob,
+  unread integer,
+  newCount integer,
+  currentNews integer,
+  label varchar,
+  undeleteCount integer,
+  tags varchar,
+  hasChildren integer default 0,
+  parentId integer default 0,
+  rowToParent integer,
+  updateIntervalEnable int,
+  updateInterval int,
+  updateIntervalType varchar,
+  updateOnStartup int,
+  displayOnStartup int,
+  markReadAfterSecondsEnable int,
+  markReadAfterSeconds int,
+  markReadInNewspaper int,
+  markDisplayedOnSwitchingFeed int,
+  markDisplayedOnClosingTab int,
+  markDisplayedOnMinimize int,
+  layout text,
+  filter text,
+  groupBy int,
+  displayNews int,
+  displayEmbeddedImages integer default 1,
+  loadTypes text,
+  openLinkOnEmptyContent int,
+  columns text,
+  sort text,
+  sortType int,
+  maximumToKeep int,
+  maximumToKeepEnable int,
+  maximumAgeOfNews int,
+  maximumAgoOfNewEnable int,
+  deleteReadNews int,
+  neverDeleteUnreadNews int,
+  neverDeleteStarredNews int,
+  neverDeleteLabeledNews int,
+  status text,
+  created text,
+  updated text,
+  lastDisplayed text,
+  f_Expanded integer default 1,
+  flags text,
+  authentication integer default 0,
+  duplicateNewsMode integer default 0,
+  typeFeed integer default 0,
+  showNotification integer default 0,
+  disableUpdate integer default 0,
+  javaScriptEnable integer default 1,
+  layoutDirection integer default 0
+);
+
+CREATE TABLE news (
+  id integer primary key,
+  feedId integer,
+  guid varchar,
+  guidislink varchar default 'true',
+  description varchar,
+  content varchar,
+  title varchar,
+  published varchar,
+  modified varchar,
+  received varchar,
+  author_name varchar,
+  author_uri varchar,
+  author_email varchar,
+  category varchar,
+  label varchar,
+  new integer default 1, -- actually "seen"
+  read integer default 0, -- read/unread
+  starred integer default 0,
+  deleted integer default 0,
+  attachment varchar,
+  comments varchar,
+  enclosure_length,
+  enclosure_type,
+  enclosure_url,
+  source varchar,
+  link_href varchar,
+  link_enclosure varchar,
+  link_related varchar,
+  link_alternate varchar,
+  contributor varchar,
+  rights varchar,
+  deleteDate varchar,
+  feedParentId integer default 0
+);