Browse Source

+ Subtitle::Format::SMI

master
Alex 'AdUser' Z 7 years ago
parent
commit
bb2caa2869
  1. 235
      lib/Subtitle/Format/SMI.pm

235
lib/Subtitle/Format/SMI.pm

@ -0,0 +1,235 @@
package Subtitle::Format::SMI;
use strict;
use warnings;
use utf8;
use Subtitle::Event;
use Subtitle::Utils qw(:string :timing);
use base 'Subtitle::Format';
sub new {
my ($class, %args) = @_;
my $self = {
debug => $args{debug} || 0,
eol => $args{eol} || "\n",
title => '',
style => undef,
params => {},
events => [],
log => [],
};
return bless($self, $class);
}
sub new_event { return Subtitle::Event->new; }
sub parse {
my ($self, $lines) = @_;
my $text = join("\n", @{ $lines });
## the code below taken from HTML::SimpleParse 0.12
## and modified for modern perl versions
my @tokens;
while (1) {
my ($content, $type);
# First we try to pull off any plain text (anything before a "<" char)
if ($text =~ /\G([^<]+)/gcs) {
$content = $1; $type = 'text';
trim($content);
chomp_all($content);
# Then, SSI, comments, and markup declarations (usually <!DOCTYPE...>)
# ssi: <!--#stuff-->
# comment: <!--stuff-->
# markup: <!stuff>
} elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) {
$type = ($2 ? 'ssi' : 'comment');
$content = $1;
} elsif ($text =~ /\G<(!.*?)>/gcs) {
$type = 'markup';
$content = $1;
# Then, look for an end tag
} elsif ($text =~ m|\G</([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) {
$content = lc($1); $type = 'endtag';
# Then, finally we look for a start tag
# We know the first char is <, make sure there's a >
} elsif ($text =~ /\G<(.*?)>/gcs) {
$content = lc($1); $type = 'starttag';
} else {
# the string is exhausted, or there's no > in it.
push @tokens, {
'content' => substr($text, pos $text),
'type' => 'text',
} unless (pos($text) eq length($text));
last;
}
## end of imported code
push @tokens, {
'content' => $content,
'type' => $type,
'offset' => ($type eq 'text' ?
pos($text) - length($content) :
pos($text) - length($content) - 2),
} unless $type eq 'text' and length($content) == 0;
}
my @stack;
my $event;
my $path = '';
foreach my $t (@tokens) {
if ($t->{type} eq 'starttag') {
my ($n) = ($t->{content} =~ m{^(\S+)});
$t->{name} = $n;
pos($t->{content}) = length($n);
while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) {
$t->{params}->{$1} = $2;
}
if (_is_markup_tag($n)) {
# skip: ignore
} elsif (@stack and $stack[-1]->{name} eq $n) {
# skip: same tag
} else {
$path .= '/' . lc($n);
push @stack, $t;
}
} elsif ($t->{type} eq 'endtag') {
unless (_is_markup_tag($t->{content})) {
my $n = $t->{content};
$path =~ s</$n.*?$><>i;
pop @stack;
}
next;
}
#
if ($path eq '/sami/head/title' and $t->{type} eq 'text') {
$self->{title} = $t->{content};
} elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') {
my $c = _comment_content($t->{content});
$c =~ m{(\S+):\s*(\S+)};
$self->{params}->{lc($1)} = $2 if $1;
} elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') {
$self->{style} = _comment_content($t->{content});
} elsif ($path eq '/sami/body/table/sync') {
if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') {
my $time = $t->{params}->{start};
if ($event and $time) {
$event->t_end($time);
push @{ $self->{events} }, $event;
undef $event;
}
$event = $self->new_event;
$event->t_start($time) if defined $time;
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') {
$self->log(error => "<p> tag outside event at offset $t->{offset}")
unless $event;
$t->{content} =~ m<class=['"]?([^'"]+)>i;
$event->{class} = $1 if $1;
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'br') {
my $c = $event->text;
$event->text($c . "\\n");
} elsif ($t->{type} eq 'text') {
if ($t->{content} eq '&nbsp;') {
undef $event; # empty event used to hide previous one
} else {
my $c = $event->text;
$c .= $t->{content};
$c =~ tr/\ \t//s; # remove multiple spaces
$event->text($c);
}
} else {
}
} else {
#print "unhandled token: $t->{type} -- $t->{content}\n";
}
} # foreach @tokens
if ($event and my $t = $self->{params}->{length}) {
$event->t_end($t);
push @{ $self->{events} }, $event;
undef $event;
}
return scalar @{ $self->{events} };
}
sub build {
my ($self) = @_;
my @lines;
push @lines, "<sami>";
push @lines, " <head>";
if ($self->{title}) {
my $line = " " x 4 . _tag_wrap('title', $self->{title});
push @lines, $line;
}
if (scalar keys %{ $self->{params} }) {
while (my ($k, $v) = each %{ $self->{params} }) {
my $c = sprintf '<!-- %s: %s -->', ucfirst($k), $v;
my $line = " " x 4 . _tag_wrap('samiparam', $c);
push @lines, $line;
}
}
if ($self->{style}) {
push @lines, ' <style type="text/css"><!--';
push @lines, $self->{style};
push @lines, ' --></style>';
}
push @lines, " </head>";
push @lines, " <body>";
push @lines, " <table>";
my @events = $self->events;
while (my $e = shift @events) {
my $t = $e->text;
$t =~ s{\\n}{<br>}smg;
my $l = _tag_wrap('p', $t, {class => $e->{class}});
push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start});
if (@events and $e->t_end == $events[0]->t_start) {
# next event replaces current without gap,
# no need to hide if explicitly
next;
} else {
my $t = _tag_wrap('p', '&nbsp;');
push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end});
}
}
push @lines, " </table>";
push @lines, " </body>";
push @lines, "</sami>", "";
return join($self->{eol}, @lines);
}
sub _tag_wrap {
my ($name, $content, $attrs) = @_;
my @attrs;
if ($attrs and ref($attrs) eq 'HASH') {
while (my ($k, $v) = each %{ $attrs }) {
next unless defined $v;
push @attrs, sprintf(' %s=%s', $k, $v);
}
}
return sprintf("<%s%s>%s</%s>", $name, join('', @attrs), $content, $name);
}
sub _is_markup_tag {
my $t = lc(shift);
return 1 if $t eq 'p';
return 1 if $t eq 'br';
return;
}
sub _comment_content {
my $c = shift;
$c =~ s<(^!-*\s*|\s*-*$)><>go; # strip !-- and --
$c =~ s<^[\r\n]*><>o;
$c =~ s<[\r\n]*$><>o;
$c =~ s<^\s+><>gms;
trim($c);
return $c;
}
1;
Loading…
Cancel
Save