You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
236 lines
6.5 KiB
236 lines
6.5 KiB
6 years ago
|
package Subtitle::Format::SMI;
|
||
|
|
||
|
use strict;
|
||
|
use warnings;
|
||
|
use utf8;
|
||
|
|
||
|
use Subtitle::Event;
|
||
|
use Subtitle::Utils qw(:string :timing);
|
||
|
|
||
|
use base 'Subtitle::Format';
|
||
|
|
||
|
sub new {
|
||
|
my ($class, %args) = @_;
|
||
|
my $self = {
|
||
|
debug => $args{debug} || 0,
|
||
|
eol => $args{eol} || "\n",
|
||
|
title => '',
|
||
|
style => undef,
|
||
|
params => {},
|
||
|
events => [],
|
||
|
log => [],
|
||
|
};
|
||
|
|
||
|
return bless($self, $class);
|
||
|
}
|
||
|
|
||
|
sub new_event { return Subtitle::Event->new; }
|
||
|
|
||
|
sub parse {
|
||
|
my ($self, $lines) = @_;
|
||
|
my $text = join("\n", @{ $lines });
|
||
|
|
||
|
## the code below taken from HTML::SimpleParse 0.12
|
||
|
## and modified for modern perl versions
|
||
|
my @tokens;
|
||
|
while (1) {
|
||
|
my ($content, $type);
|
||
|
|
||
|
# First we try to pull off any plain text (anything before a "<" char)
|
||
|
if ($text =~ /\G([^<]+)/gcs) {
|
||
|
$content = $1; $type = 'text';
|
||
|
trim($content);
|
||
|
chomp_all($content);
|
||
|
|
||
|
# Then, SSI, comments, and markup declarations (usually <!DOCTYPE...>)
|
||
|
# ssi: <!--#stuff-->
|
||
|
# comment: <!--stuff-->
|
||
|
# markup: <!stuff>
|
||
|
} elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) {
|
||
|
$type = ($2 ? 'ssi' : 'comment');
|
||
|
$content = $1;
|
||
|
} elsif ($text =~ /\G<(!.*?)>/gcs) {
|
||
|
$type = 'markup';
|
||
|
$content = $1;
|
||
|
|
||
|
# Then, look for an end tag
|
||
|
} elsif ($text =~ m|\G</([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) {
|
||
|
$content = lc($1); $type = 'endtag';
|
||
|
|
||
|
# Then, finally we look for a start tag
|
||
|
# We know the first char is <, make sure there's a >
|
||
|
} elsif ($text =~ /\G<(.*?)>/gcs) {
|
||
|
$content = lc($1); $type = 'starttag';
|
||
|
|
||
|
} else {
|
||
|
# the string is exhausted, or there's no > in it.
|
||
|
push @tokens, {
|
||
|
'content' => substr($text, pos $text),
|
||
|
'type' => 'text',
|
||
|
} unless (pos($text) eq length($text));
|
||
|
last;
|
||
|
}
|
||
|
## end of imported code
|
||
|
|
||
|
push @tokens, {
|
||
|
'content' => $content,
|
||
|
'type' => $type,
|
||
|
'offset' => ($type eq 'text' ?
|
||
|
pos($text) - length($content) :
|
||
|
pos($text) - length($content) - 2),
|
||
|
} unless $type eq 'text' and length($content) == 0;
|
||
|
}
|
||
|
|
||
|
my @stack;
|
||
|
my $event;
|
||
|
my $path = '';
|
||
|
foreach my $t (@tokens) {
|
||
|
if ($t->{type} eq 'starttag') {
|
||
|
my ($n) = ($t->{content} =~ m{^(\S+)});
|
||
|
$t->{name} = $n;
|
||
|
pos($t->{content}) = length($n);
|
||
|
while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) {
|
||
|
$t->{params}->{$1} = $2;
|
||
|
}
|
||
|
if (_is_markup_tag($n)) {
|
||
|
# skip: ignore
|
||
|
} elsif (@stack and $stack[-1]->{name} eq $n) {
|
||
|
# skip: same tag
|
||
|
} else {
|
||
|
$path .= '/' . lc($n);
|
||
|
push @stack, $t;
|
||
|
}
|
||
|
} elsif ($t->{type} eq 'endtag') {
|
||
|
unless (_is_markup_tag($t->{content})) {
|
||
|
my $n = $t->{content};
|
||
|
$path =~ s</$n.*?$><>i;
|
||
|
pop @stack;
|
||
|
}
|
||
|
next;
|
||
|
}
|
||
|
#
|
||
|
if ($path eq '/sami/head/title' and $t->{type} eq 'text') {
|
||
|
$self->{title} = $t->{content};
|
||
|
} elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') {
|
||
|
my $c = _comment_content($t->{content});
|
||
|
$c =~ m{(\S+):\s*(\S+)};
|
||
|
$self->{params}->{lc($1)} = $2 if $1;
|
||
|
} elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') {
|
||
|
$self->{style} = _comment_content($t->{content});
|
||
|
} elsif ($path eq '/sami/body/table/sync') {
|
||
|
if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') {
|
||
|
my $time = $t->{params}->{start};
|
||
|
if ($event and $time) {
|
||
|
$event->t_end($time);
|
||
|
push @{ $self->{events} }, $event;
|
||
|
undef $event;
|
||
|
}
|
||
|
$event = $self->new_event;
|
||
|
$event->t_start($time) if defined $time;
|
||
|
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') {
|
||
|
$self->log(error => "<p> tag outside event at offset $t->{offset}")
|
||
|
unless $event;
|
||
|
$t->{content} =~ m<class=['"]?([^'"]+)>i;
|
||
|
$event->{class} = $1 if $1;
|
||
|
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'br') {
|
||
|
my $c = $event->text;
|
||
|
$event->text($c . "\\n");
|
||
|
} elsif ($t->{type} eq 'text') {
|
||
|
if ($t->{content} eq ' ') {
|
||
|
undef $event; # empty event used to hide previous one
|
||
|
} else {
|
||
|
my $c = $event->text;
|
||
|
$c .= $t->{content};
|
||
|
$c =~ tr/\ \t//s; # remove multiple spaces
|
||
|
$event->text($c);
|
||
|
}
|
||
|
} else {
|
||
|
}
|
||
|
} else {
|
||
|
#print "unhandled token: $t->{type} -- $t->{content}\n";
|
||
|
}
|
||
|
} # foreach @tokens
|
||
|
if ($event and my $t = $self->{params}->{length}) {
|
||
|
$event->t_end($t);
|
||
|
push @{ $self->{events} }, $event;
|
||
|
undef $event;
|
||
|
}
|
||
|
return scalar @{ $self->{events} };
|
||
|
}
|
||
|
|
||
|
sub build {
|
||
|
my ($self) = @_;
|
||
|
my @lines;
|
||
|
push @lines, "<sami>";
|
||
|
push @lines, " <head>";
|
||
|
if ($self->{title}) {
|
||
|
my $line = " " x 4 . _tag_wrap('title', $self->{title});
|
||
|
push @lines, $line;
|
||
|
}
|
||
|
if (scalar keys %{ $self->{params} }) {
|
||
|
while (my ($k, $v) = each %{ $self->{params} }) {
|
||
|
my $c = sprintf '<!-- %s: %s -->', ucfirst($k), $v;
|
||
|
my $line = " " x 4 . _tag_wrap('samiparam', $c);
|
||
|
push @lines, $line;
|
||
|
}
|
||
|
}
|
||
|
if ($self->{style}) {
|
||
|
push @lines, ' <style type="text/css"><!--';
|
||
|
push @lines, $self->{style};
|
||
|
push @lines, ' --></style>';
|
||
|
}
|
||
|
push @lines, " </head>";
|
||
|
push @lines, " <body>";
|
||
|
push @lines, " <table>";
|
||
|
my @events = $self->events;
|
||
|
while (my $e = shift @events) {
|
||
|
my $t = $e->text;
|
||
|
$t =~ s{\\n}{<br>}smg;
|
||
|
my $l = _tag_wrap('p', $t, {class => $e->{class}});
|
||
|
push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start});
|
||
|
if (@events and $e->t_end == $events[0]->t_start) {
|
||
|
# next event replaces current without gap,
|
||
|
# no need to hide if explicitly
|
||
|
next;
|
||
|
} else {
|
||
|
my $t = _tag_wrap('p', ' ');
|
||
|
push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end});
|
||
|
}
|
||
|
}
|
||
|
push @lines, " </table>";
|
||
|
push @lines, " </body>";
|
||
|
push @lines, "</sami>", "";
|
||
|
return join($self->{eol}, @lines);
|
||
|
}
|
||
|
|
||
|
sub _tag_wrap {
|
||
|
my ($name, $content, $attrs) = @_;
|
||
|
my @attrs;
|
||
|
if ($attrs and ref($attrs) eq 'HASH') {
|
||
|
while (my ($k, $v) = each %{ $attrs }) {
|
||
|
next unless defined $v;
|
||
|
push @attrs, sprintf(' %s=%s', $k, $v);
|
||
|
}
|
||
|
}
|
||
|
return sprintf("<%s%s>%s</%s>", $name, join('', @attrs), $content, $name);
|
||
|
}
|
||
|
|
||
|
sub _is_markup_tag {
|
||
|
my $t = lc(shift);
|
||
|
return 1 if $t eq 'p';
|
||
|
return 1 if $t eq 'br';
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
sub _comment_content {
|
||
|
my $c = shift;
|
||
|
$c =~ s<(^!-*\s*|\s*-*$)><>go; # strip !-- and --
|
||
|
$c =~ s<^[\r\n]*><>o;
|
||
|
$c =~ s<[\r\n]*$><>o;
|
||
|
$c =~ s<^\s+><>gms;
|
||
|
trim($c);
|
||
|
return $c;
|
||
|
}
|
||
|
|
||
|
1;
|