You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
235 lines
6.5 KiB
235 lines
6.5 KiB
package Subtitle::Format::SMI; |
|
|
|
use strict; |
|
use warnings; |
|
use utf8; |
|
|
|
use Subtitle::Event; |
|
use Subtitle::Utils qw(:string :timing); |
|
|
|
use base 'Subtitle::Format'; |
|
|
|
sub new { |
|
my ($class, %args) = @_; |
|
my $self = { |
|
debug => $args{debug} || 0, |
|
eol => $args{eol} || "\n", |
|
title => '', |
|
style => undef, |
|
params => {}, |
|
events => [], |
|
log => [], |
|
}; |
|
|
|
return bless($self, $class); |
|
} |
|
|
|
sub new_event { return Subtitle::Event->new; } |
|
|
|
sub parse { |
|
my ($self, $lines) = @_; |
|
my $text = join("\n", @{ $lines }); |
|
|
|
## the code below taken from HTML::SimpleParse 0.12 |
|
## and modified for modern perl versions |
|
my @tokens; |
|
while (1) { |
|
my ($content, $type); |
|
|
|
# First we try to pull off any plain text (anything before a "<" char) |
|
if ($text =~ /\G([^<]+)/gcs) { |
|
$content = $1; $type = 'text'; |
|
trim($content); |
|
chomp_all($content); |
|
|
|
# Then, SSI, comments, and markup declarations (usually <!DOCTYPE...>) |
|
# ssi: <!--#stuff--> |
|
# comment: <!--stuff--> |
|
# markup: <!stuff> |
|
} elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) { |
|
$type = ($2 ? 'ssi' : 'comment'); |
|
$content = $1; |
|
} elsif ($text =~ /\G<(!.*?)>/gcs) { |
|
$type = 'markup'; |
|
$content = $1; |
|
|
|
# Then, look for an end tag |
|
} elsif ($text =~ m|\G</([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) { |
|
$content = lc($1); $type = 'endtag'; |
|
|
|
# Then, finally we look for a start tag |
|
# We know the first char is <, make sure there's a > |
|
} elsif ($text =~ /\G<(.*?)>/gcs) { |
|
$content = lc($1); $type = 'starttag'; |
|
|
|
} else { |
|
# the string is exhausted, or there's no > in it. |
|
push @tokens, { |
|
'content' => substr($text, pos $text), |
|
'type' => 'text', |
|
} unless (pos($text) eq length($text)); |
|
last; |
|
} |
|
## end of imported code |
|
|
|
push @tokens, { |
|
'content' => $content, |
|
'type' => $type, |
|
'offset' => ($type eq 'text' ? |
|
pos($text) - length($content) : |
|
pos($text) - length($content) - 2), |
|
} unless $type eq 'text' and length($content) == 0; |
|
} |
|
|
|
my @stack; |
|
my $event; |
|
my $path = ''; |
|
foreach my $t (@tokens) { |
|
if ($t->{type} eq 'starttag') { |
|
my ($n) = ($t->{content} =~ m{^(\S+)}); |
|
$t->{name} = $n; |
|
pos($t->{content}) = length($n); |
|
while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) { |
|
$t->{params}->{$1} = $2; |
|
} |
|
if (_is_markup_tag($n)) { |
|
# skip: ignore |
|
} elsif (@stack and $stack[-1]->{name} eq $n) { |
|
# skip: same tag |
|
} else { |
|
$path .= '/' . lc($n); |
|
push @stack, $t; |
|
} |
|
} elsif ($t->{type} eq 'endtag') { |
|
unless (_is_markup_tag($t->{content})) { |
|
my $n = $t->{content}; |
|
$path =~ s</$n.*?$><>i; |
|
pop @stack; |
|
} |
|
next; |
|
} |
|
# |
|
if ($path eq '/sami/head/title' and $t->{type} eq 'text') { |
|
$self->{title} = $t->{content}; |
|
} elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') { |
|
my $c = _comment_content($t->{content}); |
|
$c =~ m{(\S+):\s*(\S+)}; |
|
$self->{params}->{lc($1)} = $2 if $1; |
|
} elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') { |
|
$self->{style} = _comment_content($t->{content}); |
|
} elsif ($path eq '/sami/body/table/sync') { |
|
if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') { |
|
my $time = $t->{params}->{start}; |
|
if ($event and $time) { |
|
$event->t_end($time); |
|
push @{ $self->{events} }, $event; |
|
undef $event; |
|
} |
|
$event = $self->new_event; |
|
$event->t_start($time) if defined $time; |
|
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') { |
|
$self->log(error => "<p> tag outside event at offset $t->{offset}") |
|
unless $event; |
|
$t->{content} =~ m<class=['"]?([^'"]+)>i; |
|
$event->{class} = $1 if $1; |
|
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'br') { |
|
my $c = $event->text; |
|
$event->text($c . "\\n"); |
|
} elsif ($t->{type} eq 'text') { |
|
if ($t->{content} eq ' ') { |
|
undef $event; # empty event used to hide previous one |
|
} else { |
|
my $c = $event->text; |
|
$c .= $t->{content}; |
|
$c =~ tr/\ \t//s; # remove multiple spaces |
|
$event->text($c); |
|
} |
|
} else { |
|
} |
|
} else { |
|
#print "unhandled token: $t->{type} -- $t->{content}\n"; |
|
} |
|
} # foreach @tokens |
|
if ($event and my $t = $self->{params}->{length}) { |
|
$event->t_end($t); |
|
push @{ $self->{events} }, $event; |
|
undef $event; |
|
} |
|
return scalar @{ $self->{events} }; |
|
} |
|
|
|
sub build { |
|
my ($self) = @_; |
|
my @lines; |
|
push @lines, "<sami>"; |
|
push @lines, " <head>"; |
|
if ($self->{title}) { |
|
my $line = " " x 4 . _tag_wrap('title', $self->{title}); |
|
push @lines, $line; |
|
} |
|
if (scalar keys %{ $self->{params} }) { |
|
while (my ($k, $v) = each %{ $self->{params} }) { |
|
my $c = sprintf '<!-- %s: %s -->', ucfirst($k), $v; |
|
my $line = " " x 4 . _tag_wrap('samiparam', $c); |
|
push @lines, $line; |
|
} |
|
} |
|
if ($self->{style}) { |
|
push @lines, ' <style type="text/css"><!--'; |
|
push @lines, $self->{style}; |
|
push @lines, ' --></style>'; |
|
} |
|
push @lines, " </head>"; |
|
push @lines, " <body>"; |
|
push @lines, " <table>"; |
|
my @events = $self->events; |
|
while (my $e = shift @events) { |
|
my $t = $e->text; |
|
$t =~ s{\\n}{<br>}smg; |
|
my $l = _tag_wrap('p', $t, {class => $e->{class}}); |
|
push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start}); |
|
if (@events and $e->t_end == $events[0]->t_start) { |
|
# next event replaces current without gap, |
|
# no need to hide if explicitly |
|
next; |
|
} else { |
|
my $t = _tag_wrap('p', ' '); |
|
push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end}); |
|
} |
|
} |
|
push @lines, " </table>"; |
|
push @lines, " </body>"; |
|
push @lines, "</sami>", ""; |
|
return join($self->{eol}, @lines); |
|
} |
|
|
|
sub _tag_wrap { |
|
my ($name, $content, $attrs) = @_; |
|
my @attrs; |
|
if ($attrs and ref($attrs) eq 'HASH') { |
|
while (my ($k, $v) = each %{ $attrs }) { |
|
next unless defined $v; |
|
push @attrs, sprintf(' %s=%s', $k, $v); |
|
} |
|
} |
|
return sprintf("<%s%s>%s</%s>", $name, join('', @attrs), $content, $name); |
|
} |
|
|
|
sub _is_markup_tag { |
|
my $t = lc(shift); |
|
return 1 if $t eq 'p'; |
|
return 1 if $t eq 'br'; |
|
return; |
|
} |
|
|
|
sub _comment_content { |
|
my $c = shift; |
|
$c =~ s<(^!-*\s*|\s*-*$)><>go; # strip !-- and -- |
|
$c =~ s<^[\r\n]*><>o; |
|
$c =~ s<[\r\n]*$><>o; |
|
$c =~ s<^\s+><>gms; |
|
trim($c); |
|
return $c; |
|
} |
|
|
|
1;
|
|
|