Alex 'AdUser' Z
6 years ago
1 changed files with 235 additions and 0 deletions
@ -0,0 +1,235 @@
|
||||
package Subtitle::Format::SMI; |
||||
|
||||
use strict; |
||||
use warnings; |
||||
use utf8; |
||||
|
||||
use Subtitle::Event; |
||||
use Subtitle::Utils qw(:string :timing); |
||||
|
||||
use base 'Subtitle::Format'; |
||||
|
||||
sub new { |
||||
my ($class, %args) = @_; |
||||
my $self = { |
||||
debug => $args{debug} || 0, |
||||
eol => $args{eol} || "\n", |
||||
title => '', |
||||
style => undef, |
||||
params => {}, |
||||
events => [], |
||||
log => [], |
||||
}; |
||||
|
||||
return bless($self, $class); |
||||
} |
||||
|
||||
sub new_event { return Subtitle::Event->new; } |
||||
|
||||
sub parse { |
||||
my ($self, $lines) = @_; |
||||
my $text = join("\n", @{ $lines }); |
||||
|
||||
## the code below taken from HTML::SimpleParse 0.12 |
||||
## and modified for modern perl versions |
||||
my @tokens; |
||||
while (1) { |
||||
my ($content, $type); |
||||
|
||||
# First we try to pull off any plain text (anything before a "<" char) |
||||
if ($text =~ /\G([^<]+)/gcs) { |
||||
$content = $1; $type = 'text'; |
||||
trim($content); |
||||
chomp_all($content); |
||||
|
||||
# Then, SSI, comments, and markup declarations (usually <!DOCTYPE...>) |
||||
# ssi: <!--#stuff--> |
||||
# comment: <!--stuff--> |
||||
# markup: <!stuff> |
||||
} elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) { |
||||
$type = ($2 ? 'ssi' : 'comment'); |
||||
$content = $1; |
||||
} elsif ($text =~ /\G<(!.*?)>/gcs) { |
||||
$type = 'markup'; |
||||
$content = $1; |
||||
|
||||
# Then, look for an end tag |
||||
} elsif ($text =~ m|\G</([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) { |
||||
$content = lc($1); $type = 'endtag'; |
||||
|
||||
# Then, finally we look for a start tag |
||||
# We know the first char is <, make sure there's a > |
||||
} elsif ($text =~ /\G<(.*?)>/gcs) { |
||||
$content = lc($1); $type = 'starttag'; |
||||
|
||||
} else { |
||||
# the string is exhausted, or there's no > in it. |
||||
push @tokens, { |
||||
'content' => substr($text, pos $text), |
||||
'type' => 'text', |
||||
} unless (pos($text) eq length($text)); |
||||
last; |
||||
} |
||||
## end of imported code |
||||
|
||||
push @tokens, { |
||||
'content' => $content, |
||||
'type' => $type, |
||||
'offset' => ($type eq 'text' ? |
||||
pos($text) - length($content) : |
||||
pos($text) - length($content) - 2), |
||||
} unless $type eq 'text' and length($content) == 0; |
||||
} |
||||
|
||||
my @stack; |
||||
my $event; |
||||
my $path = ''; |
||||
foreach my $t (@tokens) { |
||||
if ($t->{type} eq 'starttag') { |
||||
my ($n) = ($t->{content} =~ m{^(\S+)}); |
||||
$t->{name} = $n; |
||||
pos($t->{content}) = length($n); |
||||
while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) { |
||||
$t->{params}->{$1} = $2; |
||||
} |
||||
if (_is_markup_tag($n)) { |
||||
# skip: ignore |
||||
} elsif (@stack and $stack[-1]->{name} eq $n) { |
||||
# skip: same tag |
||||
} else { |
||||
$path .= '/' . lc($n); |
||||
push @stack, $t; |
||||
} |
||||
} elsif ($t->{type} eq 'endtag') { |
||||
unless (_is_markup_tag($t->{content})) { |
||||
my $n = $t->{content}; |
||||
$path =~ s</$n.*?$><>i; |
||||
pop @stack; |
||||
} |
||||
next; |
||||
} |
||||
# |
||||
if ($path eq '/sami/head/title' and $t->{type} eq 'text') { |
||||
$self->{title} = $t->{content}; |
||||
} elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') { |
||||
my $c = _comment_content($t->{content}); |
||||
$c =~ m{(\S+):\s*(\S+)}; |
||||
$self->{params}->{lc($1)} = $2 if $1; |
||||
} elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') { |
||||
$self->{style} = _comment_content($t->{content}); |
||||
} elsif ($path eq '/sami/body/table/sync') { |
||||
if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') { |
||||
my $time = $t->{params}->{start}; |
||||
if ($event and $time) { |
||||
$event->t_end($time); |
||||
push @{ $self->{events} }, $event; |
||||
undef $event; |
||||
} |
||||
$event = $self->new_event; |
||||
$event->t_start($time) if defined $time; |
||||
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') { |
||||
$self->log(error => "<p> tag outside event at offset $t->{offset}") |
||||
unless $event; |
||||
$t->{content} =~ m<class=['"]?([^'"]+)>i; |
||||
$event->{class} = $1 if $1; |
||||
} elsif ($t->{type} eq 'starttag' and $t->{name} eq 'br') { |
||||
my $c = $event->text; |
||||
$event->text($c . "\\n"); |
||||
} elsif ($t->{type} eq 'text') { |
||||
if ($t->{content} eq ' ') { |
||||
undef $event; # empty event used to hide previous one |
||||
} else { |
||||
my $c = $event->text; |
||||
$c .= $t->{content}; |
||||
$c =~ tr/\ \t//s; # remove multiple spaces |
||||
$event->text($c); |
||||
} |
||||
} else { |
||||
} |
||||
} else { |
||||
#print "unhandled token: $t->{type} -- $t->{content}\n"; |
||||
} |
||||
} # foreach @tokens |
||||
if ($event and my $t = $self->{params}->{length}) { |
||||
$event->t_end($t); |
||||
push @{ $self->{events} }, $event; |
||||
undef $event; |
||||
} |
||||
return scalar @{ $self->{events} }; |
||||
} |
||||
|
||||
sub build { |
||||
my ($self) = @_; |
||||
my @lines; |
||||
push @lines, "<sami>"; |
||||
push @lines, " <head>"; |
||||
if ($self->{title}) { |
||||
my $line = " " x 4 . _tag_wrap('title', $self->{title}); |
||||
push @lines, $line; |
||||
} |
||||
if (scalar keys %{ $self->{params} }) { |
||||
while (my ($k, $v) = each %{ $self->{params} }) { |
||||
my $c = sprintf '<!-- %s: %s -->', ucfirst($k), $v; |
||||
my $line = " " x 4 . _tag_wrap('samiparam', $c); |
||||
push @lines, $line; |
||||
} |
||||
} |
||||
if ($self->{style}) { |
||||
push @lines, ' <style type="text/css"><!--'; |
||||
push @lines, $self->{style}; |
||||
push @lines, ' --></style>'; |
||||
} |
||||
push @lines, " </head>"; |
||||
push @lines, " <body>"; |
||||
push @lines, " <table>"; |
||||
my @events = $self->events; |
||||
while (my $e = shift @events) { |
||||
my $t = $e->text; |
||||
$t =~ s{\\n}{<br>}smg; |
||||
my $l = _tag_wrap('p', $t, {class => $e->{class}}); |
||||
push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start}); |
||||
if (@events and $e->t_end == $events[0]->t_start) { |
||||
# next event replaces current without gap, |
||||
# no need to hide if explicitly |
||||
next; |
||||
} else { |
||||
my $t = _tag_wrap('p', ' '); |
||||
push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end}); |
||||
} |
||||
} |
||||
push @lines, " </table>"; |
||||
push @lines, " </body>"; |
||||
push @lines, "</sami>", ""; |
||||
return join($self->{eol}, @lines); |
||||
} |
||||
|
||||
sub _tag_wrap { |
||||
my ($name, $content, $attrs) = @_; |
||||
my @attrs; |
||||
if ($attrs and ref($attrs) eq 'HASH') { |
||||
while (my ($k, $v) = each %{ $attrs }) { |
||||
next unless defined $v; |
||||
push @attrs, sprintf(' %s=%s', $k, $v); |
||||
} |
||||
} |
||||
return sprintf("<%s%s>%s</%s>", $name, join('', @attrs), $content, $name); |
||||
} |
||||
|
||||
sub _is_markup_tag { |
||||
my $t = lc(shift); |
||||
return 1 if $t eq 'p'; |
||||
return 1 if $t eq 'br'; |
||||
return; |
||||
} |
||||
|
||||
sub _comment_content { |
||||
my $c = shift; |
||||
$c =~ s<(^!-*\s*|\s*-*$)><>go; # strip !-- and -- |
||||
$c =~ s<^[\r\n]*><>o; |
||||
$c =~ s<[\r\n]*$><>o; |
||||
$c =~ s<^\s+><>gms; |
||||
trim($c); |
||||
return $c; |
||||
} |
||||
|
||||
1; |
Loading…
Reference in new issue