package Subtitle::Format::SMI; use strict; use warnings; use utf8; use Subtitle::Event; use Subtitle::Utils qw(:string :timing); use base 'Subtitle::Format'; sub new { my ($class, %args) = @_; my $self = { debug => $args{debug} || 0, eol => $args{eol} || "\n", title => '', style => undef, params => {}, events => [], log => [], }; return bless($self, $class); } sub new_event { return Subtitle::Event->new; } sub parse { my ($self, $lines) = @_; my $text = join("\n", @{ $lines }); ## the code below taken from HTML::SimpleParse 0.12 ## and modified for modern perl versions my @tokens; while (1) { my ($content, $type); # First we try to pull off any plain text (anything before a "<" char) if ($text =~ /\G([^<]+)/gcs) { $content = $1; $type = 'text'; trim($content); chomp_all($content); # Then, SSI, comments, and markup declarations (usually ) # ssi: # comment: # markup: } elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) { $type = ($2 ? 'ssi' : 'comment'); $content = $1; } elsif ($text =~ /\G<(!.*?)>/gcs) { $type = 'markup'; $content = $1; # Then, look for an end tag } elsif ($text =~ m|\G([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) { $content = lc($1); $type = 'endtag'; # Then, finally we look for a start tag # We know the first char is <, make sure there's a > } elsif ($text =~ /\G<(.*?)>/gcs) { $content = lc($1); $type = 'starttag'; } else { # the string is exhausted, or there's no > in it. push @tokens, { 'content' => substr($text, pos $text), 'type' => 'text', } unless (pos($text) eq length($text)); last; } ## end of imported code push @tokens, { 'content' => $content, 'type' => $type, 'offset' => ($type eq 'text' ? pos($text) - length($content) : pos($text) - length($content) - 2), } unless $type eq 'text' and length($content) == 0; } my @stack; my $event; my $path = ''; foreach my $t (@tokens) { if ($t->{type} eq 'starttag') { my ($n) = ($t->{content} =~ m{^(\S+)}); $t->{name} = $n; pos($t->{content}) = length($n); while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) { $t->{params}->{$1} = $2; } if (_is_markup_tag($n)) { # skip: ignore } elsif (@stack and $stack[-1]->{name} eq $n) { # skip: same tag } else { $path .= '/' . lc($n); push @stack, $t; } } elsif ($t->{type} eq 'endtag') { unless (_is_markup_tag($t->{content})) { my $n = $t->{content}; $path =~ s$n.*?$><>i; pop @stack; } next; } # if ($path eq '/sami/head/title' and $t->{type} eq 'text') { $self->{title} = $t->{content}; } elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') { my $c = _comment_content($t->{content}); $c =~ m{(\S+):\s*(\S+)}; $self->{params}->{lc($1)} = $2 if $1; } elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') { $self->{style} = _comment_content($t->{content}); } elsif ($path eq '/sami/body/table/sync') { if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') { my $time = $t->{params}->{start}; if ($event and $time) { $event->t_end($time); push @{ $self->{events} }, $event; undef $event; } $event = $self->new_event; $event->t_start($time) if defined $time; } elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') { $self->log(error => "
tag outside event at offset $t->{offset}")
unless $event;
$t->{content} =~ m";
my @events = $self->events;
while (my $e = shift @events) {
my $t = $e->text;
$t =~ s{\\n}{
";
push @lines, " ";
push @lines, "
}smg;
my $l = _tag_wrap('p', $t, {class => $e->{class}});
push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start});
if (@events and $e->t_end == $events[0]->t_start) {
# next event replaces current without gap,
# no need to hide if explicitly
next;
} else {
my $t = _tag_wrap('p', ' ');
push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end});
}
}
push @lines, "