diff --git a/lib/Subtitle/Format/SMI.pm b/lib/Subtitle/Format/SMI.pm new file mode 100644 index 0000000..5f84462 --- /dev/null +++ b/lib/Subtitle/Format/SMI.pm @@ -0,0 +1,235 @@ +package Subtitle::Format::SMI; + +use strict; +use warnings; +use utf8; + +use Subtitle::Event; +use Subtitle::Utils qw(:string :timing); + +use base 'Subtitle::Format'; + +sub new { + my ($class, %args) = @_; + my $self = { + debug => $args{debug} || 0, + eol => $args{eol} || "\n", + title => '', + style => undef, + params => {}, + events => [], + log => [], + }; + + return bless($self, $class); +} + +sub new_event { return Subtitle::Event->new; } + +sub parse { + my ($self, $lines) = @_; + my $text = join("\n", @{ $lines }); + + ## the code below taken from HTML::SimpleParse 0.12 + ## and modified for modern perl versions + my @tokens; + while (1) { + my ($content, $type); + + # First we try to pull off any plain text (anything before a "<" char) + if ($text =~ /\G([^<]+)/gcs) { + $content = $1; $type = 'text'; + trim($content); + chomp_all($content); + + # Then, SSI, comments, and markup declarations (usually ) + # ssi: + # comment: + # markup: + } elsif ($text =~ /\G<(!--(\#?).*?--)>/gcs) { + $type = ($2 ? 'ssi' : 'comment'); + $content = $1; + } elsif ($text =~ /\G<(!.*?)>/gcs) { + $type = 'markup'; + $content = $1; + + # Then, look for an end tag + } elsif ($text =~ m|\G([a-zA-Z][a-zA-Z0-9\.\-]*\s*)>|gcs) { + $content = lc($1); $type = 'endtag'; + + # Then, finally we look for a start tag + # We know the first char is <, make sure there's a > + } elsif ($text =~ /\G<(.*?)>/gcs) { + $content = lc($1); $type = 'starttag'; + + } else { + # the string is exhausted, or there's no > in it. + push @tokens, { + 'content' => substr($text, pos $text), + 'type' => 'text', + } unless (pos($text) eq length($text)); + last; + } + ## end of imported code + + push @tokens, { + 'content' => $content, + 'type' => $type, + 'offset' => ($type eq 'text' ? + pos($text) - length($content) : + pos($text) - length($content) - 2), + } unless $type eq 'text' and length($content) == 0; + } + + my @stack; + my $event; + my $path = ''; + foreach my $t (@tokens) { + if ($t->{type} eq 'starttag') { + my ($n) = ($t->{content} =~ m{^(\S+)}); + $t->{name} = $n; + pos($t->{content}) = length($n); + while ($t->{content} =~ m<\G\s+(\S+)=['"]?(\S+)['"]?>gsc) { + $t->{params}->{$1} = $2; + } + if (_is_markup_tag($n)) { + # skip: ignore + } elsif (@stack and $stack[-1]->{name} eq $n) { + # skip: same tag + } else { + $path .= '/' . lc($n); + push @stack, $t; + } + } elsif ($t->{type} eq 'endtag') { + unless (_is_markup_tag($t->{content})) { + my $n = $t->{content}; + $path =~ s$n.*?$><>i; + pop @stack; + } + next; + } + # + if ($path eq '/sami/head/title' and $t->{type} eq 'text') { + $self->{title} = $t->{content}; + } elsif ($path eq '/sami/head/samiparam' and $t->{type} eq 'comment') { + my $c = _comment_content($t->{content}); + $c =~ m{(\S+):\s*(\S+)}; + $self->{params}->{lc($1)} = $2 if $1; + } elsif ($path eq '/sami/head/style' and $t->{type} eq 'comment') { + $self->{style} = _comment_content($t->{content}); + } elsif ($path eq '/sami/body/table/sync') { + if ($t->{type} eq 'starttag' and $t->{name} eq 'sync') { + my $time = $t->{params}->{start}; + if ($event and $time) { + $event->t_end($time); + push @{ $self->{events} }, $event; + undef $event; + } + $event = $self->new_event; + $event->t_start($time) if defined $time; + } elsif ($t->{type} eq 'starttag' and $t->{name} eq 'p') { + $self->log(error => "
tag outside event at offset $t->{offset}")
+ unless $event;
+ $t->{content} =~ m";
+ my @events = $self->events;
+ while (my $e = shift @events) {
+ my $t = $e->text;
+ $t =~ s{\\n}{
";
+ push @lines, " ";
+ push @lines, "
}smg;
+ my $l = _tag_wrap('p', $t, {class => $e->{class}});
+ push @lines, " " x 6 . _tag_wrap('sync', $l, {start => $e->t_start});
+ if (@events and $e->t_end == $events[0]->t_start) {
+ # next event replaces current without gap,
+ # no need to hide if explicitly
+ next;
+ } else {
+ my $t = _tag_wrap('p', ' ');
+ push @lines, " " x 6 . _tag_wrap('sync', $t, {start => $e->t_end});
+ }
+ }
+ push @lines, "