From b25045cb4c7d98a8351bcdef9500828cf38a2c32 Mon Sep 17 00:00:00 2001 From: Alex 'AdUser' Z Date: Thu, 19 Feb 2015 22:29:27 +1000 Subject: [PATCH] * Subtitle::SRT->parse : completed --- lib/Subtitle/SRT.pm | 105 ++++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/lib/Subtitle/SRT.pm b/lib/Subtitle/SRT.pm index a80c50b..7c943dd 100644 --- a/lib/Subtitle/SRT.pm +++ b/lib/Subtitle/SRT.pm @@ -6,22 +6,21 @@ use feature qw(switch); use utf8; sub new { - my ($class) = @_; - my $self = { events => [], debug => 0, log => [] }; + my ($class, %args) = @_; + my $self = { + debug => 0, + eol => "\n", + %args, + events => [], + log => [], + }; return bless($self, $class); } -sub load { - my ($self, $file) = @_; - my $cnt = 0; - - return $cnt; -} - sub trim { my ($self, $line) = @_; - return unless $line; + return unless defined $line; return $line =~ s/(^\s+|\s+$)//or; } @@ -41,7 +40,35 @@ sub log { sub parse_timing { my ($self, $str) = @_; - ... + my $time = 0.0; + return unless $str =~ m/(\d+):(\d+):(\d+)([.,])(\d+)/oi; + my ($hrs, $min, $sec, $delim, $msec) = ($1, $2, $3, $4, $5); + if ($msec < 0 or $msec > 999) { + $self->log(warn => "wrong mseconds part of timing: $msec"); + return; + } + if ($sec < 0 or $sec > 59) { + $self->log(warn => "wrong seconds part of timing: $sec"); + return; + } + if ($min < 0 or $min > 59) { + $self->log(warn => "wrong minutes part of timing: $sec"); + return; + } + if ($hrs < 0) { + $self->log(warn => "wrong minutes part of timing: $sec"); + return; + } + given (length("$msec")) { + when ("3") { $time += $msec * 0.001; } + when ("2") { $time += $msec * 0.01; } + when ("1") { $time += $msec * 0.1; } + default { die("abnormal length of mseconds part"); } + } + $time += $sec; + $time += $min * 60; + $time += $hrs * 60 * 60; + return $time; } sub new_event { return +{ id => undef, timing => undef, text => undef }; } @@ -54,33 +81,33 @@ sub parse { foreach my $line (@$lines) { $linenum++; - unless ($line and $event) { - $self->log(debug => "empty line and event"); - next; - } + $line =~ s/[\r\n]+$//o; # chomp # expected: event id - if ($line and not defined $event) { + if ($line and not $event) { $event = $self->new_event; $event->{id} = $self->trim($line); - $self->log(debug => "new event with id: $event->{id}"); + $self->log(debug => "New event with id: $event->{id}"); + next; } # expected: timing - if ($event and not $event->{timing}) { + if ($line and $event and not $event->{timing}) { + $self->log(debug => "Expecting timing line at $linenum"); my $timing = []; my ($start, $end, $rest) = ($line =~ m/(\S+)\s*-->\s*(\S+)(.*)/o); unless ($start and $end) { - $self->log(warn => "expected timing but found `$line` at $linenum, skipped"); + $self->log(warn => "Expected timing but found `$line` at $linenum, skipped"); next; } - unless ($timing->[0] = $self->parse_timing($start)) { + unless (defined ($timing->[0] = $self->parse_timing($start))) { $self->log(warn => "Can't parse timing at line $linenum: $start"); next; } - unless ($timing->[1] = $self->parse_timing($end)) { + unless (defined ($timing->[1] = $self->parse_timing($end))) { $self->log(warn => "Can't parse timing at line $linenum: $end"); next; } $event->{timing} = $timing; + $rest = $self->trim($rest); next unless $rest; # extension: timing # example: X1:050 X2:669 Y1:234 Y2:436 @@ -93,7 +120,7 @@ sub parse { } $self->log(warn => "Garbage detected instead event coord: $token"); } - unless (scalar @{ $event->{coords}} == 4) { + unless (scalar keys %{ $event->{coords} } == 4) { delete $event->{coords}; $self->log(warn => "Incomplete coord set for event at line $linenum"); } @@ -101,25 +128,39 @@ sub parse { } # extension: ssa-like style # example: SSA: Dialogue, Layer: 0, Style: Reply, Name: NTP, MarginL: 0300, MarginR: 0000, MarginV: 0300, Effect: !Effect - ... - } - # expected: event text - if ($event and $event->{timing}) { - if ($event->{text}) { - $event->{text} .= " " . $self->trim($line); - } else { - $event->{text} = $self->trim($line); + if ($rest =~ m/SSA:\s*\S+,\s*(.+)/o) { + $rest = $1; + foreach my $token (split(/\s*,\s*/, $rest)) { + next unless $token =~ m/^(\S+):\s*(.+)/o; + $event->{style} //= {}; + $event->{style}->{lc($1)} = $self->trim($2); + } + next; } + $self->log(warn => "garbage after timing at line $linenum: $rest"); next; } # expected: empty line if ($event and not $line) { - $self->log(debug => "finalize event line $linenum"); + $self->log(debug => "Empty line at line $linenum -> finalize"); push @{ $self->{events} }, $event; undef $event; next; } - die("unhandled line: $linenum:$line\n"); + # expected: event text + if ($line and $event and $event->{timing}) { + $self->log(debug => "Text line at $linenum -> append"); + if ($event->{text}) { + $event->{text} .= $self->{eol} . $self->trim($line); + } else { + $event->{text} = $self->trim($line); + } + next; + } + } + # finalize last event + if ($event and $event->{timing} and $event->{text}) { + push @{ $self->{events} }, $event; } return scalar @{ $self->{events} };