package Subtitle::SRT; use strict; use warnings; use feature qw(switch); use utf8; sub new { my ($class, %args) = @_; my $self = { debug => 0, eol => "\n", %args, events => [], log => [], }; return bless($self, $class); } sub trim { my ($self, $line) = @_; return unless defined $line; return $line =~ s/(^\s+|\s+$)//or; } sub log { my ($self, $level, $msg) = @_; given ($level) { when ("error") { push @{ $self->{log} }, "E: $msg"; } when ("warn") { push @{ $self->{log} }, "W: $msg"; } when ("info") { push @{ $self->{log} }, "I: $msg"; } when ("debug") { push @{ $self->{log} }, "D: $msg" if $self->{debug}; } default { warn "Unknown loglevel $level of $msg\n"; } } return 1; } sub parse_timing { my ($self, $str) = @_; my $time = 0.0; return unless $str =~ m/(\d+):(\d+):(\d+)([.,])(\d+)/oi; my ($hrs, $min, $sec, $delim, $msec) = ($1, $2, $3, $4, $5); if ($msec < 0 or $msec > 999) { $self->log(warn => "wrong mseconds part of timing: $msec"); return; } if ($sec < 0 or $sec > 59) { $self->log(warn => "wrong seconds part of timing: $sec"); return; } if ($min < 0 or $min > 59) { $self->log(warn => "wrong minutes part of timing: $sec"); return; } if ($hrs < 0) { $self->log(warn => "wrong minutes part of timing: $sec"); return; } given (length("$msec")) { when ("3") { $time += $msec * 0.001; } when ("2") { $time += $msec * 0.01; } when ("1") { $time += $msec * 0.1; } default { die("abnormal length of mseconds part"); } } $time += $sec; $time += $min * 60; $time += $hrs * 60 * 60; return $time; } sub new_event { return +{ id => undef, timing => undef, text => undef }; } sub parse { my ($self, $lines) = @_; my $expected = 'id'; my $linenum = 0; my $event; foreach my $line (@$lines) { $linenum++; $line =~ s/[\r\n]+$//o; # chomp # expected: event id if ($line and not $event) { $event = $self->new_event; $event->{id} = $self->trim($line); $self->log(debug => "New event with id: $event->{id}"); next; } # expected: timing if ($line and $event and not $event->{timing}) { $self->log(debug => "Expecting timing line at $linenum"); my $timing = []; my ($start, $end, $rest) = ($line =~ m/(\S+)\s*-->\s*(\S+)(.*)/o); unless ($start and $end) { $self->log(warn => "Expected timing but found `$line` at $linenum, skipped"); next; } unless (defined ($timing->[0] = $self->parse_timing($start))) { $self->log(warn => "Can't parse timing at line $linenum: $start"); next; } unless (defined ($timing->[1] = $self->parse_timing($end))) { $self->log(warn => "Can't parse timing at line $linenum: $end"); next; } $event->{timing} = $timing; $rest = $self->trim($rest); next unless $rest; # extension: timing # example: X1:050 X2:669 Y1:234 Y2:436 if ($rest =~ m/([XY][12]:(\d+))/io) { foreach my $token (split(/\s+/, $rest)) { $event->{coords} //= {}; if ($token =~ m/([XY][12]):(\d+)/io) { $event->{coords}->{lc($1)} = $2; next; } $self->log(warn => "Garbage detected instead event coord: $token"); } unless (scalar keys %{ $event->{coords} } == 4) { delete $event->{coords}; $self->log(warn => "Incomplete coord set for event at line $linenum"); } next; } # extension: ssa-like style # example: SSA: Dialogue, Layer: 0, Style: Reply, Name: NTP, MarginL: 0300, MarginR: 0000, MarginV: 0300, Effect: !Effect if ($rest =~ m/SSA:\s*\S+,\s*(.+)/o) { $rest = $1; foreach my $token (split(/\s*,\s*/, $rest)) { next unless $token =~ m/^(\S+):\s*(.+)/o; $event->{style} //= {}; $event->{style}->{lc($1)} = $self->trim($2); } next; } $self->log(warn => "garbage after timing at line $linenum: $rest"); next; } # expected: empty line if ($event and not $line) { $self->log(debug => "Empty line at line $linenum -> finalize"); push @{ $self->{events} }, $event; undef $event; next; } # expected: event text if ($line and $event and $event->{timing}) { $self->log(debug => "Text line at $linenum -> append"); if ($event->{text}) { $event->{text} .= $self->{eol} . $self->trim($line); } else { $event->{text} = $self->trim($line); } next; } } # finalize last event if ($event and $event->{timing} and $event->{text}) { push @{ $self->{events} }, $event; } return scalar @{ $self->{events} }; } 1;