You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
297 lines
8.1 KiB
297 lines
8.1 KiB
package Text::Dokuwiki::Parser; |
|
|
|
use strict; |
|
use warnings; |
|
use utf8; |
|
|
|
use Text::Dokuwiki::Regexps; |
|
|
|
sub new { |
|
my ($class) = @_; |
|
my $self = { |
|
footnotes => [], |
|
}; |
|
|
|
return bless($self, $class); |
|
} |
|
|
|
sub _parse_list { |
|
my ($self, $lines) = @_; |
|
my @lists = (); |
|
my @stack = ( \@lists ); |
|
my $types = {curr => '', last => ''}; |
|
my $level = {curr => 0, last => 0}; |
|
|
|
foreach my $line (@{ $lines }) { |
|
$line =~ m/^(\s+)/o; |
|
my ($ident, $dot, $rest) = ($line =~ m/^((?:\s{2})+)([\*-])\s*(.+)/); |
|
$level->{last} = $level->{curr}; |
|
$level->{curr} = $ident =~ tr/ / /; |
|
|
|
$types->{last} = $types->{curr}; |
|
$types->{curr} = ($dot eq '-') ? 'ol' : 'ul'; |
|
|
|
if ($level->{curr} == $level->{last} and |
|
$types->{curr} ne $types->{last}) { |
|
pop @stack; |
|
my $list = [$types->{curr} => {}]; |
|
push @{ $stack[-1] }, (@stack > 1) ? [li => {}, $list] : $list; |
|
push @stack, $list; |
|
} |
|
if ($level->{curr} > $level->{last}) { |
|
my $list = [$types->{curr} => {}]; |
|
push @{ $stack[-1] }, (@stack > 1) ? [li => {}, $list] : $list; |
|
push @stack, $list; |
|
} |
|
if ($level->{curr} < $level->{last}) { |
|
pop @stack; |
|
} |
|
push @{ $stack[-1] }, [li => {}, $self->_parse_text($rest)]; |
|
} |
|
pop @stack while @stack; |
|
|
|
return @lists; |
|
} |
|
|
|
sub _parse_table { |
|
my ($self, $lines) = @_; |
|
my ($i, $j, $colspan) = (0, 0, 0); |
|
my @rows = (); |
|
|
|
foreach my $line (@{ $lines }) { |
|
my @row = (); |
|
for ($j = 0; ;$j++) { |
|
last if ($line eq '|' or $line eq '^'); |
|
next if ($line !~ m/^(([\|\^])([^\|\^]*))/o); |
|
my $attrs = {}; |
|
my ($all, $key, $value) = ($1, $2, $3); |
|
$line = substr($line, length($all)); |
|
my $type = ($key eq '^') ? 'th' : 'td'; |
|
my ($lpad, $content, $rpad) = ($value =~ m/^(\s*)(.*?)(\s*)$/o); |
|
if ($line =~ m/^([\|\^]{2,})/o) { |
|
# colspan detected; |
|
$attrs->{colspan} = $colspan = length($1); |
|
} elsif (index($content, ":::") >= 0) { |
|
# rowspan detected |
|
for (my $k = $i - 1; $k >= 0; $k--) { # k is idx of prev rows |
|
my $cellptr = $rows[$k][$j + 2]; # +2 for (tr => {}) |
|
next if (index($cellptr->[2], ":::") >= 0); # also rowspan |
|
$cellptr->[1]->{rowspan} //= 1; # init attr, if missing |
|
$cellptr->[1]->{rowspan} += 1; # incr value |
|
last; |
|
} |
|
$attrs->{skip} = 1; |
|
} elsif ($colspan > 1) { |
|
# colspan-eaten column |
|
$attrs->{skip} = 1; |
|
$colspan -= 1; |
|
} |
|
if ($lpad eq '') { |
|
$attrs->{align} = ($rpad eq '') ? 'center' : 'left'; |
|
} else { |
|
$attrs->{align} = ($rpad eq '') ? 'right' : 'center'; |
|
} |
|
push @row, [$type => $attrs, $content]; |
|
} |
|
push @rows, [tr => {}, @row]; |
|
$i++; |
|
} |
|
|
|
return [table => {}, @rows]; |
|
} |
|
|
|
sub _parse_link { |
|
my ($self, $content) = @_; |
|
my ($href, @text); |
|
|
|
if ((my $pos = index($content, "|")) >= 0) { |
|
$href = substr($content, 0, $pos); |
|
@text = $self->_parse_text(substr($content, $pos + 1)); |
|
} else { |
|
$href = $content; |
|
@text = ($content); |
|
} |
|
|
|
unless ($href =~ m!^[a-z]+://!io) { |
|
$href =~ s{:}{/}go; |
|
} |
|
|
|
return [a => {href => $href}, @text]; |
|
} |
|
|
|
sub _parse_include { |
|
my ($self, $content) = @_; |
|
|
|
$content =~ s/^{{//o; |
|
$content =~ s/}}$//o; |
|
|
|
if ($content =~ m|^([a-z]+)>(.*)|oi) { |
|
warn "ignored: `$content`, unimplemented\n"; |
|
return; |
|
} |
|
|
|
my ($lpad, $rpad, $src, %attrs) = ('', '', ''); |
|
if ($content =~ m!^(\s*)([^\s\|]+)(\s*)[|](.*)!oi) { |
|
($lpad, $src, $rpad) = ($1, $2, $3); |
|
$attrs{title} = $4; |
|
} elsif ($content =~ m!^(\s*)(\S+)(\s*)!) { |
|
($lpad, $src, $rpad) = ($1, $2, $3); |
|
} else { |
|
($src) = ($content =~ s/^(.+)/$1/or); |
|
} |
|
|
|
$attrs{align} = ($lpad ne '') |
|
? (($rpad ne '') ? 'center' : 'right') |
|
: 'left'; |
|
|
|
$src =~ s<\?(\d+)(?:x(\d+))?> |
|
< |
|
$attrs{width} = $1 if $1; |
|
$attrs{height} = $2 if $2; |
|
'' |
|
>ioe; |
|
|
|
$src =~ s|:|/|go; |
|
$attrs{src} = $src; |
|
|
|
return [img => \%attrs]; |
|
} |
|
|
|
my $inlines = { |
|
"__" => "__", # underline |
|
"//" => "//", # italic |
|
"**" => "**", # bold |
|
"''" => "''", # monospace |
|
"{{" => "}}", # module/image |
|
"[[" => "]]", # link |
|
"((" => "))", # footnote |
|
}; |
|
sub _parse_text { |
|
my ($self, $line) = @_; |
|
my ($endtag, $endpos, $content); |
|
my @parts = (); |
|
|
|
while ($line) { |
|
$line =~ m!^(?:(.*?)(__|//|''|\*\*|\[\[|\{\{|\(\(|\\\\ ))?(.*)!o; |
|
my ($before, $match, $after) = ($1, $2, $3); |
|
if ($before) { |
|
push @parts, $before; |
|
$line = $match . $after; |
|
next; |
|
} |
|
if ($after and not $match) { |
|
push @parts, $after; |
|
last; |
|
} |
|
|
|
if ($match eq '\\\\ ') { # force newline |
|
push @parts, [br => {}]; |
|
$line = $after; |
|
next; |
|
} elsif ($match) { |
|
$endtag = $inlines->{$match}; |
|
$endpos = index($after, $endtag, 0); |
|
if ($endpos < 0) { # no closing marker? |
|
push @parts, $match; |
|
$line = $after; |
|
next; |
|
} |
|
$content = substr($after, 0, $endpos); |
|
$line = substr($after, $endpos + 2); |
|
} |
|
|
|
my ($tag, %attrs); |
|
if ($match eq "//") { $tag = 'i'; } |
|
elsif ($match eq "**") { $tag = 'b'; } |
|
elsif ($match eq "__") { $tag = 'u'; } # should be rendered as <span style=""> |
|
elsif ($match eq "''") { $tag = 'code'; } |
|
elsif ($match eq "{{") { push @parts, $self->_parse_include($content); next; } |
|
elsif ($match eq "[[") { push @parts, $self->_parse_link($content); next; } |
|
elsif ($match eq "((") { |
|
push @{ $self->{footnotes} }, $self->_parse_text($content); |
|
my $n = scalar @{ $self->{footnotes} }; |
|
push @parts, [a => {href => "#note_$n", class => 'footnote'}, $n]; |
|
next; |
|
} |
|
elsif ($match) { die("parser failed, unrecognized inline: $match\n"); } |
|
|
|
push @parts, [$tag => \%attrs, $self->_parse_text($content)]; |
|
} |
|
|
|
return @parts; |
|
} |
|
|
|
sub parse { |
|
my ($self, $text) = @_; |
|
|
|
my $rx = $Text::Dokuwiki::Regexps::regexps; |
|
my (@tree); |
|
my $len = length($text); |
|
pos($text) = 0; |
|
|
|
while ($len - pos($text) > 0) { |
|
my $pos = pos($text); |
|
if (0) { # TODO: MACRO |
|
} elsif ($text =~ m/\G$rx->{header}/cgi) { |
|
my $level = length($+{line}); |
|
$level = 7 - $level; # invert |
|
push @tree, ["h$level" => {}, $+{header}]; |
|
} elsif ($text =~ m/\G$rx->{hrule}/cgi) { |
|
push @tree, [hr => {}]; |
|
} elsif ($text =~ m/\G$rx->{codeblock}/cgi) { |
|
my $attrs = {}; |
|
$attrs->{type} = $+{tag}; |
|
$attrs->{file} = $+{filename} if $+{filename}; |
|
$attrs->{syntax} = $+{syntax} if $+{syntax}; |
|
push @tree, [codeblock => $attrs, $+{block}]; |
|
} elsif ($text =~ m/\G$rx->{table}/cgi) { |
|
my $lines = $+{table}; |
|
chomp $lines; |
|
push @tree, $self->_parse_table([ split(/\r?\n/, $lines) ]); |
|
} elsif ($text =~ m/\G$rx->{list}/cgi) { |
|
my $lines = $+{list}; |
|
chomp $lines; |
|
push @tree, $self->_parse_list([ split(/\r?\n/, $lines) ]); |
|
} elsif ($text =~ m/\G$rx->{blockquote}/cgi) { |
|
push @tree, [blockquote => {}, $+{block}]; |
|
} elsif ($text =~ m/\G$rx->{pre}/cgi) { |
|
push @tree, [pre => {}, $+{block}]; |
|
} elsif ($text =~ m/\G$rx->{paragraph}/cgi) { |
|
my $text = $+{text}; |
|
chomp $text; |
|
push @tree, [p => {}, $self->_parse_text($text)]; |
|
} elsif ($text =~ m/\G$rx->{emptyline}/cgi) { |
|
next; |
|
} else { |
|
my $msg = "Unmatched: '" . substr($text, pos($text)) . "'\n"; |
|
die($msg); |
|
} |
|
if (pos($text) <= $pos) { |
|
die("parser failed, abort\n"); |
|
} |
|
} |
|
|
|
if (scalar @{ $self->{footnotes} }) { |
|
my @fn = @{ $self->{footnotes} }; |
|
my @out = (); |
|
for (my $i = 0; $i <= $#fn; $i++) { |
|
my $name = sprintf "note_%d", $i + 1; |
|
push @out, [li => {}, [a => {name => $name}], $fn[$i]]; |
|
} |
|
push @tree, [ol => {}, @out]; |
|
} |
|
|
|
return [div => {}, @tree ]; |
|
} |
|
|
|
1; |
|
|
|
__END__ |
|
|
|
[div => {class => 'block'}, # <div class='block'> |
|
'Hello!', # Hello! |
|
[strong => {}, 'user'], # <strong>user</strong> |
|
', this is converted text.', # , this is converted text. |
|
[br => {}], # <br/> |
|
[p => {}, 'Second paragraph'] # <p>Second paragraph</p> |
|
] # </div>
|
|
|