|
|
|
package Text::Dokuwiki::Parser;
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use feature qw/ switch /;
|
|
|
|
use utf8;
|
|
|
|
|
|
|
|
sub new {
|
|
|
|
my ($class) = @_;
|
|
|
|
my $self = {};
|
|
|
|
|
|
|
|
return bless($self, $class);
|
|
|
|
}
|
|
|
|
|
|
|
|
sub _parse_list {
|
|
|
|
my ($self, $lines) = @_;
|
|
|
|
my @lists = ();
|
|
|
|
my @stack = ( \@lists );
|
|
|
|
my $types = {curr => '', last => ''};
|
|
|
|
my $level = {curr => 0, last => 0};
|
|
|
|
|
|
|
|
foreach my $line (@{ $lines }) {
|
|
|
|
$line =~ m/^(\s+)/o;
|
|
|
|
my ($ident, $dot, $rest) = ($line =~ m/^((?:\s{2})+)([\*-])\s*(.+)/);
|
|
|
|
$level->{last} = $level->{curr};
|
|
|
|
$level->{curr} = $ident =~ tr/ / /;
|
|
|
|
|
|
|
|
$types->{last} = $types->{curr};
|
|
|
|
$types->{curr} = ($dot eq '-') ? 'ol' : 'ul';
|
|
|
|
|
|
|
|
if ($level->{curr} == $level->{last} and
|
|
|
|
$types->{curr} ne $types->{last}) {
|
|
|
|
pop @stack;
|
|
|
|
my $list = [$types->{curr} => {}];
|
|
|
|
push @{ $stack[-1] }, (@stack > 1) ? [li => {}, $list] : $list;
|
|
|
|
push @stack, $list;
|
|
|
|
}
|
|
|
|
if ($level->{curr} > $level->{last}) {
|
|
|
|
my $list = [$types->{curr} => {}];
|
|
|
|
push @{ $stack[-1] }, (@stack > 1) ? [li => {}, $list] : $list;
|
|
|
|
push @stack, $list;
|
|
|
|
}
|
|
|
|
if ($level->{curr} < $level->{last}) {
|
|
|
|
pop @stack;
|
|
|
|
}
|
|
|
|
push @{ $stack[-1] }, [li => {}, $self->_parse_text($rest)];
|
|
|
|
}
|
|
|
|
pop @stack while @stack;
|
|
|
|
|
|
|
|
return @lists;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub _parse_table {
|
|
|
|
my ($self, $lines) = @_;
|
|
|
|
my ($i, $j, $colspan) = (0, 0, 0);
|
|
|
|
my @rows = ();
|
|
|
|
|
|
|
|
foreach my $line (@{ $lines }) {
|
|
|
|
my @row = ();
|
|
|
|
for ($j = 0; ;$j++) {
|
|
|
|
last if ($line eq '|' or $line eq '^');
|
|
|
|
next if ($line !~ m/^(([\|\^])([^\|\^]*))/o);
|
|
|
|
my $attrs = {};
|
|
|
|
my ($all, $key, $value) = ($1, $2, $3);
|
|
|
|
$line = substr($line, length($all));
|
|
|
|
my $type = ($key eq '^') ? 'th' : 'td';
|
|
|
|
my ($lpad, $content, $rpad) = ($value =~ m/^(\s*)(.*?)(\s*)$/o);
|
|
|
|
if ($line =~ m/^([\|\^]{2,})/o) {
|
|
|
|
# colspan detected;
|
|
|
|
$attrs->{colspan} = $colspan = length($1);
|
|
|
|
} elsif (index($content, ":::") >= 0) {
|
|
|
|
# rowspan detected
|
|
|
|
for (my $k = $i - 1; $k >= 0; $k--) { # k is idx of prev rows
|
|
|
|
my $cellptr = $rows[$k][$j + 2]; # +2 for (tr => {})
|
|
|
|
next if (index($cellptr->[2], ":::") >= 0); # also rowspan
|
|
|
|
$cellptr->[1]->{rowspan} //= 1; # init attr, if missing
|
|
|
|
$cellptr->[1]->{rowspan} += 1; # incr value
|
|
|
|
last;
|
|
|
|
}
|
|
|
|
$attrs->{skip} = 1;
|
|
|
|
} elsif ($colspan > 1) {
|
|
|
|
# colspan-eaten column
|
|
|
|
$attrs->{skip} = 1;
|
|
|
|
$colspan -= 1;
|
|
|
|
}
|
|
|
|
if ($lpad eq '') {
|
|
|
|
$attrs->{align} = ($rpad eq '') ? 'center' : 'left';
|
|
|
|
} else {
|
|
|
|
$attrs->{align} = ($rpad eq '') ? 'right' : 'center';
|
|
|
|
}
|
|
|
|
push @row, [$type => $attrs, $content];
|
|
|
|
}
|
|
|
|
push @rows, [tr => {}, @row];
|
|
|
|
$i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return [table => {}, @rows];
|
|
|
|
}
|
|
|
|
|
|
|
|
sub _parse_link {
|
|
|
|
my ($self, $content) = @_;
|
|
|
|
my ($href, @text);
|
|
|
|
|
|
|
|
if ((my $pos = index($content, "|")) >= 0) {
|
|
|
|
$href = substr($content, 0, $pos);
|
|
|
|
@text = $self->_parse_text(substr($content, $pos + 1));
|
|
|
|
} else {
|
|
|
|
$href = $content;
|
|
|
|
@text = ($content);
|
|
|
|
}
|
|
|
|
|
|
|
|
unless ($href =~ m!^[a-z]+://!io) {
|
|
|
|
$href =~ s{:}{/}go;
|
|
|
|
}
|
|
|
|
|
|
|
|
return [a => {href => $href}, @text];
|
|
|
|
}
|
|
|
|
|
|
|
|
sub _parse_include {
|
|
|
|
my ($self, $content) = @_;
|
|
|
|
|
|
|
|
if ($content =~ m|^([a-z]+)>(.*)|oi) {
|
|
|
|
...
|
|
|
|
}
|
|
|
|
|
|
|
|
my ($lpad, $rpad, $src, %attrs) = ('', '', '');
|
|
|
|
if ($content =~ m!^(\s*)([^\s\|]+)(\s*)[|](.*)!oi) {
|
|
|
|
($lpad, $src, $rpad) = ($1, $2, $3);
|
|
|
|
$attrs{title} = $4;
|
|
|
|
} elsif ($content =~ m!^(\s*)(\S+)(\s*)!) {
|
|
|
|
($lpad, $src, $rpad) = ($1, $2, $3);
|
|
|
|
} else {
|
|
|
|
($src) = ($content =~ s/^(.+)/$1/or);
|
|
|
|
}
|
|
|
|
|
|
|
|
$attrs{align} = ($lpad ne '')
|
|
|
|
? (($rpad ne '') ? 'center' : 'right')
|
|
|
|
: 'left';
|
|
|
|
|
|
|
|
$src =~ s<\?(\d+)(?:x(\d+))?>
|
|
|
|
<
|
|
|
|
$attrs{width} = $1 if $1;
|
|
|
|
$attrs{height} = $2 if $2;
|
|
|
|
''
|
|
|
|
>ioe;
|
|
|
|
|
|
|
|
$src =~ s|:|/|go;
|
|
|
|
$attrs{src} = $src;
|
|
|
|
|
|
|
|
return [img => \%attrs];
|
|
|
|
}
|
|
|
|
|
|
|
|
my $inlines = {
|
|
|
|
"__" => "__", # underline
|
|
|
|
"//" => "//", # italic
|
|
|
|
"**" => "**", # bold
|
|
|
|
"''" => "''", # monospace
|
|
|
|
"{{" => "}}", # module/image
|
|
|
|
"[[" => "]]", # link
|
|
|
|
"((" => "))", # footnote
|
|
|
|
};
|
|
|
|
sub _parse_text {
|
|
|
|
my ($self, $line) = @_;
|
|
|
|
my ($endtag, $endpos, $content);
|
|
|
|
my @parts = ();
|
|
|
|
|
|
|
|
while ($line) {
|
|
|
|
$line =~ m!^(?:(.*?)(__|//|''|\*\*|\[\[|\{\{|\(\(|\\\\ ))?(.*)!o;
|
|
|
|
my ($before, $match, $after) = ($1, $2, $3);
|
|
|
|
if ($before) {
|
|
|
|
push @parts, $before;
|
|
|
|
$line = $match . $after;
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
if ($after and not $match) {
|
|
|
|
push @parts, $after;
|
|
|
|
last;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($match eq '\\\\ ') { # force newline
|
|
|
|
push @parts, [br => {}];
|
|
|
|
$line = $after;
|
|
|
|
next;
|
|
|
|
} elsif ($match) {
|
|
|
|
$endtag = $inlines->{$match};
|
|
|
|
$endpos = index($after, $endtag, 0);
|
|
|
|
if ($endpos < 0) { # no closing marker?
|
|
|
|
push @parts, $match;
|
|
|
|
$line = $after;
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
$content = substr($after, 0, $endpos);
|
|
|
|
$line = substr($after, $endpos + 2);
|
|
|
|
}
|
|
|
|
|
|
|
|
my %attrs;
|
|
|
|
if ($match eq "//") { %attrs = ('font-style' => 'italic'); }
|
|
|
|
elsif ($match eq "**") { %attrs = ('font-weight' => 'bold'); }
|
|
|
|
elsif ($match eq "''") { %attrs = ('font-family' => 'monospace'); }
|
|
|
|
elsif ($match eq "__") { %attrs = ('text-decoration' => 'underline'); }
|
|
|
|
elsif ($match eq "{{") { push @parts, $self->_parse_include($content); next; }
|
|
|
|
elsif ($match eq "[[") { push @parts, $self->_parse_link($content); next; }
|
|
|
|
elsif ($match eq "((") { ...; next; }
|
|
|
|
elsif ($match) { die("unrecognized inline: $match\n"); }
|
|
|
|
|
|
|
|
if (%attrs) {
|
|
|
|
push @parts, [span => \%attrs, $self->_parse_text($content)];
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return @parts;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub parse {
|
|
|
|
my ($self, $text) = @_;
|
|
|
|
my ($mode, $attrs, $buf, @tree) = ('text', '', '');
|
|
|
|
|
|
|
|
my @lines = split /\r?\n/o, $text;
|
|
|
|
my $linenum = 0;
|
|
|
|
|
|
|
|
foreach my $line (@lines) {
|
|
|
|
$linenum++;
|
|
|
|
|
|
|
|
if ($mode =~ m!block/(file|code|nowiki)!o) {
|
|
|
|
$buf //= [];
|
|
|
|
if ($line =~ m{^\s*</$1>}o) {
|
|
|
|
$buf = join("\n", @{ $buf });
|
|
|
|
if ($1 eq 'file') {
|
|
|
|
my $dt = [dt => {}, $attrs->{file}];
|
|
|
|
my $dd = [dt => {}, [pre => {class => $attrs->{class}}, $buf]];
|
|
|
|
push @tree, [dl => {class => 'file'}, [$dt, $dd]];
|
|
|
|
} elsif ($1 eq 'nowiki') {
|
|
|
|
push @tree, [pre => {}, $buf];
|
|
|
|
} else {
|
|
|
|
push @tree, [code => {class => $attrs->{class}}, $buf];
|
|
|
|
}
|
|
|
|
($buf, $mode, $attrs) = ('', '', {}); next;
|
|
|
|
}
|
|
|
|
push @{ $buf }, $line;
|
|
|
|
next;
|
|
|
|
} elsif ($mode eq 'code') {
|
|
|
|
if ($line =~ m/^\s{2}(.+)/o) {
|
|
|
|
$buf .= $line . "\n";
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
push @tree, [pre => {}, $buf];
|
|
|
|
($buf, $mode, $attrs) = ('', '', {});
|
|
|
|
} elsif ($mode eq 'list') {
|
|
|
|
if ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
|
|
|
|
push @{ $buf }, $line;
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
push @tree, $self->_parse_list($buf);
|
|
|
|
($buf, $mode, $attrs) = ('', '', {});
|
|
|
|
} elsif ($mode eq 'table') {
|
|
|
|
if ($line =~ m/^\s?[\|\^]/o) {
|
|
|
|
push @{ $buf }, $line;
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
push @tree, $self->_parse_table($buf);
|
|
|
|
($buf, $mode, $attrs) = ('', '', {});
|
|
|
|
}
|
|
|
|
|
|
|
|
# macro
|
|
|
|
if ($line =~ m/~~NO(TOC|CACHE)~~/) {
|
|
|
|
next; # ignore
|
|
|
|
} elsif ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/o) {
|
|
|
|
# header
|
|
|
|
my $level = $1 =~ tr/=/=/;
|
|
|
|
$level = 7 - $level; # invert
|
|
|
|
push @tree, ["h$level" => {}, $2];
|
|
|
|
next;
|
|
|
|
} elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/o) {
|
|
|
|
# code/file block
|
|
|
|
$mode = "block/$1";
|
|
|
|
$attrs = ($2) ? {class => $2, file => $3} : {};
|
|
|
|
next;
|
|
|
|
} elsif ($line =~ m/\s?<nowiki>/o) {
|
|
|
|
# nowiki block
|
|
|
|
$mode = "block/nowiki";
|
|
|
|
next;
|
|
|
|
} elsif ($line = m/^(\s{2})+([\*-])\s+(.+)/o) {
|
|
|
|
# lists
|
|
|
|
$mode = 'list';
|
|
|
|
$buf = [];
|
|
|
|
push @{ $buf }, $line;
|
|
|
|
next;
|
|
|
|
} elsif ($line = m/^\s?(>)+\s*(.+)/o) {
|
|
|
|
# quotes
|
|
|
|
my $level = $1 =~ tr/>/>/;
|
|
|
|
push @tree, [blockquote => {level => $level}, $2];
|
|
|
|
next;
|
|
|
|
} elsif ($line = m/^\s?[\|\^]/o) {
|
|
|
|
# table
|
|
|
|
$mode = 'table';
|
|
|
|
$buf = [];
|
|
|
|
push @{ $buf }, $line;
|
|
|
|
next;
|
|
|
|
} elsif ($line = m/^\s{2}(\S.+)/o) {
|
|
|
|
# code idented with two spaces
|
|
|
|
$mode = 'code';
|
|
|
|
$buf = $line . "\n";
|
|
|
|
next;
|
|
|
|
} elsif ($line =~ m/^\s?(\S.+)/o) {
|
|
|
|
# nonempty line
|
|
|
|
push @tree, [p => {}, $self->_parse_text($1)];
|
|
|
|
next;
|
|
|
|
} elsif ($line =~ m/^\s*$/o) {
|
|
|
|
# empty lines
|
|
|
|
push @tree, [br => {}];
|
|
|
|
$mode = '';
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
# catchall
|
|
|
|
printf "Unmatched % 3d: %s\n", $linenum, $line;
|
|
|
|
}
|
|
|
|
|
|
|
|
return [div => {}, @tree ];
|
|
|
|
}
|
|
|
|
|
|
|
|
1;
|
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
[div => {class => 'block'}, # <div class='block'>
|
|
|
|
'Hello!', # Hello!
|
|
|
|
[strong => {}, 'user'], # <strong>user</strong>
|
|
|
|
', this is converted text.', # , this is converted text.
|
|
|
|
[br => {}], # <br/>
|
|
|
|
[p => {}, 'Second paragraph'] # <p>Second paragraph</p>
|
|
|
|
] # </div>
|