package Text::Dokuwiki;
use strict;
use warnings;
use feature qw/ switch /;
use utf8;
use Data::Dumper;
# Constructor.
#
#   $class - name of the class to bless the new object into.
#
# Returns a new object backed by an empty hash; the parser keeps no
# per-instance state in the code visible here.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
# Build nested list nodes from a run of list-item lines.
#
#   $self  - invocant (not used).
#   $lines - array ref of lines; an item is an even amount of leading
#            whitespace, a '*' (unordered) or '-' (ordered) bullet, and
#            the item text.
#
# Returns the LIST of top-level list nodes. Each list node is
# [ul|ol => {}, items...], each item is [li => {}, $text], and a nested
# list is attached to its parent list as [li => {}, $nested_list].
#
# Lines that do not look like list items are skipped.
sub _parse_list {
    my ($self, $lines) = @_;

    my @lists;    # finished top-level list nodes, in input order
    my @open;     # lists currently open, outermost first

    foreach my $line (@{ $lines }) {
        my ($indent, $bullet, $text)
            = ($line =~ m/^((?:\s{2})+)([\*-])\s*(.+)/);
        next unless defined $bullet;    # not a list item

        my $depth = ($indent =~ tr/ //);    # count of leading spaces
        my $type  = ($bullet eq '-') ? 'ol' : 'ul';

        # Close every list that is deeper than this item, however many
        # levels the indentation dropped by.
        pop @open while @open and $open[-1]{depth} > $depth;

        # Same depth but a different bullet starts a sibling list. The
        # comparison is against the list open at this depth, not against
        # whatever the previous line happened to be.
        pop @open
            if @open
            and $open[-1]{depth} == $depth
            and $open[-1]{type} ne $type;

        if (!@open or $open[-1]{depth} < $depth) {
            my $list = [$type => {}];
            if (@open) {
                push @{ $open[-1]{node} }, [li => {}, $list];
            } else {
                push @lists, $list;
            }
            push @open, {depth => $depth, type => $type, node => $list};
        }

        push @{ $open[-1]{node} }, [li => {}, $text];
    }

    return @lists;
}
# Build a table node from a run of table-row lines.
#
#   $self  - invocant (not used).
#   $lines - array ref of row strings. Cells are introduced by '|' (td)
#            or '^' (th); a final lone delimiter closes the row. The
#            caller's strings are not modified.
#
# Returns [table => {}, rows...] where each row is [tr => {}, cells...]
# and each cell is [td|th => \%attrs, $content]. Attributes set here:
#   align   - 'left', 'right' or 'center', from the cell's padding
#   colspan - on a cell that is followed by a run of 2+ delimiters
#   rowspan - on the cell above a ':::' cell
#   skip    - on cells swallowed by a colspan or rowspan; they stay in
#             the row so column indexes remain aligned between rows
sub _parse_table {
    my ($self, $lines) = @_;
    my @rows;

    foreach my $line (@{ $lines }) {
        # Work on a copy: $line aliases the caller's array element.
        (my $rest = $line) =~ s/\s+$//;

        my @row;
        my $colspan = 0;    # columns still covered by the current span
        my $j       = 0;    # index of the cell within this row

        # Consume one "<delimiter><text>" chunk per pass. Stop at the
        # closing delimiter, or as soon as nothing parseable is left, so
        # a malformed row can never loop forever.
        while ($rest ne '|' and $rest ne '^'
               and $rest =~ s/^([\|\^])([^\|\^]*)//) {
            my ($delim, $value) = ($1, $2);
            my $type  = ($delim eq '^') ? 'th' : 'td';
            my $attrs = {};
            my ($lpad, $content, $rpad)
                = ($value =~ m/^(\s*)(.*?)(\s*)$/s);

            if ($colspan > 1) {
                # Column eaten by a colspan that started to the left.
                # Checked first so it is not mistaken for a new span.
                $attrs->{skip} = 1;
                $colspan -= 1;
            } elsif ($rest =~ m/^([\|\^]{2,})/) {
                # A run of delimiters follows: this cell spans columns.
                $attrs->{colspan} = $colspan = length($1);
            } elsif (index($content, ':::') >= 0) {
                # Rowspan: extend the nearest real cell above.
                for (my $k = $#rows; $k >= 0; $k--) {
                    my $cell = $rows[$k][$j + 2];    # +2 for (tr => {})
                    last unless ref $cell;           # row above too short
                    next if index($cell->[2] // '', ':::') >= 0;
                    $cell->[1]{rowspan} //= 1;
                    $cell->[1]{rowspan} += 1;
                    last;                            # only one anchor cell
                }
                $attrs->{skip} = 1;
            }

            # NOTE(review): any padding counts here, so "| a |" is
            # 'center'; confirm against the intended alignment rules.
            if ($lpad eq '') {
                $attrs->{align} = ($rpad eq '') ? 'center' : 'left';
            } else {
                $attrs->{align} = ($rpad eq '') ? 'right' : 'center';
            }

            push @row, [$type => $attrs, $content];
            $j++;
        }

        push @rows, [tr => {}, @row];
    }

    return [table => {}, @rows];
}
# Parse DokuWiki markup into a tree.
#
#   $self - invocant; only used to call _parse_table for table rows.
#   $text - markup as one string; split on LF or CRLF.
#
# Returns an array ref of nodes. A node is [tag => \%attrs, children...];
# a text child is written as the pair (0 => 'text').
#
# The parser is line based. Each line is first offered to the construct
# that is currently open (a <code>/<file>/<nowiki> block, an indented
# code run, a list, a table). If that construct does not continue, it is
# emitted and the line is then classified as ordinary text. Anything
# still open at the end of the input is emitted as well.
sub parse {
    my ($self, $text) = @_;
    $text //= '';

    my $tree  = [];
    my $mode  = 'text';
    my $attrs = {};
    my $buf   = '';

    # Emit whatever the current mode has buffered, then reset the state.
    my $flush = sub {
        if ($mode =~ m{^block/(file|code|nowiki)$}) {
            my $tag = $1;
            if ($tag eq 'file') {
                my $dt = [dt => {}, 0 => $attrs->{file}];
                my $dd = [dd => {},
                          [pre => {class => $attrs->{class}}, 0 => $buf]];
                push @{ $tree }, [dl => {class => 'file'}, $dt, $dd];
            } elsif ($tag eq 'nowiki') {
                push @{ $tree }, [pre => {}, 0 => $buf];
            } else {
                push @{ $tree },
                    [code => {class => $attrs->{class}}, 0 => $buf];
            }
        } elsif ($mode eq 'code') {
            push @{ $tree }, [pre => {}, 0 => $buf];
        } elsif ($mode eq 'list') {
            push @{ $tree }, [ul => {}, @{ $buf }]; # TODO: lost list type
        } elsif ($mode eq 'table') {
            # Trim each row and make sure it ends with a delimiter, so
            # the row parser always sees a well-terminated row.
            my @rows = map {
                (my $row = $_) =~ s/^\s+|\s+$//g;
                $row .= '|' unless $row =~ m/[\|\^]$/;
                $row;
            } @{ $buf };
            push @{ $tree }, $self->_parse_table(\@rows);
        }
        ($buf, $mode, $attrs) = ('', '', {});
    };

    my $linenum = 0;
    foreach my $line (split /\r?\n/, $text) {
        $linenum++;

        # --- constructs that are already open -------------------------
        if ($mode =~ m{^block/(file|code|nowiki)$}) {
            # The tag is copied out of $1 right away: the next
            # successful match would reset it.
            my $tag = $1;
            if ($line =~ m{^\s*</\Q$tag\E>}) {
                $flush->();
            } else {
                $buf .= $line . "\n";
            }
            next;    # block content is never parsed as markup
        }
        if ($mode eq 'code') {
            if ($line =~ m/^\s{2}(.+)/) {
                $buf .= $line . "\n";
                next;
            }
            $flush->();
        } elsif ($mode eq 'list') {
            if ($line =~ m/^\s{2}([\*-])\s+(.+)/) {
                push @{ $buf }, [li => {}, 0 => $2];
                next;
            }
            $flush->();
        } elsif ($mode eq 'table') {
            if ($line =~ m/^\s?[\|\^]/) {
                push @{ $buf }, $line;
                next;
            }
            $flush->();
        }

        # --- ordinary text: first matching rule wins ------------------
        if ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/) {
            # header: six '=' is the top level, so invert the count
            my $level = 7 - length($1);
            push @{ $tree }, ["h$level" => {}, 0 => $2];
        } elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/) {
            # code/file block
            $mode  = "block/$1";
            $attrs = ($2) ? {class => $2, file => $3} : {};
            $buf   = '';
        } elsif ($line =~ m/^\s?<nowiki>/) {
            # nowiki block
            $mode = 'block/nowiki';
            $buf  = '';
        } elsif ($line =~ m/^\s{2}([\*-])\s+(.+)/) {
            # lists
            $mode = 'list';
            $buf  = [[li => {}, 0 => $2]];
        } elsif ($line =~ m/^\s?(>+)\s*(.+)/) {
            # quotes: the number of '>' is the nesting level
            push @{ $tree },
                [blockquote => {level => length($1)}, 0 => $2];
        } elsif ($line =~ m/^\s?[\|\^]/) {
            # table: rows are collected and rendered when the run ends
            $mode = 'table';
            $buf  = [$line];
        } elsif ($line =~ m/^\s{2}(\S.+)/) {
            # code indented with two spaces
            $mode = 'code';
            $buf  = $line . "\n";
        } elsif ($line =~ m/^\s?(\S.+)/) {
            # nonempty line
            push @{ $tree }, [p => {}, 0 => $1];
        } elsif ($line =~ m/^\s*$/) {
            # empty line
            push @{ $tree }, [br => {}];
            $mode = '';
        } else {
            # catchall
            printf "Unmatched % 3d: %s\n", $linenum, $line;
        }
    }

    # Do not lose a construct that runs to the end of the input.
    $flush->();

    return $tree;
}
# Sample of the node format used in this file: a node is
# [tag => \%attrs, children...], and a text child is written as the
# pair (0 => 'text'). The trailing comments show the equivalent HTML.
# NOTE(review): this literal sits after the last sub as a bare
# expression; it looks like documentation that was meant to live in POD
# or after __END__ - confirm. As written it is also the file's final
# (true) value.
[div => {class => 'block'}, # <div class='block'>
0 => 'text1', # text1
[strong => {}, 0 => 'bold text'], # <strong>bold text</strong>
0 => ', this is converted text.', # , this is converted text.
[br => {}], # <br/>
[p => {}, 0 => 'Second paragraph'] # <p>Second paragraph</p>
] # </div>