diff --git a/lib/Text/Dokuwiki/Parser.pm b/lib/Text/Dokuwiki/Parser.pm index 4447cbe..25847c3 100644 --- a/lib/Text/Dokuwiki/Parser.pm +++ b/lib/Text/Dokuwiki/Parser.pm @@ -4,6 +4,8 @@ use strict; use warnings; use utf8; +use Text::Dokuwiki::Regexps; + sub new { my ($class) = @_; my $self = { @@ -224,108 +226,54 @@ sub _parse_text { sub parse { my ($self, $text) = @_; - my ($mode, $attrs, $buf, @tree) = ('text', '', ''); - - my @lines = split /\r?\n/o, $text; - my $linenum = 0; - - foreach my $line (@lines) { - $linenum++; - - if ($mode =~ m!block/(file|code|nowiki)!o) { - my $tag = $1; - $buf ||= []; - if ($line =~ m{^\s*}o) { - $buf = join("\n", @{ $buf }); - if ($tag eq 'file') { - my $dt = [dt => {}, $attrs->{file}]; - my $dd = [dt => {}, [pre => {class => $attrs->{class}}, $buf]]; - push @tree, [dl => {class => 'file'}, [$dt, $dd]]; - } elsif ($tag eq 'nowiki') { - push @tree, [pre => {}, $buf]; - } else { - push @tree, [code => {class => $attrs->{class}}, $buf]; - } - ($buf, $mode, $attrs) = ('', '', {}); next; - } - push @{ $buf }, $line; - next; - } elsif ($mode eq 'code') { - if ($line =~ m/^\s{2}(.+)/o) { - $buf .= $line . "\n"; - next; - } - push @tree, [pre => {}, $buf]; - ($buf, $mode, $attrs) = ('', '', {}); - } elsif ($mode eq 'list') { - if ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) { - push @{ $buf }, $line; - next; - } - push @tree, $self->_parse_list($buf); - ($buf, $mode, $attrs) = ('', '', {}); - } elsif ($mode eq 'table') { - if ($line =~ m/^\s?[\|\^]/o) { - push @{ $buf }, $line; - next; - } - push @tree, $self->_parse_table($buf); - ($buf, $mode, $attrs) = ('', '', {}); - } - # macro - if ($line =~ m/~~NO(TOC|CACHE)~~/) { - warn "ignored macro `$line` at line $linenum\n"; - next; - } elsif ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/o) { - # header - my $level = $1 =~ tr/=/=/; + my $rx = $Text::Dokuwiki::Regexps::regexps; + my (@tree); + my $len = length($text); + pos($text) = 0; + + while ($len - pos($text) > 0) { + my $pos = pos($text); + if (0) { # TODO: MACRO + } elsif ($text =~ m/\G$rx->{header}/cgi) { + my $level = length($+{line}); $level = 7 - $level; # invert - push @tree, ["h$level" => {}, $2]; - next; - } elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/o) { - # code/file block - $mode = "block/$1"; - $attrs = ($2) ? {class => $2, file => $3} : {}; - next; - } elsif ($line =~ m/\s?/o) { - # nowiki block - $mode = "block/nowiki"; - next; - } elsif ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) { - # lists - $mode = 'list'; - $buf = []; - push @{ $buf }, $line; - next; - } elsif ($line =~ m/^\s?(>)+\s*(.+)/o) { - # quotes - my $level = $1 =~ tr/>/>/; - push @tree, [blockquote => {level => $level}, $2]; - next; - } elsif ($line =~ m/^\s?[\|\^]/o) { - # table - $mode = 'table'; - $buf = []; - push @{ $buf }, $line; - next; - } elsif ($line =~ m/^\s{2}(\S.+)/o) { - # code idented with two spaces - $mode = 'code'; - $buf = $line . "\n"; - next; - } elsif ($line =~ m/^\s?(\S.+)/o) { - # nonempty line - push @tree, [p => {}, $self->_parse_text($1)]; - next; - } elsif ($line =~ m/^\s*$/o) { - # empty lines - push @tree, [br => {}]; - $mode = ''; + push @tree, ["h$level" => {}, $+{header}]; + } elsif ($text =~ m/\G$rx->{codeblock}/cgi) { + if ($+{tag} eq 'file' and $+{filename}) { + my $attrs = $+{syntax} ? {class => $+{syntax}} : {}; + my $dt = [dt => {}, $+{filename}]; + my $dd = [dt => {}, [pre => $attrs, $+{block}]]; + push @tree, [dl => {class => 'file'}, [$dt, $dd]]; + } elsif ($+{tag} eq 'file') { + push @tree, [pre => {class => 'file'}, $+{block}]; + } else { + my $attrs = $+{syntax} ? {class => $+{syntax}} : {}; + push @tree, [code => $attrs, [pre => {}, $+{block}]]; + } + } elsif ($text =~ m/\G$rx->{table}/cgi) { + push @tree, $self->_parse_table($+{table}); + } elsif ($text =~ m/\G$rx->{list}/cgi) { + my $list = $+{list}; + chomp $list; + push @tree, $self->_parse_list([ split(/\r?\n/, $list) ]); + } elsif ($text =~ m/\G$rx->{blockquote}/cgi) { + push @tree, [blockquote => {}, $+{block}]; + } elsif ($text =~ m/\G$rx->{pre}/cgi) { + push @tree, [pre => {}, $+{block}]; + } elsif ($text =~ m/\G$rx->{paragraph}/cgi) { + my $text = $+{text}; + chomp $text; + push @tree, [p => {}, $self->_parse_text($text)]; + } elsif ($text =~ m/\G$rx->{emptyline}/cgi) { next; + } else { + my $msg = "Unmatched: '" . substr($text, pos($text)) . "'\n"; + die($msg); + } + if (pos($text) <= $pos) { + die("parser failed, abort\n"); } - # catchall - printf "Unmatched % 3d: %s\n", $linenum, $line; } if (scalar @{ $self->{footnotes} }) {