Browse Source

* Text::Dokuwiki::Parser : use regex parser instead of per-line parsing

master
Alex 'AdUser' Z 9 years ago
parent
commit
92aad3410e
  1. 144
      lib/Text/Dokuwiki/Parser.pm

144
lib/Text/Dokuwiki/Parser.pm

@ -4,6 +4,8 @@ use strict;
use warnings;
use utf8;
use Text::Dokuwiki::Regexps;
sub new {
my ($class) = @_;
my $self = {
@ -224,108 +226,54 @@ sub _parse_text {
sub parse {
my ($self, $text) = @_;
my ($mode, $attrs, $buf, @tree) = ('text', '', '');
my @lines = split /\r?\n/o, $text;
my $linenum = 0;
foreach my $line (@lines) {
$linenum++;
if ($mode =~ m!block/(file|code|nowiki)!o) {
my $tag = $1;
$buf ||= [];
if ($line =~ m{^\s*</$tag>}o) {
$buf = join("\n", @{ $buf });
if ($tag eq 'file') {
my $dt = [dt => {}, $attrs->{file}];
my $dd = [dt => {}, [pre => {class => $attrs->{class}}, $buf]];
push @tree, [dl => {class => 'file'}, [$dt, $dd]];
} elsif ($tag eq 'nowiki') {
push @tree, [pre => {}, $buf];
} else {
push @tree, [code => {class => $attrs->{class}}, $buf];
}
($buf, $mode, $attrs) = ('', '', {}); next;
}
push @{ $buf }, $line;
next;
} elsif ($mode eq 'code') {
if ($line =~ m/^\s{2}(.+)/o) {
$buf .= $line . "\n";
next;
}
push @tree, [pre => {}, $buf];
($buf, $mode, $attrs) = ('', '', {});
} elsif ($mode eq 'list') {
if ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_list($buf);
($buf, $mode, $attrs) = ('', '', {});
} elsif ($mode eq 'table') {
if ($line =~ m/^\s?[\|\^]/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_table($buf);
($buf, $mode, $attrs) = ('', '', {});
}
# macro
if ($line =~ m/~~NO(TOC|CACHE)~~/) {
warn "ignored macro `$line` at line $linenum\n";
next;
} elsif ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/o) {
# header
my $level = $1 =~ tr/=/=/;
my $rx = $Text::Dokuwiki::Regexps::regexps;
my (@tree);
my $len = length($text);
pos($text) = 0;
while ($len - pos($text) > 0) {
my $pos = pos($text);
if (0) { # TODO: MACRO
} elsif ($text =~ m/\G$rx->{header}/cgi) {
my $level = length($+{line});
$level = 7 - $level; # invert
push @tree, ["h$level" => {}, $2];
next;
} elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/o) {
# code/file block
$mode = "block/$1";
$attrs = ($2) ? {class => $2, file => $3} : {};
next;
} elsif ($line =~ m/\s?<nowiki>/o) {
# nowiki block
$mode = "block/nowiki";
next;
} elsif ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
# lists
$mode = 'list';
$buf = [];
push @{ $buf }, $line;
next;
} elsif ($line =~ m/^\s?(>)+\s*(.+)/o) {
# quotes
my $level = $1 =~ tr/>/>/;
push @tree, [blockquote => {level => $level}, $2];
next;
} elsif ($line =~ m/^\s?[\|\^]/o) {
# table
$mode = 'table';
$buf = [];
push @{ $buf }, $line;
next;
} elsif ($line =~ m/^\s{2}(\S.+)/o) {
# code indented with two spaces
$mode = 'code';
$buf = $line . "\n";
next;
} elsif ($line =~ m/^\s?(\S.+)/o) {
# nonempty line
push @tree, [p => {}, $self->_parse_text($1)];
next;
} elsif ($line =~ m/^\s*$/o) {
# empty lines
push @tree, [br => {}];
$mode = '';
push @tree, ["h$level" => {}, $+{header}];
} elsif ($text =~ m/\G$rx->{codeblock}/cgi) {
if ($+{tag} eq 'file' and $+{filename}) {
my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
my $dt = [dt => {}, $+{filename}];
my $dd = [dt => {}, [pre => $attrs, $+{block}]];
push @tree, [dl => {class => 'file'}, [$dt, $dd]];
} elsif ($+{tag} eq 'file') {
push @tree, [pre => {class => 'file'}, $+{block}];
} else {
my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
push @tree, [code => $attrs, [pre => {}, $+{block}]];
}
} elsif ($text =~ m/\G$rx->{table}/cgi) {
push @tree, $self->_parse_table($+{table});
} elsif ($text =~ m/\G$rx->{list}/cgi) {
my $list = $+{list};
chomp $list;
push @tree, $self->_parse_list([ split(/\r?\n/, $list) ]);
} elsif ($text =~ m/\G$rx->{blockquote}/cgi) {
push @tree, [blockquote => {}, $+{block}];
} elsif ($text =~ m/\G$rx->{pre}/cgi) {
push @tree, [pre => {}, $+{block}];
} elsif ($text =~ m/\G$rx->{paragraph}/cgi) {
my $text = $+{text};
chomp $text;
push @tree, [p => {}, $self->_parse_text($text)];
} elsif ($text =~ m/\G$rx->{emptyline}/cgi) {
next;
} else {
my $msg = "Unmatched: '" . substr($text, pos($text)) . "'\n";
die($msg);
}
if (pos($text) <= $pos) {
die("parser failed, abort\n");
}
# catchall
printf "Unmatched % 3d: %s\n", $linenum, $line;
}
if (scalar @{ $self->{footnotes} }) {

Loading…
Cancel
Save