Browse Source

* Text::Dokuwiki::Parser : use regex parser instead of per-line parsing

master
Alex 'AdUser' Z 10 years ago
parent
commit
92aad3410e
  1. 144
      lib/Text/Dokuwiki/Parser.pm

144
lib/Text/Dokuwiki/Parser.pm

@ -4,6 +4,8 @@ use strict;
use warnings; use warnings;
use utf8; use utf8;
use Text::Dokuwiki::Regexps;
sub new { sub new {
my ($class) = @_; my ($class) = @_;
my $self = { my $self = {
@ -224,108 +226,54 @@ sub _parse_text {
sub parse { sub parse {
my ($self, $text) = @_; my ($self, $text) = @_;
my ($mode, $attrs, $buf, @tree) = ('text', '', '');
my @lines = split /\r?\n/o, $text;
my $linenum = 0;
foreach my $line (@lines) {
$linenum++;
if ($mode =~ m!block/(file|code|nowiki)!o) {
my $tag = $1;
$buf ||= [];
if ($line =~ m{^\s*</$tag>}o) {
$buf = join("\n", @{ $buf });
if ($tag eq 'file') {
my $dt = [dt => {}, $attrs->{file}];
my $dd = [dt => {}, [pre => {class => $attrs->{class}}, $buf]];
push @tree, [dl => {class => 'file'}, [$dt, $dd]];
} elsif ($tag eq 'nowiki') {
push @tree, [pre => {}, $buf];
} else {
push @tree, [code => {class => $attrs->{class}}, $buf];
}
($buf, $mode, $attrs) = ('', '', {}); next;
}
push @{ $buf }, $line;
next;
} elsif ($mode eq 'code') {
if ($line =~ m/^\s{2}(.+)/o) {
$buf .= $line . "\n";
next;
}
push @tree, [pre => {}, $buf];
($buf, $mode, $attrs) = ('', '', {});
} elsif ($mode eq 'list') {
if ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_list($buf);
($buf, $mode, $attrs) = ('', '', {});
} elsif ($mode eq 'table') {
if ($line =~ m/^\s?[\|\^]/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_table($buf);
($buf, $mode, $attrs) = ('', '', {});
}
# macro my $rx = $Text::Dokuwiki::Regexps::regexps;
if ($line =~ m/~~NO(TOC|CACHE)~~/) { my (@tree);
warn "ignored macro `$line` at line $linenum\n"; my $len = length($text);
next; pos($text) = 0;
} elsif ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/o) {
# header while ($len - pos($text) > 0) {
my $level = $1 =~ tr/=/=/; my $pos = pos($text);
if (0) { # TODO: MACRO
} elsif ($text =~ m/\G$rx->{header}/cgi) {
my $level = length($+{line});
$level = 7 - $level; # invert $level = 7 - $level; # invert
push @tree, ["h$level" => {}, $2]; push @tree, ["h$level" => {}, $+{header}];
next; } elsif ($text =~ m/\G$rx->{codeblock}/cgi) {
} elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/o) { if ($+{tag} eq 'file' and $+{filename}) {
# code/file block my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
$mode = "block/$1"; my $dt = [dt => {}, $+{filename}];
$attrs = ($2) ? {class => $2, file => $3} : {}; my $dd = [dt => {}, [pre => $attrs, $+{block}]];
next; push @tree, [dl => {class => 'file'}, [$dt, $dd]];
} elsif ($line =~ m/\s?<nowiki>/o) { } elsif ($+{tag} eq 'file') {
# nowiki block push @tree, [pre => {class => 'file'}, $+{block}];
$mode = "block/nowiki"; } else {
next; my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
} elsif ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) { push @tree, [code => $attrs, [pre => {}, $+{block}]];
# lists }
$mode = 'list'; } elsif ($text =~ m/\G$rx->{table}/cgi) {
$buf = []; push @tree, $self->_parse_table($+{table});
push @{ $buf }, $line; } elsif ($text =~ m/\G$rx->{list}/cgi) {
next; my $list = $+{list};
} elsif ($line =~ m/^\s?(>)+\s*(.+)/o) { chomp $list;
# quotes push @tree, $self->_parse_list([ split(/\r?\n/, $list) ]);
my $level = $1 =~ tr/>/>/; } elsif ($text =~ m/\G$rx->{blockquote}/cgi) {
push @tree, [blockquote => {level => $level}, $2]; push @tree, [blockquote => {}, $+{block}];
next; } elsif ($text =~ m/\G$rx->{pre}/cgi) {
} elsif ($line =~ m/^\s?[\|\^]/o) { push @tree, [pre => {}, $+{block}];
# table } elsif ($text =~ m/\G$rx->{paragraph}/cgi) {
$mode = 'table'; my $text = $+{text};
$buf = []; chomp $text;
push @{ $buf }, $line; push @tree, [p => {}, $self->_parse_text($text)];
next; } elsif ($text =~ m/\G$rx->{emptyline}/cgi) {
} elsif ($line =~ m/^\s{2}(\S.+)/o) {
# code idented with two spaces
$mode = 'code';
$buf = $line . "\n";
next;
} elsif ($line =~ m/^\s?(\S.+)/o) {
# nonempty line
push @tree, [p => {}, $self->_parse_text($1)];
next;
} elsif ($line =~ m/^\s*$/o) {
# empty lines
push @tree, [br => {}];
$mode = '';
next; next;
} else {
my $msg = "Unmatched: '" . substr($text, pos($text)) . "'\n";
die($msg);
}
if (pos($text) <= $pos) {
die("parser failed, abort\n");
} }
# catchall
printf "Unmatched % 3d: %s\n", $linenum, $line;
} }
if (scalar @{ $self->{footnotes} }) { if (scalar @{ $self->{footnotes} }) {

Loading…
Cancel
Save