Browse Source

* Text::Dokuwiki::Parser : use regex parser instead of per-line parsing

master
Alex 'AdUser' Z 10 years ago
parent
commit
92aad3410e
  1. 142
      lib/Text/Dokuwiki/Parser.pm

142
lib/Text/Dokuwiki/Parser.pm

@ -4,6 +4,8 @@ use strict;
use warnings; use warnings;
use utf8; use utf8;
use Text::Dokuwiki::Regexps;
sub new { sub new {
my ($class) = @_; my ($class) = @_;
my $self = { my $self = {
@ -224,108 +226,54 @@ sub _parse_text {
sub parse { sub parse {
my ($self, $text) = @_; my ($self, $text) = @_;
my ($mode, $attrs, $buf, @tree) = ('text', '', '');
my $rx = $Text::Dokuwiki::Regexps::regexps;
my @lines = split /\r?\n/o, $text; my (@tree);
my $linenum = 0; my $len = length($text);
pos($text) = 0;
foreach my $line (@lines) {
$linenum++; while ($len - pos($text) > 0) {
my $pos = pos($text);
if ($mode =~ m!block/(file|code|nowiki)!o) { if (0) { # TODO: MACRO
my $tag = $1; } elsif ($text =~ m/\G$rx->{header}/cgi) {
$buf ||= []; my $level = length($+{line});
if ($line =~ m{^\s*</$tag>}o) { $level = 7 - $level; # invert
$buf = join("\n", @{ $buf }); push @tree, ["h$level" => {}, $+{header}];
if ($tag eq 'file') { } elsif ($text =~ m/\G$rx->{codeblock}/cgi) {
my $dt = [dt => {}, $attrs->{file}]; if ($+{tag} eq 'file' and $+{filename}) {
my $dd = [dt => {}, [pre => {class => $attrs->{class}}, $buf]]; my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
my $dt = [dt => {}, $+{filename}];
my $dd = [dt => {}, [pre => $attrs, $+{block}]];
push @tree, [dl => {class => 'file'}, [$dt, $dd]]; push @tree, [dl => {class => 'file'}, [$dt, $dd]];
} elsif ($tag eq 'nowiki') { } elsif ($+{tag} eq 'file') {
push @tree, [pre => {}, $buf]; push @tree, [pre => {class => 'file'}, $+{block}];
} else { } else {
push @tree, [code => {class => $attrs->{class}}, $buf]; my $attrs = $+{syntax} ? {class => $+{syntax}} : {};
} push @tree, [code => $attrs, [pre => {}, $+{block}]];
($buf, $mode, $attrs) = ('', '', {}); next; }
} } elsif ($text =~ m/\G$rx->{table}/cgi) {
push @{ $buf }, $line; push @tree, $self->_parse_table($+{table});
next; } elsif ($text =~ m/\G$rx->{list}/cgi) {
} elsif ($mode eq 'code') { my $list = $+{list};
if ($line =~ m/^\s{2}(.+)/o) { chomp $list;
$buf .= $line . "\n"; push @tree, $self->_parse_list([ split(/\r?\n/, $list) ]);
} elsif ($text =~ m/\G$rx->{blockquote}/cgi) {
push @tree, [blockquote => {}, $+{block}];
} elsif ($text =~ m/\G$rx->{pre}/cgi) {
push @tree, [pre => {}, $+{block}];
} elsif ($text =~ m/\G$rx->{paragraph}/cgi) {
my $text = $+{text};
chomp $text;
push @tree, [p => {}, $self->_parse_text($text)];
} elsif ($text =~ m/\G$rx->{emptyline}/cgi) {
next; next;
} else {
my $msg = "Unmatched: '" . substr($text, pos($text)) . "'\n";
die($msg);
} }
push @tree, [pre => {}, $buf]; if (pos($text) <= $pos) {
($buf, $mode, $attrs) = ('', '', {}); die("parser failed, abort\n");
} elsif ($mode eq 'list') {
if ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_list($buf);
($buf, $mode, $attrs) = ('', '', {});
} elsif ($mode eq 'table') {
if ($line =~ m/^\s?[\|\^]/o) {
push @{ $buf }, $line;
next;
}
push @tree, $self->_parse_table($buf);
($buf, $mode, $attrs) = ('', '', {});
}
# macro
if ($line =~ m/~~NO(TOC|CACHE)~~/) {
warn "ignored macro `$line` at line $linenum\n";
next;
} elsif ($line =~ m/^\s?(={2,6}) (.+) \g{1}\s*/o) {
# header
my $level = $1 =~ tr/=/=/;
$level = 7 - $level; # invert
push @tree, ["h$level" => {}, $2];
next;
} elsif ($line =~ m/^\s?<(code|file)(?:\s+(\S+)\s+(\S+))?>\s*$/o) {
# code/file block
$mode = "block/$1";
$attrs = ($2) ? {class => $2, file => $3} : {};
next;
} elsif ($line =~ m/\s?<nowiki>/o) {
# nowiki block
$mode = "block/nowiki";
next;
} elsif ($line =~ m/^(\s{2})+([\*-])\s+(.+)/o) {
# lists
$mode = 'list';
$buf = [];
push @{ $buf }, $line;
next;
} elsif ($line =~ m/^\s?(>)+\s*(.+)/o) {
# quotes
my $level = $1 =~ tr/>/>/;
push @tree, [blockquote => {level => $level}, $2];
next;
} elsif ($line =~ m/^\s?[\|\^]/o) {
# table
$mode = 'table';
$buf = [];
push @{ $buf }, $line;
next;
} elsif ($line =~ m/^\s{2}(\S.+)/o) {
# code indented with two spaces
$mode = 'code';
$buf = $line . "\n";
next;
} elsif ($line =~ m/^\s?(\S.+)/o) {
# nonempty line
push @tree, [p => {}, $self->_parse_text($1)];
next;
} elsif ($line =~ m/^\s*$/o) {
# empty lines
push @tree, [br => {}];
$mode = '';
next;
} }
# catchall
printf "Unmatched % 3d: %s\n", $linenum, $line;
} }
if (scalar @{ $self->{footnotes} }) { if (scalar @{ $self->{footnotes} }) {

Loading…
Cancel
Save