package Text::Dokuwiki::Regexps;

# Block-level regular expressions for DokuWiki markup.
#
# Every pattern is compiled with the m and x modifiers and stored in the
# package variable $regexps under the name of the block it recognises.

use strict;
use warnings;
use utf8;

# One blank character: a space or a tab.  Kept as a string so it can be
# interpolated into the extended patterns (the space is backslash-escaped).
my $SP = '[\ \t]';

# Line terminator: LF, optionally preceded by CR.
my $EOL = '\r?\n';

# Hash reference mapping a block name to its compiled pattern.
our $regexps = {};

# Section header, e.g. "== Title ==".
#
# Named captures:
#   line   - the run of two to six '=' that opens the header; the closing run
#            must be identical (enforced with a backreference)
#   header - the header text without the surrounding blanks
#
# The whole header has to sit on a single line: only spaces and tabs are
# accepted around the text, never a newline.  Blanks after the closing marks
# are tolerated.
$regexps->{header} = qr/
    (?:
        ^                       # line start
        $SP?                    # maybe one leading space
        (?<line> ={2,6} )       # header marks
        $SP*                    # blanks before the text, never a newline
        (?<header> \S.*? )      # header text: one non-blank, then as little as possible
        $SP*                    # trailing blanks
        (?: \g{line} )          # closing marks, exactly the same run as above
        $SP*                    # blanks after the closing marks
        (?: $EOL | $)           # newline or file end
    )
/mx;

# Code or file block:
#
#   <code perl example.pl>
#   ...
#   </code>
#
# Named captures:
#   tag      - "code" or "file"; the closing tag must use the same name
#   syntax   - first word after the tag name, if any
#   filename - second word after the tag name, if any
#   block    - the lines between the opening and the closing tag (may be empty)
#
# A single attribute word is the syntax name; the file name is only captured
# when a second word follows it.
$regexps->{codeblock} = qr%
    ^                           # first line
    $SP?                        # maybe one leading space
    <                           # opening tag start
    (?<tag> code | file )       # tag name
    (?:                         # optional attributes
        $SP+ (?<syntax>\S+)             # first word: syntax name
        (?: $SP+ (?<filename>\S+) )?    # optional second word: file name
    )?
    >                           # opening tag end
    $SP*                        # any number of trailing blanks
    $EOL                        # end of first line

    (?<block>                   # body of block
        (?:
            ^                   # line start
            .*                  # contents
            $EOL                # line end, a real newline is required here
        )*?                     # not greedy, other blocks may follow
    )                           # block body end

    ^                           # last line
    $SP?                        # maybe one leading space
    </\g{tag}>                  # closing tag, same name as in the first line
    $SP*                        # any number of trailing blanks
    (?: $EOL | $)               # end of last line
%mx;

# Table: one or more consecutive rows.  A row starts (after at most one
# leading blank) with '|' or '^' and ends with '|' or '^', optionally
# followed by blanks.
#
# Named captures:
#   table - all rows of the table, line terminators included
$regexps->{table} = qr/
    (?<table>
        (?:
            ^                   # line start
            $SP?                # maybe one leading space
            [|^]                # first cell marker: '|' (td) or '^' (th)
            .*                  # rest of line
            [|^]                # the row ends with '|' or '^'
            $SP*                # any number of trailing blanks
            (?: $EOL | $ )      # newline or file end
        )+                      # one or more such lines
    )
/mx;

# Preformatted text: one or more consecutive lines that begin with two
# blanks (spaces or tabs).
#
# Named captures:
#   block - all matched lines, leading blanks and line terminators included
$regexps->{pre} = qr/
    (?<block>
        (?:
            ^                   # line start
            $SP {2}             # two leading blanks, more may follow
            .*                  # rest of line
            (?: $EOL | $ )      # newline or file end
        )+                      # one or more lines
    )
/mx;

# Block quote: one or more consecutive lines that begin (after at most one
# leading blank) with one or more '>' markers.
#
# Named captures:
#   block - all quoted lines, markers and line terminators included
$regexps->{blockquote} = qr/
    (?<block>
        (?:
            ^                   # line start
            $SP?                # maybe one leading space
            [>]{1,}             # one or more quote markers
            .*                  # rest of line
            (?: $EOL | $ )      # newline or file end
        )+                      # one or more lines
    )
/mx;

# List: one or more consecutive item lines.  An item line is indented by two
# or more blanks, followed by '*' or '-' and at least one more character.
#
# Named captures:
#   list - all item lines, indentation and line terminators included
$regexps->{list} = qr/
    (?<list>
        (?:
            ^                   # line start
            $SP {2,}            # two or more blanks
            [*-]                # start marker of the list item
            .+                  # rest of line, must not be empty
            (?: $EOL | $ )      # newline or file end
        )+                      # one or more lines
    )
/mx;

# Paragraph: one or more consecutive lines that contain a non-blank
# character after at most one leading blank.
#
# Named captures:
#   text - all lines of the paragraph, line terminators included
$regexps->{paragraph} = qr/
    (?<text>
        (?:
            ^                   # line start
            $SP?                # optional leading space
            \S.*                # at least one printable symbol
            (?: $EOL | $ )      # newline or file end
        )+                      # one or more lines
    )
/mx;

# Run of empty lines: lines that hold nothing but spaces and tabs.
#
# The group may repeat zero times, so this pattern also succeeds without
# consuming anything when no empty line is present.
$regexps->{emptyline} = qr{
    (?:
        ^                       # line start
        (?: $SP* )              # zero or more blanks
        (?: $EOL | $ )          # newline or file end
    )*
}mx;

# Horizontal rule: a line that consists of four or more dashes and nothing
# else (no leading or trailing blanks).
$regexps->{hrule} = qr/
    (?:
        ^                       # line start
        (?: -{4,} )             # four or more dashes
        (?: $EOL | $ )          # newline or file end
    )
/mx;

1;