<?php
// This may look a bit strange, but all possible formatting tags have to be in a single regular expression for this to work correctly. Yup!
// #dotmg [many lines] : Unclosed tags fix! For more info, m.randimbisoa@dotmg.net
// JavaWoman - corrected and improved unclosed tags handling, including missing ones and indents
// ------------- define the necessary functions -----------
if (!function_exists('close_indents'))
{
    /**
     * Close every indent/list container that is still open.
     *
     * Pops all stored closing fragments off the stack (innermost first),
     * concatenates them and resets the indent level to zero. The guard around
     * the declaration prevents a fatal "cannot redeclare" error when this
     * script is executed more than once in the same request.
     *
     * @param array $indentClosers    stack of closing markup, innermost container last;
     *                                emptied by this call (a non-array is treated as empty)
     * @param int   $oldIndentLevel   current indent level; set to 0 by this call
     * @return string the concatenated closing markup ('' when nothing was open)
     */
    function close_indents(&$indentClosers,&$oldIndentLevel) # JW 2005-07-11 removed superfluous variables
    {
        $result = '';

        // Callers may hand in a variable that was never initialised; by-reference
        // passing turns that into NULL, which count() must not be given.
        if (!is_array($indentClosers))
        {
            $indentClosers = array();
        }

        // innermost container was pushed last, so popping yields correct nesting order
        while (count($indentClosers) > 0)
        {
            $result .= array_pop($indentClosers);
        }

        $oldIndentLevel = 0;
        return $result;
    }
}
if (!function_exists('wakka2callback'))
{
    /**
     * Callback for the main formatter expression: turns one matched piece of
     * wiki markup into (X)HTML.
     *
     * Paired markup (bold, italic, headings, floats ...) is handled with static
     * counters that are incremented on every occurrence: an odd count opens the
     * element, an even count closes it. Indents and lists keep a static stack of
     * closing fragments. Because the state is static it survives between the
     * individual callback invocations of one formatting run.
     *
     * Calling the function with the string 'closetags' instead of a match array
     * returns the closing markup for everything that is still open and resets
     * the counters.
     *
     * @param array|string $things  match array (the token is read from index 1),
     *                              or the string 'closetags'
     * @return string replacement markup for the token
     */
    function wakka2callback($things)
    {
        $result = '';

        // --- state shared between invocations ---
        static $oldIndentLevel = 0;
        static $oldIndentType = '';                 # JW 2005-07-12 added
        static $indentClosers = array();            # closing markup per open indent level
        static $indentTypes = array();              # indent type per open indent level
        static $br = 1;                             # whether the next newline becomes a <br />
        static $trigger_bold = 0;
        static $trigger_italic = 0;
        static $trigger_keys = 0;
        static $trigger_monospace = 0;
        static $trigger_underline = 0;
        static $trigger_notes = 0;
        static $trigger_strike = 0;
        static $trigger_inserted = 0;
        static $trigger_deleted = 0;
        static $trigger_center = 0;
        static $trigger_floatl = 0;
        static $trigger_floatr = 0;                 # JW added
        static $trigger_l = array(-1, 0, 0, 0, 0, 0);

        global $wakka;  # @@@ should be capitalized but requires change in wikka.php (etc.)

        // --- special request: close everything that is still open ---
        if (!is_array($things) && $things === 'closetags')
        {
            $result .= close_indents($indentClosers, $oldIndentLevel);  # JW 2005-07-11 removed superfluous variables
            if ($trigger_bold % 2) $result .= '</strong>';
            if ($trigger_italic % 2) $result .= '</em>';
            if ($trigger_keys % 2) $result .= '</kbd>';
            if ($trigger_monospace % 2) $result .= '</tt>';
            if ($trigger_underline % 2) $result .= '</span>';
            if ($trigger_notes % 2) $result .= '</span>';
            if ($trigger_strike % 2) $result .= '</span>';
            if ($trigger_inserted % 2) $result .= '</span>';
            if ($trigger_deleted % 2) $result .= '</span>';
            if ($trigger_center % 2) $result .= '</div>';
            if ($trigger_floatl % 2) $result .= '</div>';
            if ($trigger_floatr % 2) $result .= '</div>';  # JW added
            for ($i = 1; $i <= 5; $i++)
            {
                if ($trigger_l[$i] % 2) $result .= ("</h$i>");
            }
            $trigger_bold = $trigger_italic = $trigger_keys = $trigger_monospace = 0;
            $trigger_underline = $trigger_notes = $trigger_strike = $trigger_inserted = $trigger_deleted = 0;
            $trigger_center = $trigger_floatl = $trigger_floatr = 0;
            $trigger_l = array(-1, 0, 0, 0, 0, 0);
            return $result;
        }

        $thing = $things[1];

        // convert HTML thingies (including ampersand NOT part of entity)
        if ($thing === '<')
        {
            return '&lt;';
        }
        if ($thing === '>')
        {
            return '&gt;';
        }
        if ($thing === '&')
        {
            return '&amp;';
        }

        // JW 2005-05-23: changed floats handling so they can be nested (one type within another only)
        // float box left
        if ($thing === '<<')
        {
            return (++$trigger_floatl % 2 ? '<div class="floatl">' : '</div>');  # JW changed (no newline)
        }
        // float box right
        if ($thing === '>>')
        {
            return (++$trigger_floatr % 2 ? '<div class="floatr">' : '</div>');  # JW changed (trigger, no newline)
        }
        // clear floated box
        if ($thing === '::c::')
        {
            return ('<div class="clear">&nbsp;</div>'."\n");
        }
        // keyboard
        if ($thing === '#%')
        {
            return (++$trigger_keys % 2 ? '<kbd class="keys">' : '</kbd>');
        }
        // bold
        if ($thing === '**')
        {
            return (++$trigger_bold % 2 ? '<strong>' : '</strong>');
        }
        // italic
        if ($thing === '//')
        {
            return (++$trigger_italic % 2 ? '<em>' : '</em>');
        }
        // monospace
        if ($thing === '##')
        {
            return (++$trigger_monospace % 2 ? '<tt>' : '</tt>');
        }
        // underline
        if ($thing === '__')
        {
            return (++$trigger_underline % 2 ? '<span class="underline">' : '</span>');
        }
        // notes
        if ($thing === "''")
        {
            return (++$trigger_notes % 2 ? '<span class="notes">' : '</span>');
        }
        // strikethrough
        if ($thing === '++')
        {
            return (++$trigger_strike % 2 ? '<span class="strikethrough">' : '</span>');
        }
        // additions
        if ($thing === '££')
        {
            return (++$trigger_inserted % 2 ? '<span class="additions">' : '</span>');
        }
        // deletions
        if ($thing === '¥¥')
        {
            return (++$trigger_deleted % 2 ? '<span class="deletions">' : '</span>');
        }
        // center
        if ($thing === '@@')
        {
            return (++$trigger_center % 2 ? '<div class="center">'."\n" : "\n</div>\n");
        }

        // urls
        if (preg_match('/^([a-z]+:\/\/\S+?)([^[:alnum:]^\/])?$/', $thing, $matches))
        {
            $url = $matches[1];
            // The trailing (punctuation) character is optional, so the group may be absent.
            // It is written straight into the page: escape it (without double-encoding).
            $trailing = isset($matches[2]) ? htmlspecialchars($matches[2], ENT_QUOTES, 'ISO-8859-1', false) : '';

            if (preg_match('/^(.*)\.(gif|jpg|png)/si', $url))
            {
                // URL comes from page text and lands in an attribute: escape quotes etc.
                return '<img src="'.htmlspecialchars($url, ENT_QUOTES, 'ISO-8859-1', false).'" alt="image" />'.$trailing;
            }
            // Mind Mapping Mod
            if (preg_match('/\.mm$/i', $url))
            {
                return $wakka->Action('mindmap '.$url);
            }
            return $wakka->Link($url).$trailing;
        }

        // headers (level 5 is the shortest marker, level 1 the longest)
        $headingLevels = array('==' => 5, '===' => 4, '====' => 3, '=====' => 2, '======' => 1);
        if (isset($headingLevels[$thing]))
        {
            $level = $headingLevels[$thing];
            $br = 0;  # no break needed after a block
            return (++$trigger_l[$level] % 2 ? '<h'.$level.'>' : '</h'.$level.">\n");
        }

        // forced line breaks
        if ($thing === "---")
        {
            return '<br />';
        }

        // escaped text
        if (preg_match('/^""(.*)""$/s', $thing, $matches))
        {
            // get config
            $ddquotes_policy = isset($wakka->config['double_doublequote_html']) ? $wakka->config['double_doublequote_html'] : '';
            // get embedded code
            $embedded = $matches[1];
            // handle embedded id attributes for 'safe' and 'raw'
            if ($ddquotes_policy == 'safe' || $ddquotes_policy == 'raw')
            {
                // get tags with id attributes
                $patTagWithId = '((<[a-z].*?)(id=("|\')(.*?)\\4)(.*?>))';
                // with PREG_SET_ORDER we get an array for each match: easy to use with list()!
                // we do the match case-insensitive so we catch uppercase HTML as well;
                // SafeHTML will treat this but 'raw' may end up with invalid code!
                $tags2 = preg_match_all('/'.$patTagWithId.'/i', $embedded, $matches2, PREG_SET_ORDER);  # use backref to match both single and double quotes
                // step through code, replacing tags with ids with tags with new ('repaired') ids
                $tmpembedded = $embedded;
                $newembedded = '';
                for ($i = 0; $i < $tags2; $i++)
                {
                    list(, $tag, $tagstart, $attrid, $quote, $id, $tagend) = $matches2[$i];  # $attrid not needed, just for clarity
                    $parts = explode($tag, $tmpembedded, 2);  # split in two at matched tag
                    if ($id != ($newid = $wakka->makeId('embed', $id)))  # replace if we got a new value
                    {
                        $tag = $tagstart.'id='.$quote.$newid.$quote.$tagend;
                    }
                    $newembedded .= $parts[0].$tag;  # append (replacement) tag to first part
                    $tmpembedded = $parts[1];        # after tag: next bit to handle
                }
                $newembedded .= $tmpembedded;  # add last part
            }
            // return (treated) embedded content according to config
            // NOTE: we apply SafeHTML *after* id treatment so it won't be throwing away invalid ids that we're repairing instead!
            switch ($ddquotes_policy)
            {
                case 'safe':
                    return $wakka->ReturnSafeHTML($newembedded);
                case 'raw':
                    return $newembedded;  # may still be invalid code - 'raw' will not be corrected!
                default:
                    return $wakka->htmlspecialchars_ent($embedded);  # display only
            }
        }

        // code text
        if (preg_match('/^% %(.*?)% %$/s', $thing, $matches))
        {
            /*
             * Note: this routine is rewritten such that (new) language formatters
             * will automatically be found, whether they are GeSHi language config files
             * or "internal" Wikka formatters.
             * Path to GeSHi language files and Wikka formatters MUST be defined in config.
             * For line numbering (GeSHi only) a starting line can be specified after the language
             * code, separated by a ; e.g., % %(php;27)....% %.
             * Specifying >= 1 turns on line numbering if this is enabled in the configuration.
             */
            $code = $matches[1];
            // if configuration path isn't set, make sure we'll get an invalid path so we
            // don't match anything in the home directory
            $geshi_hi_path = isset($wakka->config['geshi_languages_path']) ? $wakka->config['geshi_languages_path'] : '/:/';
            $wikka_hi_path = isset($wakka->config['wikka_highlighters_path']) ? $wakka->config['wikka_highlighters_path'] : '/:/';
            // check if a language (and starting line) has been specified
            if (preg_match("/^\((.+?)(;([0-9]+))??\)(.*)$/s", $code, $matches))
            {
                list(, $language, , $start, $code) = $matches;
            }
            // get rid of newlines at start and end (and preceding/following whitespace)
            // Note: unlike trim(), this preserves any tabs at the start of the first "real" line
            $code = preg_replace('/^\s*\n+|\n+\s*$/', '', $code);

            // check if GeSHi path is set and we have a GeSHi hilighter for this language
            if (isset($language) && isset($wakka->config['geshi_path']) && file_exists($geshi_hi_path.'/'.$language.'.php'))
            {
                // use GeSHi for hilighting
                $output = $wakka->GeSHi_Highlight($code, $language, $start);
            }
            // check Wikka highlighter path is set and if we have an internal Wikka hilighter
            elseif (isset($language) && isset($wakka->config['wikka_formatter_path']) && file_exists($wikka_hi_path.'/'.$language.'.php') && 'wakka' != $language)
            {
                // use internal Wikka hilighter
                $output = '<div class="code">'."\n";
                $output .= $wakka->Format($code, $language);
                $output .= "</div>\n";
            }
            // no language defined or no formatter found: make default code block;
            // IncludeBuffered() will complain if 'code' formatter doesn't exist
            else
            {
                $output = '<div class="code">'."\n";
                $output .= $wakka->Format($code, 'code');
                $output .= "</div>\n";
            }

            // START DarTar modified 2005-02-17
            // slight mod JavaWoman 2005-06-12: coding style, class for form
            // build form
            $form = $wakka->FormOpen('grabcode','','post','','grabcode');
            $form .= '<input type="submit" name="save" class="grabcodebutton" style="line-height:10px; float:right; vertical-align: middle; margin-right:20px; margin-top:0px; font-size: 10px; color: #000; font-weight: normal; font-family: Verdana, Arial, sans-serif; background-color: #DDD; text-decoration: none; height:18px;" value="Grab" title="Download this code" />';
            $form .= '<input type="hidden" name="code" value="'.urlencode($code).'" />';
            $form .= $wakka->FormClose();
            // output
            return $output."\n".$form;
            // END DarTar modified 2005-02-17
        }

        // forced links
        // \S : any character that is not a whitespace character
        // \s : any whitespace character
        if (preg_match('/^\[\[(\S*)(\s+(.+))?\]\]$/s', $thing, $matches))  # recognize forced links across lines
        {
            $url = $matches[1];
            $text = isset($matches[3]) ? $matches[3] : '';  # link text is optional
            if ($url)
            {
                if (!$text) $text = $url;
                return $result.$wakka->Link($url, '', $text);
            }
            return '';
        }

        // indented text
        # JW FIXED 2005-07-09 accented chars not used for ordered lists
        # JW FIXED 2005-07-12 this does not cover the case where a list item is followed by an inline comment of the *same* level
        # JW FIXED 2005-07-12 as with the expression in the /edit handler this does not cover tab or ~ at the start of the document
        if (preg_match('/(^|\n)([\t~]+)(-|&|[0-9a-zA-Z]+\))?(\n|$)/s', $thing, $matches))
        {
            $br = 0;  # no break needed after a block

            // get new indent level
            $newIndentLevel = strlen($matches[2]);  # JW 2005-07-12 also match tab or ~ at start of document
            // derive code indent (one tab less than the level, so level 1 starts at the margin)
            $codeIndent = str_repeat("\t", $newIndentLevel - 1);
            $nlTabs = "\n".$codeIndent;
            $nlTabsOut = $nlTabs."\t";

            // find out which indent type we want
            $newIndentType = $matches[3];  # JW 2005-07-12 also match tab or ~ at start of document

            // derive code fragments
            if ($newIndentType == '')  # plain indent
            {
                $opener = '<div class="indent">';
                $closer = '</div>';
            }
            elseif ($newIndentType == '-')  # unordered list
            {
                $opener = '<ul>'.$nlTabs.'<li>';
                $closer = '</li>'.$nlTabs.'</ul>';
            }
            elseif ($newIndentType == '&')  # inline comment
            {
                $opener = '<ul class="thread">'.$nlTabs.'<li>';
                $closer = '</li>'.$nlTabs.'</ul>';
            }
            else  # ordered list
            {
                $opener = '<ol type="'.substr($newIndentType, 0, 1).'">'.$nlTabs.'<li>';
                $closer = '</li>'.$nlTabs.'</ol>';
                $newIndentType = 'o';
            }

            // do an indent
            if ($newIndentLevel > $oldIndentLevel)
            {
                for ($i = 0; $i < $newIndentLevel - $oldIndentLevel; $i++)
                {
                    $result .= $nlTabs.$opener;
                    array_push($indentClosers, $closer);        # remember how to close this level
                    array_push($indentTypes, $oldIndentType);   # remember type hierarchically
                }
            }
            // do an outdent or stay at the same level
            else
            {
                $bOutdent = FALSE;
                if ($newIndentLevel < $oldIndentLevel)
                {
                    $bOutdent = TRUE;  # remember we're outdenting, for correct layout
                    // do the outdenting
                    for ($i = 0; $i < $oldIndentLevel - $newIndentLevel; $i++)
                    {
                        if ($i > 0)
                        {
                            $result .= $nlTabsOut;
                        }
                        $result .= array_pop($indentClosers);
                        $oldIndentType = array_pop($indentTypes);  # make sure we will compare with "correct" previous type
                    }
                }
                if ($bOutdent)  # outdenting: put close tag on new line
                {
                    $result .= $nlTabs;
                }
                // JW 2005-07-11 new item of different type
                if ($newIndentType != $oldIndentType)
                {
                    // close the container of the old type and open (and remember) the new one,
                    // so the closer stack keeps describing what is actually open
                    $result .= array_pop($indentClosers).$nlTabs.$opener;
                    array_push($indentClosers, $closer);
                }
                // new item of same type
                else
                {
                    // plain indent
                    if ($newIndentType == '')
                    {
                        $result .= $closer.$nlTabs.$opener;
                    }
                    // list or inline comment
                    else
                    {
                        $result .= '</li>'.$nlTabs.'<li>';
                    }
                }
            }

            $oldIndentType = $newIndentType;  # remember type sequentially
            $oldIndentLevel = $newIndentLevel;
            return $result;
        }

        // new lines
        if ($thing === "\n")
        {
            // if we got here, there was no tab (or ~) in the next line; this means that we can close all open indents.
            // JW: we need to do the same thing at the end of the page to close indents NOT followed by newline: use a function
            $result .= close_indents($indentClosers, $oldIndentLevel);  # JW 2005-07-11 removed superfluous variables
            $result .= ($br) ? "<br />\n" : "\n";
            $br = 1;
            return $result;
        }

        // Actions
        if (preg_match('/^\{\{(.*?)\}\}$/s', $thing, $matches))
        {
            if ($matches[1])
            {
                return $wakka->Action($matches[1]);
            }
            return '{{}}';
        }

        // interwiki links!
        if (preg_match('/^[A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+[:]\S*$/s', $thing))
        {
            return $wakka->Link($thing);
        }

        // wiki links!
        if (preg_match('/^[A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*$/s', $thing))
        {
            return $wakka->Link($thing);
        }

        // separators (anchored, so that dashes inside e.g. a mind map are not mistaken for one)
        if (preg_match('/^-{4,}$/', $thing))
        {
            // TODO: This could probably be improved for situations where someone puts text on the same line as a separator.
            //       Which is a stupid thing to do anyway! HAW HAW! Ahem.
            $br = 0;
            return "<hr />\n";
        }

        // mind map xml
        if (preg_match('/^<map.*<\/map>$/s', $thing))
        {
            return $wakka->Action('mindmap '.$wakka->Href().'/mindmap.mm');
        }

        // if we reach this point, it must have been an accident.
        // @@@ JW: or a detailed regex that excludes something that was included in the
        //     preg_replace_callback expression
        return $thing;
    }
}
if (!function_exists('wakka3callback'))
{
    /**
     * "Afterburner" formatting: extra handling of already-generated XHTML code.
     *
     * 1.
     * Ensure every heading has an id, either specified or generated. (May be
     * extended to generate section TOC data.)
     * If an id is specified, the heading is returned without any modification.
     * If no id is specified, it is generated on the basis of the heading content:
     * - any image tag is replaced by its alt text (if specified)
     * - all tags are stripped
     * - entities are decoded where possible, remaining ones are removed
     * - all characters that are not valid in an id are stripped (except whitespace)
     * - the resulting string is then used by makeId() to generate an id out of it
     *
     * Anything that is not a well-formed heading is returned unchanged.
     *
     * @access	private
     * @uses	Wakka::makeId()
     *
     * @param	array	$things	required: matches of the regex in the preg_replace_callback
     * @return	string	heading with an id attribute (or the unchanged input)
     */
    function wakka3callback($things)
    {
        global $wakka;  # needed for makeId()

        $thing = $things[1];

        // heading
        if (preg_match('#^<(h[1-6])(.*?)>(.*?)</\\1>$#s', $thing, $matches))  # note that we don't match headings that are not valid XHTML!
        {
            list($element, $tagname, $attribs, $heading) = $matches;

            if (preg_match('/(id=("|\')(.*?)\\2)/', $attribs))  # use backref to match both single and double quotes
            {
                // existing id attribute: nothing to do (assume already treated as embedded code)
                // @@@ we *may* want to gather ids and heading text for a TOC here ...
                // heading text should then get partly the same treatment as when we're creating ids:
                // at least replace images and strip tags - we can leave entities etc. alone - so we end up with
                // plain text-only
                // do this if we have a condition set to generate a TOC
                return $element;
            }

            // no id: we'll have to create one
            $tmpheading = trim($heading);

            // first find and replace any image with its alt text
            // The replacement is done literally: building a regex from the tag would
            // misread characters such as '?' in the src and never replace anything,
            // which keeps this loop spinning. The alt text is always shorter than the
            // tag it replaces, so the loop is guaranteed to terminate.
            while (preg_match('/(<img.*?alt=("|\')(.*?)\\2.*?>)/', $tmpheading, $imgMatches))
            {
                # 1 = whole element
                # 3 = alt text
                list(, $imgElement, , $alttext) = $imgMatches;
                $tmpheading = str_replace($imgElement, trim($alttext), $tmpheading);
            }
            $headingtext = $tmpheading;

            // @@@ 2005-05-27 now first replace linebreaks <br/> with spaces!!
            // remove all other tags
            $headingtext = strip_tags($headingtext);
            // @@@ this all-text result is usable for a TOC!!!
            // do this if we have a condition set to generate a TOC

            // replace entities that can be interpreted
            // use charset ISO-8859-1 because other chars won't be valid for an id anyway
            $headingtext = html_entity_decode($headingtext, ENT_NOQUOTES, 'ISO-8859-1');
            // remove any remaining entities (so we don't end up with strange words and numbers in the id text)
            $headingtext = preg_replace('/&#?[A-Za-z0-9]+;/', '', $headingtext);
            // finally remove non-id characters (except whitespace which is handled by makeId())
            // (hyphen placed last: a hyphen directly before \s is rejected by PCRE2)
            $headingtext = preg_replace('/[^A-Za-z0-9_:.\s-]/', '', $headingtext);

            // now create id based on resulting heading text
            $id = $wakka->makeId('hn', $headingtext);

            // rebuild element, adding id
            return '<'.$tagname.$attribs.' id="'.$id.'">'.$heading.'</'.$tagname.'>';
        }
        // other elements to be treated go here (tables, images, code sections...)

        // not something we treat: hand it back untouched so it is not removed from the page
        return $thing;
    }
}
// ------------- do the work -----------
// replace 4 consecutive spaces at the beginning of a line with tab character
// $text = preg_replace("/\n[ ]{4}/", "\n\t", $text); // moved to edit.php

// embedded mind maps only take part in the expression for the 'show' method
if ($this->method == 'show')
{
    $mind_map_pattern = '<map.*?<\/map>|';
}
else
{
    $mind_map_pattern = '';
}

// define entity patterns
// NOTE most also used in wikka.php for htmlentities_ent(): REGEX library!
$alpha  = '[a-z]+';                         # character entity reference
$numdec = '#[0-9]+';                        # numeric character reference (decimal)
$numhex = '#x[0-9a-f]+';                    # numeric character reference (hexadecimal)
$terminator = ';|(?=($|[\n<]|<))';          # semicolon; or end-of-string, newline or tag
$entitypat = '('.$alpha.'|'.$numdec.'|'.$numhex.')('.$terminator.')';   # defines entity pattern without the starting &
$entityref = '&'.$entitypat;                # entity reference
$loneamp = '&(?!'.$entitypat.')';           # ampersand NOT part of an entity

// This may look a bit strange, but all possible formatting tags have to be in a
// single regular expression for this to work correctly: every match is handed
// to wakka2callback(), which returns its replacement.
$this->callLevel++;                         # JW 2005-07-15 recursion level: getting in
$text = preg_replace_callback(
    '/('.
    '% %.*?% %|'.                                                              # code
    '"".*?""|'.                                                                # literal
    $mind_map_pattern.
    '\[\[[^\[]*?\]\]|'.                                                        # forced link
    '-{4,}|---|'.                                                              # separator, new line
    '\b[a-z]+:\/\/\S+|'.                                                       # URL
    '\*\*|\'\'|\#\#|\#\%|@@|::c::|\>\>|\<\<|££|¥¥|\+\+|__|\/\/|'.              # Wiki markup
    '======|=====|====|===|==|'.                                               # headings
    '(^|\n)([\t~]+)(-|&|[0-9a-zA-Z]+\))?|'.                                    # indents and lists # JW FIXED 2005-07-12 also match tab or ~ at start of document
    '\{\{.*?\}\}|'.                                                            # action
    '\b[A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+[:](?![=_])\S*\b|'.                             # InterWiki link
    '\b([A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)\b|'.                # CamelWords
    '<|>|'.                                                                    # HTML special chars - after wiki markup!
    $loneamp.'|'.                                                              # HTML special chars - ampersand NOT part of an enity
    '\n'.                                                                      # new line
    ')/ms','wakka2callback',$text);

// we're cutting the last <br />
$text = preg_replace("/<br \/>$/", '', $text);

$this->callLevel--;                         # JW 2005-07-15 recursion level: getting out
if ($this->callLevel == 0)                  # JW 2005-07-15 only for "outmost" call level
{
    $text .= wakka2callback('closetags');   # JW changed logic
}

// add ids to heading elements
// @@@ LATER:
// - extend with other elements (tables, images, code blocks)
// - also create array(s) for TOC(s)
$idstart = getmicrotime();
$text = preg_replace_callback(
    '#('.
    '<h[1-6].*?>.*?</h[1-6]>'.
    // other elements to be treated go here
    ')#ms','wakka3callback',$text);
printf('<!-- Header id generation took %.6f seconds -->', (getmicrotime() - $idstart));

// NOTE(review): emits the formatted text the same way the timing comment above is
// emitted; this assumes the including code captures the script's output - confirm.
echo $text;
?>