Revision history for ChecklinksAction


Revision [19762]

Last edited on 2008-03-30 16:46:28 by DomBonj [v0.94: coding standards; 1.1.6.4 compatibility]
Additions:
- Wikka 1.1.6.2 & 1.1.6.3 & 1.1.6.4
# @version 0.94
function CLerror ($msg)
{
return ('<em class="error">'. $msg .'</em><br />');
function CLsummary ($goodlinks, $badlinks)
{
$linknames = array('ac'=>CL_NAME_AC, 'ex'=>CL_NAME_EX, 'iw'=>CL_NAME_IW, 'wn'=>CL_NAME_WN, 'fi'=>CL_NAME_FI);
$table_css = "class='data' cellpadding='2' cellspacing='1' border='2'";
$colheads = split(',', CL_COLNAME_SUMMARY);
$sumgood = 0; $sumbad = 0;
$output = sprintf ("<table $table_css><tr class='comment'><th>%s</th><th>%s</th><th>%s</th></tr>", $colheads[0], $colheads[1], $colheads[2]);
foreach ($goodlinks as $linktype => $cnt)
{
$output .= "<tr><td>{$linknames[$linktype]}</td><td align='right'>{$cnt}</td><td align='right'>".(empty($badlinks[$linktype])?0:$badlinks[$linktype])."</td></tr>";
$sumgood += $cnt;
$sumbad += (empty($badlinks[$linktype])? 0 : $badlinks[$linktype]);
$output .= "<tr class='comment'><td>{$colheads[3]}</td><td align='right'>$sumgood</td><td align='right'>$sumbad</td></tr></table>";
return $output;
function CLreport ($thisone, $badlinks, $keyorder)
{
$linknames = array('ac'=>CL_NAME_AC, 'ex'=>CL_NAME_EX, 'iw'=>CL_NAME_IW, 'wn'=>CL_NAME_WN, 'fi'=>CL_NAME_FI);
$table_css = "class='wikka' cellpadding='2' cellspacing='1' border='2'";
$colheads = split(',', CL_COLNAME_DETAILED);
$output = sprintf ("<table $table_css><tr class='comment'><th>%s</th><th>%s</th><th>%s</th><th>%s</th><th>%s</th></tr>", $colheads[0], $colheads[1], $colheads[2], $colheads[3], $colheads[4]);
foreach ($keyorder as $tag => $val)
{
$cnt = $badlinks[$tag];
preg_match('/^(.+) (\w\w)\/(.+?)\*(.+)$/', $tag, $matches);
$pagelink = "<a href='". $thisone->Href('', $matches[1]) ."'>{$matches[1]}</a>";
$link = ($matches[2] == 'ex') ? ("<a href='{$matches[3]}'>".substr($matches[3], 0, CL_MAX_LINK_LENGTH).((strlen($matches[3])>CL_MAX_LINK_LENGTH)?'...':'').'</a>')
: (($matches[2] == 'fi') ? ("<a href='file://{$matches[3]}'>".substr($matches[3], 0, CL_MAX_LINK_LENGTH).((strlen($matches[3])>CL_MAX_LINK_LENGTH)?'...':'').'</a>') : $matches[3]);
$output .= "<tr><td>{$linknames[$matches[2]]}</td><td>$pagelink</td><td>$link</td><td>{$matches[4]}</td><td align='right'>$cnt</td></tr>";
$output .= '</table>';
return $output;
function CLcheck_page($fp, $page, $hostname, $firstcall)
{
$filestatus = CL_FILE_NOHTTP;
$tmp = fputs ($fp, sprintf( "HEAD %s HTTP/1.0\r\nHost: %s\r\nUser-Agent: WikkaCheckLinks/1.0\r\n\r\n", $page, $hostname));
for ($try = 1; ($try <= CL_MAX_TRY) && ($filestatus == CL_FILE_NOHTTP); $try++)
{
if (($http_reply = fgets($fp, 256)) == NULL)
{
break;
if (preg_match('/^HTTP\/(\d)\.(\d)\s+(\d+)\s+(.*)$/', $http_reply, $matches ))
{
$filestatus = ($matches[4]) ? trim($matches[4]) : trim($matches[3]);
if ($firstcall && ($matches[3] == '100'))
{ // in HTTP/1.1, '100' means Continue
else if (substr($matches[3], 0, 1) == '3')
{ // redirection: let's find the new location
while (!feof($fp))
{
$reply .= fgets($fp, 256);
if (preg_match('/^Location:\s+(\S+)\s*$/m', $reply, $matches1))
{
$filestatus = 'MOV '. $matches1[1];
return $filestatus;
function CLcheck_link($url, $p='')
{
static $statuses = array();
static $hostnames = array();
$now = time();
$purl = parse_url($url);
$proto = isset($purl['scheme']) ? $purl['scheme'] : 'http';
$port = isset($purl['port']) ? $purl['port'] : '';
$path = isset($purl['path']) ? $purl['path'] : '/';
$suffix = isset($purl['query']) ? $purl['query'] : '';
if ((empty($purl['host'])) || ($proto!='http')&&($proto!='https')&&($proto!='ftp'))
{
else
{
$hostname = strtolower($purl['host']);
if (preg_match('/^d+.d+.d+.d+/', $hostname))
{ // the host is an IP address
$ip = $hostname;
else
{ // the host is a domain name, so we have to resolve it first
$from_cache = false;
if (isset($hostnames[$hostname]))
{ // have we tried to resolve it not so long ago?
if (($now - $hostnames[$hostname][1]) <= CL_CACHE_LIFETIME)
{
$ip = $hostnames[$hostname][0];
$from_cache = true;
if (!$from_cache)
{
$ip = gethostbyname($hostname);
// if hostname not resolvable, gethostbyname returns its argument unchanged
if ($ip === $hostname)
{
$ip = '';
}
// cache this resolve
else
{
$hostnames[$hostname] = array($ip, $now);
if (!$ip)
{ // was the hostname unresolvable?
$serverstatus = CL_HOST_NOT_FOUND;
else
{
if (!$port)
{
if ($proto == 'http')
{
$port = 80;
elseif ($proto == 'https')
{
$port = 443;
elseif ($proto == 'ftp')
{
$port = 21;

$key = "$ip:$port";
// have we checked the server not so long ago?
$from_cache = false;
if (isset($statuses[$key]))
{
if (($now - $statuses[$key][2]) <= CL_CACHE_LIFETIME)
{
$serverstatus = $statuses[$key][0];
if ($serverstatus == CL_SERVER_OK)
{
$from_cache = true;
}
if (!$from_cache || ($from_cache && ($serverstatus == CL_SERVER_OK)))
{ // we have to check the server, or the host is ok so check the file
$errno = 0;
$errstr = '';
if ($fp = fsockopen($ip, $port, $errno, $errstr, CL_CX_TIMEOUT))
{
$serverstatus = CL_SERVER_OK;
$filestatus = CL_FILE_NOHTTP;
$page = ($suffix) ? $path .'?'. $suffix : $path;
$filestatus = CLcheck_page($fp, $page, $hostname, false);
else
{ // could not connect to server
if (preg_match('/timed?[- ]?out/i', $errstr))
{
$serverstatus = CL_HOST_TIMEOUT;
}
elseif (preg_match('/refused/i', $errstr))
{
$serverstatus = CL_HOST_REJECT;
}
else
{
$serverstatus = CL_HOST_UNREACH;
}
// cache this (server, file) pair
$statuses[$key] = array($serverstatus, $filestatus, $now);
if ($filestatus == '200')
{
$filestatus = 'OK';
else if ($filestatus == '302')
{
$filestatus = 'OK';
$output = ($serverstatus != CL_SERVER_OK) ? $serverstatus : $filestatus;
return ($output);
$output = '';
if ( (isset($vars['opts']) && (!preg_match("/^[tiw]{1,3}$/i", $vars['opts'])))
|| (isset($vars['scope']) && (!preg_match("/^(page|user|all)$/i", $vars['scope'])))
|| (isset($vars['sort']) && (!preg_match("/^(tag|type|cnt)$/i", $vars['sort']))) )
$output .= CLerror("Usage: checklinks [scope=\"user|page|all\"] [sort=\"tag|type|cnt\"] [opts=\"[i][t][w]\"]");
else
foreach ($rows as $row)
{
$query = "SELECT * FROM ".$this->config['table_prefix']."pages WHERE ((latest = 'Y') AND (user <> 'WikkaInstaller')";
{
$query .= '';
{
$query .= " AND (tag = '". $this->GetPageTag() ."')";
else
{ // default value
$query .= " AND (owner = '". $this->GetUserName() ."')";
$query .= ')';
foreach ($rows as $row)
{
if ($this->HasAccess('read', $row['tag']))
{ // this page is to be scanned: pretend your are this page
$this->SetPage($this->LoadPage($row['tag']));
$tmppage = $this->page['body'];
// get rid of raw HTML and code blocks
$tmppage = preg_replace("/\"\"(.*?)\"\"/s", '', $tmppage);
$tmppage = preg_replace("/\%\%(.*?)\%\%/s", '', $tmppage);
// 1. is page tag formatted as a valid WikiName ?
if (!preg_match("/^([A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)$/", $row['tag'], $matches) && $show_badwnames)
{
$badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME] = (empty($badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME])) ? 1 : $badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME]+1;
$badcnts['wn'] += 1;
// 2. check actions
preg_match_all("/\{\{(.*?)\}\}/", $tmppage, $matches);
foreach ($matches[1] as $actionname)
{
if (preg_match("/^([A-Za-z0-9]+)/", trim($actionname), $matches1))
{
if (!file_exists($this->config['action_path']."/".$matches1[1].".php"))
{
$badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE]+1;
$badcnts['ac'] += 1;
else
{
$goodcnts['ac'] +=1;
// now get rid of actions to avoid confusion
$tmppage = preg_replace("/\{\{(.*?)\}\}/", '', $tmppage);
// 3. check interwiki links
preg_match_all("/([A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+)[:](\S*)\b/", $tmppage, $matches);
foreach ($matches[1] as $interwikiname)
{
if (!$this->GetInterWikiUrl(trim($interwikiname), ''))
{
$badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI] = (empty($badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI])) ? 1 : $badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI]+1;
$badcnts['iw'] += 1;
{
$goodcnts['iw'] +=1;
// now get rid of interwiki links to avoid confusion
$tmppage = preg_replace("/([A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+[:]\S*)\b/", '', $tmppage);
// now check hyperlinks; first, prevent recursive calling
$page = preg_replace('/\{\{\s*checklinks\b.*?\}\}/i', '', $this->page['body']);
// do not count twice non-existent links
$page = preg_replace('/\{\{\s*wantedpages\s*\}\}/i', '', $page);
// render the page
$html = $this->Format($page, 'wakka');
if (preg_match_all("/href\=[\"|\']((http|https|ftp):\/\/[^\\s\"\'<>]+)/", $html, $matches))
{
foreach ($matches[1] as $url)
{ // 4. check intra-wiki links
if (preg_match('/'.$base_url.'([A-Za-zÄÖÜßäöü][A-Za-z0-9ÄÖÜßäöü]*)/', $url, $matches1))
{
$wikiname = $matches1[1];
if (!$exist[trim($wikiname)])
{
$badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE] = (empty($badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE])) ? 1 : $badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE]+1;
$badcnts['wn'] += 1;
}
else
{
$goodcnts['wn'] += 1;
}
else if ($show_external)
{ // 5. check external hyperlinks
$OK = (strtoupper($tmp = CLcheck_link($url)) == 'OK');
// allow at most 3 successive redirections
for ($i=1; !$OK && preg_match("/^MOV (.+)$/", $tmp, $matches1) && ($i<=CL_MAX_REDIRECTS); $i++)
{
$OK = (strtoupper($tmp = CLcheck_link(trim($matches1[1]))) == "OK");
}
if (!$OK)
{
$badlinks[$row['tag']." ex/{$url}*$tmp"] = (empty($badlinks[$row['tag']." ex/{$url}*$tmp"])) ? 1 : $badlinks[$row['tag']." ex/{$url}*$tmp"]+1;
$badcnts['ex'] += 1;
}
else
{
$goodcnts['ex'] += 1;
}
} // foreach $matches
} // if preg_match_all http
// 5. check href-ed files
if (preg_match_all("/href\=[\"|\']file:\/{2,}([^\\\"\'<>]+)[\'\"]/", $html, $matches))
{
foreach ($matches[1] as $rawfname)
{
if (!file_exists(rawurldecode($rawfname)))
{
$fname = str_replace(' ', ' ', $rawfname);
$badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE]+1;
$badcnts['fi'] += 1;
else
{
$goodcnts['fi'] += 1;
} // foreach $matches
} // if preg_match_all file
} // if $this->HasAcess
if (isset($vars['sort']))
{
if ($vars['sort'] == 'type')
{
$field = 2;
$fn = 'asort';
else if ($vars['sort'] == 'cnt')
{
$field = 'cnt';
$fn = 'arsort';
if (!$field)
{
$field = 1;
$fn = 'asort';
foreach ($badlinks as $index => $cnt)
{
$output .= '<h3>'. CL_SUMMARY .'</h3>'. CLsummary($goodcnts, $badcnts).'<br />';
{
$output .= '<h3>'. CL_DETAILED .'</h3>'. CLreport($this, $badlinks, $keyorder);
echo $output;
// avoid side-effect if there were footnotes on checked pages
if (function_exists('FNprint'))
FNprint($this, 'purge', '', $this->Href());
Deletions:
- Wikka 1.1.6.2 & 1.1.6.3
# @version 0.93
function CLerror ($msg) {
return ("<em class='error'>$msg</em><br />");
}
function CLsummary ($goodlinks, $badlinks) {
$linknames = array('ac'=>CL_NAME_AC, 'ex'=>CL_NAME_EX, 'iw'=>CL_NAME_IW, 'wn'=>CL_NAME_WN, 'fi'=>CL_NAME_FI);
$table_css = "class='wikka' cellpadding='2' cellspacing='1' border='2'";
$colheads = split(',', CL_COLNAME_SUMMARY);
$sumgood = 0; $sumbad = 0;
$output = sprintf ("<table $table_css><tr class='comment'><th>%s</th><th>%s</th><th>%s</th></tr>", $colheads[0], $colheads[1], $colheads[2]);
foreach ($goodlinks as $linktype => $cnt) {
$output .= "<tr><td>{$linknames[$linktype]}</td><td align='right'>{$cnt}</td><td align='right'>".(empty($badlinks[$linktype])?0:$badlinks[$linktype])."</td></tr>";
$sumgood += $cnt;
$sumbad += (empty($badlinks[$linktype])?0:$badlinks[$linktype]);
}
$output .= "<tr class='comment'><td>{$colheads[3]}</td><td align='right'>$sumgood</td><td align='right'>$sumbad</td></tr></table>";
return $output;
}
function CLreport ($this, $badlinks, $keyorder) {
$linknames = array('ac'=>CL_NAME_AC, 'ex'=>CL_NAME_EX, 'iw'=>CL_NAME_IW, 'wn'=>CL_NAME_WN, 'fi'=>CL_NAME_FI);
$table_css = "class='wikka' cellpadding='2' cellspacing='1' border='2'";
$colheads = split(',', CL_COLNAME_DETAILED);
$output = sprintf ("<table $table_css><tr class='comment'><th>%s</th><th>%s</th><th>%s</th><th>%s</th><th>%s</th></tr>", $colheads[0], $colheads[1], $colheads[2], $colheads[3], $colheads[4]);
foreach ($keyorder as $tag => $val) {
$cnt = $badlinks[$tag];
preg_match("/^(.+) (\w\w)\/(.+?)\*(.+)$/", $tag, $matches);
$pagelink = "<a href='".$this->Href("", $matches[1])."'>{$matches[1]}</a>";
$link = ($matches[2] == 'ex') ? ("<a href='{$matches[3]}'>".substr($matches[3], 0, CL_MAX_LINK_LENGTH).((strlen($matches[3])>CL_MAX_LINK_LENGTH)?"...":"")."</a>")
: (($matches[2] == 'fi') ? ("<a href='file://{$matches[3]}'>".substr($matches[3], 0, CL_MAX_LINK_LENGTH).((strlen($matches[3])>CL_MAX_LINK_LENGTH)?"...":"")."</a>") : $matches[3]);
$output .= "<tr><td>{$linknames[$matches[2]]}</td><td>$pagelink</td><td>$link</td><td>{$matches[4]}</td><td align='right'>$cnt</td></tr>";
}
$output .= "</table>";
return $output;
}
function CLcheck_page($fp, $page, $hostname, $firstcall) {
$filestatus = CL_FILE_NOHTTP;
$tmp = fputs ($fp, sprintf( "HEAD %s HTTP/1.0\r\nHost: %s\r\nUser-Agent: WikkaCheckLinks/1.0\r\n\r\n", $page, $hostname));
for ($try = 1; ($try <= CL_MAX_TRY) && ($filestatus == CL_FILE_NOHTTP); $try++) {
if (($http_reply = fgets($fp, 256)) == NULL)
break;
if (preg_match("/^HTTP\/(\d)\.(\d)\s+(\d+)\s+(.*)$/", $http_reply, $matches )) {
$filestatus = ($matches[4]) ? trim($matches[4]) : trim($matches[3]);
if ($firstcall && ($matches[3] == '100')) { // in HTTP/1.1, '100' means Continue
$filestatus = CLcheck_page($fp, $page, $hostname, false);
} else if (substr($matches[3], 0, 1) == '3') {
// redirection: let's find the new location
while (!feof($fp)) {
$reply .= fgets($fp, 256);
}
if (preg_match("/^Location:\s+(\S+)\s*$/m", $reply, $matches1))
$filestatus = "MOV ".$matches1[1];
}
}
}
return $filestatus;
}
function CLcheck_link($url, $p="") {
static $statuses=array();
static $hostnames=array();
$now = time();
$purl = parse_url($url);
$proto = isset($purl['scheme']) ? $purl['scheme'] : "http";
$port = isset($purl['port']) ? $purl['port'] : "";
$path = isset($purl['path']) ? $purl['path'] : "/";
$suffix = isset($purl['query']) ? $purl['query'] : "";
if ((empty($purl['host'])) || ($proto!='http')&&($proto!='https')&&($proto!='ftp')) {
$serverstatus = CL_HOST_NOT_FOUND;
} else {
$hostname = strtolower($purl['host']);
if (preg_match('/^d+.d+.d+.d+/', $hostname)) {
// the host is an IP address
$ip = $hostname;
} else { // the host is a domain name, so we have to resolve it first
$from_cache = false;
if (isset($hostnames[$hostname])) {
// have we tried to resolve it not so long ago?
if (($now - $hostnames[$hostname][1]) <= CL_CACHE_LIFETIME) {
$ip = $hostnames[$hostname][0];
$from_cache = true;
if (!$from_cache) {
$ip = gethostbyname($hostname);
// if hostname not resolvable, gethostbyname returns its argument unchanged
if ($ip === $hostname)
$ip = '';
// cache this resolve
$hostnames[$hostname] = array($ip, $now);
if (!$ip) { // was the hostname unresolvable?
} else {
if (!$port) {
if ($proto == 'http') { $port = 80; }
elseif ($proto == 'https') { $port = 443; }
elseif ($proto == 'ftp') { $port = 21; }
$key = "$ip:$port";
// have we checked the server not so long ago?
$from_cache = false;
if (isset($statuses[$key])) {
if (($now - $statuses[$key][2]) <= CL_CACHE_LIFETIME) {
$serverstatus = $statuses[$key][0];
if ($serverstatus == CL_SERVER_OK) $from_cache = true;
if (!$from_cache || ($from_cache && ($serverstatus == CL_SERVER_OK))) {
// we have to check the server, or the host is ok so check the file
$errno = 0;
$errstr = '';
if ($fp = fsockopen($ip, $port, $errno, $errstr, CL_CX_TIMEOUT)) {
$serverstatus = CL_SERVER_OK;
$filestatus = CL_FILE_NOHTTP;
$page = ($suffix) ? $path."?".$suffix : $path;
} else {
// could not connect to server
if (preg_match('/timed?[- ]?out/i', $errstr)) {
$serverstatus = CL_HOST_TIMEOUT;
} elseif (preg_match('/refused/i', $errstr)) {
$serverstatus = CL_HOST_REJECT;
} else {
$serverstatus = CL_HOST_UNREACH;
// cache this (server, file) pair
$statuses[$key] = array($serverstatus, $filestatus, $now);
}
if ($filestatus == '200')
$filestatus = 'OK';
else if ($filestatus == '302')
$filestatus = 'OK';
$output = ($serverstatus != CL_SERVER_OK) ? $serverstatus : $filestatus;
return ($output);
}
$output = "";
if ( (isset($vars['opts']) && (!preg_match("/^[tiw]{1,3}$/i", $vars['opts'])))
|| (isset($vars['scope']) && (!preg_match("/^(page|user|all)$/i", $vars['scope'])))
|| (isset($vars['sort']) && (!preg_match("/^(tag|type|cnt)$/i", $vars['sort']))) )
$output .= CLerror("Usage: checklinks [scope=\"user|page|all\"] [sort=\"tag|type|cnt\"] [opts=\"[i][t][w]\"]");
else {
foreach ($rows as $row) {
$query = "SELECT * FROM ".$this->config['table_prefix']."pages WHERE ((latest = 'Y')";
$query .= "";
$query .= " AND (tag = '".$this->GetPageTag()."')";
else // default value
$query .= " AND (owner = '".$this->GetUserName()."')";
$query .= ")";
foreach ($rows as $row) {
if ($this->HasAccess("read", $row['tag'])) {
// this page is to be scanned: pretend your are this page
$this->SetPage($this->LoadPage($row['tag']));
$tmppage = $this->page['body'];
// get rid of raw HTML and code blocks
$tmppage = preg_replace("/\"\"(.*?)\"\"/s", "", $tmppage);
$tmppage = preg_replace("/\%\%(.*?)\%\%/s", "", $tmppage);
// 1. is page tag formatted as a valid WikiName ?
if (!preg_match("/^([A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)$/", $row['tag'], $matches)
&& $show_badwnames) {
$badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME] = (empty($badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME])) ? 1 : $badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME]+1;
$badcnts['wn'] += 1;
// 2. check actions
preg_match_all("/\{\{(.*?)\}\}/", $tmppage, $matches);
foreach ($matches[1] as $actionname) {
if (preg_match("/^([A-Za-z0-9]+)/", trim($actionname), $matches1)) {
if (!file_exists($this->config['action_path']."/".$matches1[1].".php")) {
$badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE]+1;
$badcnts['ac'] += 1;
} else {
$goodcnts['ac'] +=1;
// now get rid of actions to avoid confusion
$tmppage = preg_replace("/\{\{(.*?)\}\}/", "", $tmppage);
// 3. check interwiki links
preg_match_all("/([A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+)[:](\S*)\b/", $tmppage, $matches);
foreach ($matches[1] as $interwikiname) {
if (!$this->GetInterWikiUrl(trim($interwikiname), "")) {
$badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI] = (empty($badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI])) ? 1 : $badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI]+1;
$badcnts['iw'] += 1;
} else {
$goodcnts['iw'] +=1;
// now get rid of interwiki links to avoid confusion
$tmppage = preg_replace("/([A-ZÄÖÜ][A-Za-zÄÖÜßäöü]+[:]\S*)\b/", "", $tmppage);
// now check hyperlinks
// prevent recursive calling
$page = preg_replace('/\{\{\s*checklinks\b.*?\}\}/i', "", $this->page['body']);
// do not count twice non-existent links
$page = preg_replace('/\{\{\s*wantedpages\s*\}\}/i', "", $page);
// render the page
$html = $this->Format($page, 'wakka');
if (preg_match_all("/href\=[\"|\']((http|https|ftp):\/\/[^\\s\"\'<>]+)/", $html, $matches)) {
foreach ($matches[1] as $url) {
// 4. check intra-wiki links
if (preg_match("/".$base_url."([A-Za-zÄÖÜßäöü][A-Za-z0-9ÄÖÜßäöü]*)/", $url, $matches1)) {
$wikiname = $matches1[1];
if (!$exist[trim($wikiname)]) {
$badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE] = (empty($badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE])) ? 1 : $badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE]+1;
$badcnts['wn'] += 1;
} else {
$goodcnts['wn'] += 1;
else if ($show_external) {
// 5. check external hyperlinks
$OK = (strtoupper($tmp = CLcheck_link($url)) == "OK");
// Allow at most 3 successive redirections
for ($i=1; !$OK && preg_match("/^MOV (.+)$/", $tmp, $matches1) && ($i<=CL_MAX_REDIRECTS); $i++) {
$OK = (strtoupper($tmp = CLcheck_link(trim($matches1[1]))) == "OK");
if (!$OK) {
$badlinks[$row['tag']." ex/{$url}*$tmp"] = (empty($badlinks[$row['tag']." ex/{$url}*$tmp"])) ? 1 : $badlinks[$row['tag']." ex/{$url}*$tmp"]+1;
$badcnts['ex'] += 1;
} else {
$goodcnts['ex'] +=1;
} // foreach $matches
} // if preg_match_all http
// 5. check href-ed files
if (preg_match_all("/href\=[\"|\']file:\/{2,}([^\\\"\'<>]+)[\'\"]/", $html, $matches)) {
foreach ($matches[1] as $rawfname) {
if (!file_exists(rawurldecode($rawfname))) {
$fname = str_replace(' ', ' ', $rawfname);
$badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE]+1;
$badcnts['fi'] += 1;
} else {
$goodcnts['fi'] +=1;
} // foreach $matches
} // if preg_match_all file
} // if $this->HasAcess
if (isset($vars['sort'])) {
if ($vars['sort'] == 'type') { $field = 2; $fn = 'asort'; }
else if ($vars['sort'] == 'cnt') { $field = 'cnt'; $fn = 'arsort'; }
if (!$field) { $field = 1; $fn = 'asort'; }
foreach ($badlinks as $index => $cnt) {
$output .= "<h3>".CL_SUMMARY."</h3>".CLsummary($goodcnts, $badcnts)."<br />";
$output .= "<h3>".CL_DETAILED."</h3>".CLreport($this, $badlinks, $keyorder);
}
echo $output;
// avoid side-effect if there were footnotes on checked pages
if (function_exists('FNprint'))
{
FNprint($this, 'purge', "", $this->Href());
}


Revision [19107]

Edited on 2008-01-28 00:14:03 by DomBonj [Modified links pointing to docs server]

No Differences

Revision [16608]

Edited on 2007-05-18 10:46:23 by DomBonj [v0.93: bugfix, 1.1.6.3 compatibility]
Additions:
// avoid side-effect if there were footnotes on checked pages
Deletions:
// avoid side-effect if there are footnotes


Revision [16607]

Edited on 2007-05-18 10:24:47 by DomBonj [v0.91: bugfix, addition, 1.1.6.3 compatibility]
Additions:
- Wikka 1.1.6.2 & 1.1.6.3
# @version 0.93
// avoid side-effect if there are footnotes
if (function_exists('FNprint'))
{
FNprint($this, 'purge', "", $this->Href());
Deletions:
- Wikka 1.1.6.2
# @version 0.92


Revision [15931]

Edited on 2007-01-18 18:00:19 by DomBonj [v0.91: bugfix, addition, 1.1.6.3 compatibility]
Deletions:
# v0.92: improved i18n ; determination base_url compatible mod_rewrite ; bugfix regexp
# v0.91: bugfix: tableau keyorder non initialisé
# v0.9: suppression WantedPages; hyperlien vers wikipage dans CLreport
# v0.8: traitement des relocations


Revision [15930]

Edited on 2007-01-18 17:58:02 by DomBonj [v0.92: mod_rewrite-compatibility bugfix]
Additions:
# @version 0.92
# v0.92: improved i18n ; determination base_url compatible mod_rewrite ; bugfix regexp
# v0.91: bugfix: tableau keyorder non initialisé
# v0.9: suppression WantedPages; hyperlien vers wikipage dans CLreport
# v0.8: traitement des relocations
if (!defined('CL_SUMMARY')) define('CL_SUMMARY', 'Link check summary');
if (!defined('CL_DETAILED')) define('CL_DETAILED', 'Detailed report');
preg_match("/^(.+) (\w\w)\/(.+?)\*(.+)$/", $tag, $matches);
$base_url = preg_quote($this->config['base_url'], '/');
$badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME] = (empty($badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME])) ? 1 : $badlinks[$row['tag']." wn/ *".CL_NON_WIKINAME]+1;
$badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." ac/{$matches1[1]}*". CL_NOSUCH_FILE]+1;
$badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI] = (empty($badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI])) ? 1 : $badlinks[$row['tag']." iw/{$interwikiname}*". CL_MISSING_INTERIWIKI]+1;
if (preg_match("/".$base_url."([A-Za-zÄÖÜßäöü][A-Za-z0-9ÄÖÜßäöü]*)/", $url, $matches1)) {
$badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE] = (empty($badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE])) ? 1 : $badlinks[$row['tag']." wn/{$wikiname}*". CL_MISSING_PAGE]+1;
$badlinks[$row['tag']." ex/{$url}*$tmp"] = (empty($badlinks[$row['tag']." ex/{$url}*$tmp"])) ? 1 : $badlinks[$row['tag']." ex/{$url}*$tmp"]+1;
$badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." fi/$fname*".CL_NOSUCH_FILE]+1;
preg_match("/^(.+) (\w\w)\/(.+)\*(.+)$/", $index, $matches);
$output .= "<h3>".CL_SUMMARY."</h3>".CLsummary($goodcnts, $badcnts)."<br />";
$output .= "<h3>".CL_DETAILED."</h3>".CLreport($this, $badlinks, $keyorder);
Deletions:
# @version 0.91
preg_match("/^(.+) (\w\w)\/(.+?) (.+)$/", $tag, $matches);
// echo "$page $hostname <br>";
$wikka_url_prefix = preg_quote('http://'.$_SERVER['SERVER_NAME'].$_SERVER['PHP_SELF'], '/');
$badlinks[$row['tag']." wn/  ".CL_NON_WIKINAME] = (empty($badlinks[$row['tag']." wn/  ".CL_NON_WIKINAME])) ? 1 : $badlinks[$row['tag']." wn/  ".CL_NON_WIKINAME]+1;
$badlinks[$row['tag']." ac/{$matches1[1]} ". CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." ac/{$matches1[1]} ". CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." ac/{$matches1[1]} ". CL_NOSUCH_FILE]+1;
$badlinks[$row['tag']." iw/{$interwikiname} ". CL_MISSING_INTERIWIKI] = (empty($badlinks[$row['tag']." iw/{$interwikiname} ". CL_MISSING_INTERIWIKI])) ? 1 : $badlinks[$row['tag']." iw/{$interwikiname} ". CL_MISSING_INTERIWIKI]+1;
if (preg_match("/".$wikka_url_prefix."\?wakka=([A-Za-zÄÖÜßäöü][A-Za-z0-9ÄÖÜßäöü]*)/", $url, $matches1)) {
$badlinks[$row['tag']." wn/{$wikiname} ". CL_MISSING_PAGE] = (empty($badlinks[$row['tag']." wn/{$wikiname} ". CL_MISSING_PAGE])) ? 1 : $badlinks[$row['tag']." wn/{$wikiname} ". CL_MISSING_PAGE]+1;
$badlinks[$row['tag']." ex/{$url} $tmp"] = (empty($badlinks[$row['tag']." ex/{$url} $tmp"])) ? 1 : $badlinks[$row['tag']." ex/{$url} $tmp"]+1;
$badlinks[$row['tag']." fi/$fname ".CL_NOSUCH_FILE] = (empty($badlinks[$row['tag']." fi/$fname ".CL_NOSUCH_FILE])) ? 1 : $badlinks[$row['tag']." fi/$fname ".CL_NOSUCH_FILE]+1;
preg_match("/^(.+) (\w\w)\/(.+?) (.+)$/", $index, $matches);
$output .= "<h3>Link check summary</h3>".CLsummary($goodcnts, $badcnts)."<br />";
$output .= "<h3>Detailed report</h3>".CLreport($this, $badlinks, $keyorder);


Revision [15885]

Edited on 2007-01-14 16:18:33 by DomBonj [v0.91: bugfix]
Additions:
# @version 0.91
// 1. is page tag formatted as a valid WikiName ?
$keyorder = array();
Deletions:
# @version 0.9
// 1. is page tag formatted as a WikiName ?


Revision [15879]

The oldest known version of this page was created on 2007-01-13 17:43:45 by DomBonj [v0.91: bugfix]
Valid XHTML :: Valid CSS :: Powered by WikkaWiki