Fileexport Action

See also:
works with:
  • Wikka 1.1.6.2 & 1.1.6.3 & 1.1.6.4
NOT included in any Wikka version
Last edited by DomBonj:
v1.05: fixed XML encoding; PHP5 compatibility; 1.1.6.4 compatibility
Sat, 14 Jun 2008 17:19 UTC [diff]

This is the development page for the Fileexport action.

Installation


Code


actions/fileexport.php


<?php                                                                          
#
# Displays a link to download a selection of the wiki content in various formats
#
# @package      Actions
# @name         fileexport
#
# @authors      DomBonj
#
# @version      0.97
# @input        Parameters =  [type=('html'|'rawhtml'|'xml')] [sort=('tag'|'time'|'title')] [scope=('PageTag'|'m'|'o'|'p')] [opts=['e']['r']] [cats='Category1','Category2'..]
#               default values: type='html', sort='tag'
#
# @uses         Wakka::Href()
#

// i18n strings
// User-visible strings of the action. Each one is only defined when a
// translation has not already supplied a constant of the same name.
defined('FE_DOWNLOAD_LINK_TITLE')
    or define('FE_DOWNLOAD_LINK_TITLE', 'Download');
// %s receives the upper-cased export type
defined('FE_DOWNLOAD_LINK_TEXT')
    or define('FE_DOWNLOAD_LINK_TEXT', 'Download %s file');
// %s receives the usage string below
defined('FE_ERROR_REQUEST_FORMAT')
    or define('FE_ERROR_REQUEST_FORMAT', 'Incorrect parameter; usage: %s');
defined('FE_ERROR_USAGE')
    or define('FE_ERROR_USAGE', "fileexport [type=\"html|rawhtml|xml\"] [scope=\"PageTag|m|o|p\"] [opts=\"[e][r]\"] [sort=\"tag|time|title\"] [cats=\"<i>category list</i>\"]");
// %s receives the page tag given as scope
defined('FE_ERROR_MISSING_PAGE')
    or define('FE_ERROR_MISSING_PAGE', 'Can not export: page \'%s\' does not exist');

// The handler file declares a function of the same name, hence the guard.
if (!function_exists('FEerror'))
{
    /**
     * Wraps a message in the markup used for inline error output.
     *
     * The message is inserted verbatim (it may itself contain markup),
     * so callers are responsible for what they pass in.
     *
     * @param  string $msg message text
     * @return string emphasised error markup followed by a line break
     */
    function FEerror ($msg)
    {
        $markup  = '<em class="error">';
        $markup .= $msg;
        $markup .= '</em><br />';
        return $markup;
    }
}

// ---------------------------------------------------------------------------
// Action body: validates the action parameters found in $vars and prints
// either an error message or a download link pointing at the fileexport.xml
// handler of the current page.
// ---------------------------------------------------------------------------
$output = '';

// Always start from an empty list so the loop further down never touches an
// undefined variable when no 'cats' parameter was given.
$categories = array();
if (isset($vars['cats']))
{
    $categories = preg_split("/\s*,\s*/i", $vars['cats']);
}

if ( (isset($vars['opts']) && (!preg_match("/^[er]{1,4}$/i", $vars['opts'])))
    || (isset($vars['scope']) && (!preg_match("/^([oO]|[pP]|[mM]|[A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)$/", $vars['scope'])))
    || (isset($vars['type']) && (!preg_match("/^(html|rawhtml|xml)$/i", $vars['type'])))
    || (isset($vars['sort']) && (!preg_match("/^(time|tag|title)$/i", $vars['sort']))))
{
    // at least one parameter is malformed: show the usage string
    $output .= FEerror(sprintf(FE_ERROR_REQUEST_FORMAT, FE_ERROR_USAGE));
}
else if ( isset($vars['scope']) && preg_match("/^[A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*$/", $vars['scope']) && !$this->ExistsPage($vars['scope']) )
{
    // scope names a single page, but that page does not exist
    $output .= FEerror(sprintf(FE_ERROR_MISSING_PAGE, $vars['scope']));
}
else
{
    // Optional parameters fall back to their defaults instead of being read
    // from a possibly missing array key.
    $fe_opts = isset($vars['opts']) ? $vars['opts'] : '';
    $type = (isset($vars['type']) ? strtolower($vars['type']) : 'html');
    // Validation above is case-insensitive, the handler compares the sort key
    // case-sensitively: normalise it here, exactly like the type.
    $fe_sort = (isset($vars['sort']) ? strtolower($vars['sort']) : 'tag');

    $query = '&type='. $type;
    $query .= '&scope='. (isset($vars['scope']) ? $vars['scope'] : 'm');
    $query .= "&emb=". (preg_match("/[e]/i", $fe_opts) ? 'y' : 'n');
    $query .= "&rlnk=". (preg_match("/[r]/i", $fe_opts) ? 'y' : 'n');
    $query .= "&sort=". $fe_sort;

    // Keep only well-formed category names. The captured name is used for
    // every entry, so trailing whitespace accepted by the pattern never
    // leaks into the URL.
    $fe_valid_cats = array();
    if ($categories)
    {
        foreach ($categories as $lacat)
        {
            if (preg_match("/^(Category[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)\s*$/", $lacat, $matches))
            {
                $fe_valid_cats[] = $matches[1];
            }
        }
    }
    // the handler splits the list on '_' again
    $query .= '&cats='. implode('_', $fe_valid_cats);

    $output .= '<a href="'. $this->Href('fileexport.xml'.$query, $this->tag, '') .'" title="'. FE_DOWNLOAD_LINK_TITLE. '">'. sprintf(FE_DOWNLOAD_LINK_TEXT, strtoupper($type)). '</a>';
}
echo $output;
?>


handlers/page/fileexport.xml.php


<?php                                                                          
#
# Exports part of the wiki content in various formats
#
# @package      Handlers
# @name         fileexport.xml
#
# @authors      DomBonj
#
# @version      1.05
#
# @uses         Wakka::Format()
# @uses         Wakka::GetConfigValue()
# @uses         Wakka::GetUserName()
# @uses         Wakka::HasAccess()
# @uses         Wakka::Href()
# @uses         Wakka::LoadAll()
# @uses         Wakka::LoadPage()
# @uses         Wakka::PageTitle()
# @uses         Wakka::SetPage()
#

// i18n strings
// User-visible strings of the handler. Each one is only defined when a
// translation has not already supplied a constant of the same name.
defined('FE_TABLE_OF_CONTENTS')
    or define('FE_TABLE_OF_CONTENTS', 'Table of contents');
defined('FE_ERROR_REQUEST_STRING')
    or define('FE_ERROR_REQUEST_STRING', 'Illegal request string, unable to export');
defined('FE_ERROR_ARCHIVE_BUILD')
    or define('FE_ERROR_ARCHIVE_BUILD', 'Error building the archive file, unable to export');

if (!function_exists('FEerror'))
{
    function FEerror($msg)
    {
        return ('<em class="error">'. $msg .'</em><br />');
    }
 
    function FEpage_data(&$thisone, $tag, &$page_content)
    {
        // pretend your are this page
        $thisone->SetPage($thisone->LoadPage($tag));
        unset ($thisone->config['includes']);
        $page_content['raw'] = $thisone->page['body'];
        // prevent recursive calling
        $page = preg_replace('/\{\{\s*fileexport\b(.*?)\}\}/', "[fileexport action, $1]", $thisone->page['body']);
        $rendered_body = $thisone->Format($page, 'wakka');
        // add page footer from FootnoteAction
        if (function_exists('FNprint'))
        {
            $rendered_body .= FNprint($thisone, 'list', '', $thisone->Href());
        }
        $page_content['body'] = $rendered_body;
    }
 
    function FEhtml(&$thisone, &$pages, &$key_order, $embed_css, $css_path, $remove_links, $save_tag, $save_page, $raw=false)
    {
        $root_url = $_SERVER['SERVER_NAME'] . $_SERVER['PHP_SELF'];
        $header = "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>\n".
            "<html><head><meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1' />\n";
        if ($fcss = fopen($css_path, 'r'))
        {
            if (!($stylesheet = file_get_contents($css_path)))
            {
                $stylesheet = '';
            }
            else
            {
                $stylesheet = "<style type='text/css'>$stylesheet\n</style>";
            }
        }
        else
        {
            $stylesheet = '';
        }
        $header .= ($embed_css ? $stylesheet : '') .'<title>'. $thisone->GetConfigValue('wakka_name') ."</title></head><body>\n";
        $toc = '';
        $file_name = $thisone->GetConfigValue('wakka_name') .'_'. date('Ymd');
        foreach ($pages as $tag => $mypage)
        {  // prepare mapping table for translation of intra-wiki links
            $pat[] = '/'. preg_quote('http://'.$root_url, '/') .'\?wakka='. $tag .'([^\w^Ä^Ö^Ü^ß^ä^ö^ü])/';
            $rep[] = '#'. $mypage['i'] .'$1';
        }
        $save_includes = $thisone->config['includes'];
        foreach ($key_order as $tag => $val)
        {
            $pagedata = array();
            FEpage_data($thisone, $tag, $pagedata);
            $toc .= '<li><a href="#'. $pages[$tag]['i'] .'">'. $pages[$tag]['title'] ."</a></li>\n";
            if (!$raw)
            {  // preserve intra-wiki links
                $pagedata['body'] = '<a name="'. $pages[$tag]['i'] .'"></a>'. preg_replace($pat, $rep, $pagedata['body']);
            }
            else
            {  // remove all intra-wiki links for raw HTML
                $pat = '/<\s*a[^>]+href=([\'\"])'. preg_quote('http://'.$root_url, '/') .'\?wakka=.+?[^>].*?\>(.*?)<\/a>/';
                $pagedata['body'] = preg_replace($pat, "$2", $pagedata['body']);           
            }
            // relabel footnotes from FootnoteAction
            $pagedata['body'] = preg_replace("/href=\'\#(\d+)\#fn(\d+)/", "href='#fn$1_$2", $pagedata['body']);
            $pagedata['body'] = preg_replace("/id=\'fn(\d+)/", "id='fn". $pages[$tag]['i']. "_$1", $pagedata['body']);
            $pagedata['body'] = preg_replace("/href=\'\#(\d+)\#fnback(\d+)/", "href='#fnback$1_$2", $pagedata['body']);
            $pagedata['body'] = preg_replace("/id=\'fnback(\d+)/", "id='fnback". $pages[$tag]['i']. "_$1", $pagedata['body']);
            // if requested, remove external links including tail
            if ($remove_links)
            {  
                $pagedata['body'] = preg_replace("/<\s*a[^>]+href=([\'\"])[^#][^>]*?\>(.*?)<\/a>/", "$2", $pagedata['body']);
                $pagedata['body'] = preg_replace('/'.preg_quote($thisone->GetConfigValue('external_link_tail'),'/').'/', '', $pagedata['body']);
            }
            else if ($raw)
            {  // remove links tail
                $pagedata['body'] = preg_replace('/'.preg_quote($thisone->GetConfigValue('external_link_tail'),'/').'/', '', $pagedata['body']);
            }
            $output .= $pagedata['body'] ."<hr />\n";
        }
        if (!$raw)
        {
            $output = $header .'<div class="page"><h2>'. FE_TABLE_OF_CONTENTS. '</h2><ul>'. $toc .'</ul><hr />'. $output . '</div></body></html>';
        }
        else
        {  // remove last ruler from raw HTML
            $output = preg_replace('/\<hr \/\>$/', '', $output);
        }
     
        // restore original values
        $this->tag = $save_tag;
        $this->page = $save_page;
        $this->config['includes'] = $save_includes;
     
        header('Content-Disposition: attachment; filename='. $file_name .'.html');
        header('Content-Type: text/html; charset=ISO-8859-1');
        header('Expires: 0');
        header('Pragma: no-cache');
        print $output;
        exit();
    }

    function FExml(&$thisone, &$pages, &$key_order, $remove_links, $save_tag, $save_page)
    {
        $file_name = $thisone->GetConfigValue('wakka_name') .'_'. date('Ymd');
        $output = '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><wikka>';
        $save_includes = $thisone->config['includes'];
        foreach ($key_order as $tag => $val)
        {
            $mytab = $pages[$tag];
            $output .= '<page tag="'. $tag .'" lastchange="'. strftime('%Y-%m-%dT%H:%M:%S', strtotime($mytab['lastchange'])) .'"><owner>'. $mytab['owner'] .'</owner>';
            foreach (explode (' ', $mytab['cats']) as $mycat)
            {
                if ($mycat)
                {
                    $output .= '<category>'. $mycat. '</category>';
                }
            }
            $output .= '<title>'. utf8_encode(htmlspecialchars($mytab['title'], ENT_COMPAT, 'UTF-8')) ."</title>\n";
            $pagedata = array();
            FEpage_data($thisone, $tag, $pagedata);      
            $output .= '<raw_content><![CDATA['. utf8_encode($pagedata['raw']) .".]]></raw_content>\n";
            if ($remove_links)
            {  // if requested, remove external links including tail   
                $pagedata['body'] = preg_replace("/<\s*a[^>]+href=([\'\"])[^#][^>]*?\>(.*?)<\/a>/", "$2", $pagedata['body']);
                $pagedata['body'] = preg_replace('/'.preg_quote($thisone->GetConfigValue('external_link_tail'),'/').'/', '', $pagedata['body']);
            }
            $output .= '<rendered_content>'. utf8_encode(numeric_html_entities($pagedata['body'])) .'</rendered_content>'; 
            if (preg_match_all('/\{\{structdata(.+?)\}\}/', $pagedata['raw'], $matches))
            {  // extract structured data items and add them to XML
                foreach ($matches[1] as $mym)
                {
                    $structdatatype = '';
                    $params = array();
                    if (preg_match('/type=\"(.*?)\"[ \t]*.*?data=\"(.*?)\"/', $mym, $matches2))
                    {
                        $structdatatype = $matches2[1];
                        preg_match_all('/(\w+?)\=\'(.*?)\'/', $matches2[2], $matches3);
                    }
                    else if (preg_match('/data=\"(.*?)\"[ \t]*.*?type=\"(.*?)\"/', $mym, $matches2))
                    {
                        $structdatatype = $matches2[2];
                        preg_match_all('/(\w+?)\=\'(.*?)\'/', $matches2[1], $matches3);
                    }
                    for ($i=0; $i < count($matches3[1]); $i++)
                    {
                        $params[strtolower($matches3[1][$i])] = $matches3[2][$i];
                    }
                    if ($structdatatype)
                    {
                        $output .= '<structdataitem type="'. $structdatatype. '">';
                        foreach ($params as $myparam => $myval)
                        {
                            $output .= '<'. $myparam .'>'. utf8_encode(htmlspecialchars($myval, ENT_COMPAT)) .'</'. $myparam .'>';
                        }
                        $output .= "</structdataitem>\n";
                    }
                }
            }
            $output .= "</page>\n";
        }
        $output .= '</wikka>';

        // restore original values
        $thisone->tag = $save_tag;
        $thisone->page = $save_page;
        $thisone->config['includes'] = $save_includes;
     
        header('Content-Disposition: attachment; filename='. $file_name .'.xml');
        header('Content-Type: text/xml; charset=UTF-8');
        header('Expires: 0');
        header('Pragma: no-cache');
        print $output;
        exit();
    }
   
    function numeric_html_entities($string)
    {
        $trans_tbl1 = get_html_translation_table(HTML_ENTITIES);
        foreach ($trans_tbl1 as $ascii => $html_entity)
        {
            $trans_tbl2[$html_entity] = '&#'. ord($ascii) .';';
            $trans_tbl3[$ascii] = '&#'. ord($ascii) .';';
        }
        // keep XML entities
        unset($trans_tbl3['<']);
        unset($trans_tbl3['>']);
        unset($trans_tbl3['"']);
        unset($trans_tbl3['\'']);
        unset($trans_tbl3['&']);
        $ret = strtr (strtr ($string, $trans_tbl2), $trans_tbl3);
        // translate '&' character if not part of a numeric entity
        $ret = preg_replace('/&(?!#[x]?[0-9a-f]+;)/i', '&', $ret);
        return($ret);
    }
}

// ---------------------------------------------------------------------------
// Handler body: validates the request, collects the pages to export, sorts
// them and hands them to FEhtml() or FExml(), which send the download and
// end the request.
// ---------------------------------------------------------------------------
$pages = array();
$error_msg = '';

// Request parameters with the defaults the action itself uses. Values that
// are not plain strings (e.g. emb[]=y) invalidate the request.
$fe_params = array('emb' => 'n', 'rlnk' => 'n', 'scope' => 'm', 'type' => 'html', 'sort' => 'tag', 'cats' => '');
$fe_valid = true;
foreach ($fe_params as $fe_name => $fe_default)
{
    if (isset($_GET[$fe_name]))
    {
        if (is_string($_GET[$fe_name]))
        {
            $fe_params[$fe_name] = $_GET[$fe_name];
        }
        else
        {
            $fe_valid = false;
        }
    }
}

// Category names arrive joined by '_'. They are used inside a regular
// expression below, so anything that is not a well-formed category name
// invalidates the request.
$categories = array();
foreach (explode('_', $fe_params['cats']) as $fe_cat)
{
    if ($fe_cat === '')
    {
        continue;
    }
    if (preg_match("/^Category[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*$/", $fe_cat))
    {
        $categories[] = $fe_cat;
    }
    else
    {
        $fe_valid = false;
    }
}

if (!$fe_valid
    || !preg_match("/^[YN]$/i", $fe_params['emb'])
    || !preg_match("/^[YN]$/i", $fe_params['rlnk'])
    || !preg_match("/^([oO]|[pP]|[mM]|[A-ZÄÖÜ]+[a-zßäöü]+[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)$/", $fe_params['scope'])
    || !preg_match("/^(html|rawhtml|xml)$/i", $fe_params['type'])
    || !preg_match("/^(time|tag|title)$/i", $fe_params['sort']))
{
    $error_msg .= FEerror(FE_ERROR_REQUEST_STRING);
}
else
{
    $save_page = $this->page;
    $save_tag = $this->tag;
    $embed_css = ('y' == strtolower($fe_params['emb']));
    $remove_links = ('y' == strtolower($fe_params['rlnk']));
    $css_path = 'css' .DIRECTORY_SEPARATOR. $this->GetConfigValue('stylesheet');
    $pagecnt = 0;
    // validation is case-insensitive, so compare normalised values
    $fe_scope = strtolower($fe_params['scope']);
    $fe_type = strtolower($fe_params['type']);
    $fe_sort = strtolower($fe_params['sort']);

    $query = 'SELECT * FROM '. $this->config['table_prefix']. 'pages WHERE ((latest = \'Y\')';
    if ('m' == $fe_scope)
    {
        // pages owned by the current user
        // NOTE(review): the user name is interpolated unescaped; consider
        // passing it through the wiki's own SQL escaping routine.
        $query .= ' AND (owner = \''. $this->GetUserName() .'\')';
    }
    else if ('o' == $fe_scope)
    {
        // pages that have an owner
        $query .= ' AND (owner <> \'(Public)\')';
    }
    else if ('p' != $fe_scope)
    {
        // a single page is exported; the tag passed the wiki-word check above
        $query .= ' AND (tag = \''. $fe_params['scope'] .'\')';
    }
    if ($categories)
    {
        // coarse pre-filter: the page mentions some category at all
        $query .= ' AND (body REGEXP \'Category[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*[[:>:]]\')';
    }
    $query .= ')';
    $rows = $this->LoadAll($query);
    if (!is_array($rows))
    {
        $rows = array();
    }
    foreach ($rows as $row)
    {
        $itscats = array();
        if ($categories)
        {
            // keep the page only if it mentions one of the requested categories
            $match = false;
            foreach ($categories as $mycat)
            {
                if (preg_match('/\b'. preg_quote($mycat, '/') .'\b/i', $row['body']))
                {
                    $match = true;
                    $itscats[] = $mycat;
                }
            }
        }
        else
        {
            // no filter: record every category the page mentions
            $match = true;
            preg_match_all("/\b(Category[A-Z0-9ÄÖÜ][A-Za-z0-9ÄÖÜßäöü]*)\b/", $row['body'], $matches);
            foreach ($matches[1] as $mycat)
            {
                $itscats[] = $mycat;
            }
        }
        if ($match && $this->HasAccess('read', $row['tag']))
        {  // this page is to be exported: pretend you are this page
            $this->SetPage($this->LoadPage($row['tag']));
            // fill the 'pages' associative array
            $pages[$row['tag']]['i'] = $pagecnt;
            $pages[$row['tag']]['cats'] = implode (' ', array_unique($itscats));
            $pages[$row['tag']]['lastchange'] = $this->page['time'];
            $pages[$row['tag']]['ts'] = strtotime($this->page['time']);
            $pages[$row['tag']]['owner'] = $this->page['owner'];
            $pages[$row['tag']]['title'] = trim($this->PageTitle());
            $pagecnt++;
        }
    }

    // choose the sort key: 'ts' (time), 'title', or the tag by default
    $field = 'tag';
    if ('time' == $fe_sort)
    {
        $field = 'ts';
    }
    else if ('title' == $fe_sort)
    {
        $field = 'title';
    }
    $key_order = array();
    foreach ($pages as $tag => $mytab)
    {
        $key_order[$tag] = ($field == 'tag') ? strtolower($tag) : strtolower($mytab[$field]);
    }
    // do the actual sorting: newest first for time, ascending otherwise
    if ('ts' == $field)
    {
        arsort($key_order);
    }
    else
    {
        asort($key_order);
    }

    // both functions send the download and end the request
    if ('xml' == $fe_type)
    {
        FExml($this, $pages, $key_order, $remove_links, $save_tag, $save_page);
    }
    else
    {
        FEhtml($this, $pages, $key_order, $embed_css, $css_path, $remove_links, $save_tag, $save_page, ('rawhtml' == $fe_type));
    }
}

// Nothing was exported: report the collected error as a small XML document
// styled with the wiki's xml.css.
if ($error_msg)
{
    $stylesheet_url = $this->GetConfigValue('base_url') .'/css/xml.css';

    header('Content-type: text/xml');
    $xml  = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
    // the processing instruction's closing marker is assembled from two pieces
    $xml .= '<?xml-stylesheet href="' . $stylesheet_url .'" type="text/css"?' .">\n";
    $xml .= '<item><title>Error message</title>'
          . '<description>'. $error_msg. '</description></item>';
    print $xml;
}
?>



CategoryUserContributions
There are no comments on this page.
Valid XHTML :: Valid CSS: :: Powered by WikkaWiki