Revision [2865]

This is an old revision of GmBowenSpellchecker made by GmBowen on 2004-12-05 23:43:28.

 

Wikka Spellchecker

This is code derived from a class released at Php Classes as a generic PHP Spell Checker and is released under GPL for non-commercial uses. (Version 1.1 with improved punctuation & wikicode removal)

I don't doubt it is less "efficient" than those using packages that are "built into" the server, but it has the advantage that to set it up you don't need server root access, or the permission of your server provider. I'll also apologize that it is so "English" based, but the beauty of using this text-based file system is that non-English words can be added, or other dictionaries used instead.

If anyone wants to improve this spellchecker, I think it needs a "regular expression" thing (or something) added somewhere that....
Other slight oddities also crop up (like two words being run together despite the space between them).

I modified the handlers/page/edit.php file by adding this code close to the bottom of the file...
    print($output);
// Spellcheck added by GMB
    // Run the spellchecker only when the edit form was submitted through the
    // button whose value is "Spellcheck".
    // NOTE(review): $_POST["submit"] is read without an isset() check, so a
    // request that carries no "submit" field raises an undefined-index notice.
    if ($_POST["submit"] == "Spellcheck")
    {
include("spellchecker/dictionary_class.php"); // defines the dataReader and jSearchString classes used below; path is relative.
$dr=new dataReader();   // dataReader::reader() prints the spellcheck report for the string it is given.
               // It uses the lookupTable class to decide which words are misspelt.
        // NOTE(review): $wrd, $j and $i are not referenced again inside this snippet.
        $wrd="";
        $j=0;
     $i=0;


// jSearchString (adapted from phpclasses.org) strips the listed wiki markup words and punctuation.
$jSS=new jSearchString();

// Clean the text to check. $body is not defined in this snippet;
// presumably it holds the page text being edited -- confirm against the rest of edit.php.
$str = $jSS->parseString($body);

// Lower-case the text before the dictionary lookup.
$str= strtolower($str);

        // Print the list of unknown words together with suggestions.
        $dr->reader($str);
    }
// end of Spellcheck code}
else
{


The handlers/page/edit.php file was further altered by adding code for a "Spellcheck" button.....
            //finish
// ORIGINAL LINE          "<input name=\"submit\" type=\"submit\" value=\"Store\" accesskey=\"s\" /> <input name=\"submit\" type=\"submit\" value=\"Preview\" accesskey=\"p\" /> <input type=\"button\" value=\"Cancel\" onClick=\"document.location='".$this->href("")."';\" />\n".
// Line below this has added spellcheck button (GMB)
            "<input name=\"submit\" type=\"submit\" value=\"Store\" accesskey=\"s\" /> <input name=\"submit\" type=\"submit\" value=\"Preview\" accesskey=\"p\" /> <input name=\"submit\" type=\"submit\" value=\"Spellcheck\" /> <input type=\"button\" value=\"Cancel\" onClick=\"document.location='".$this->href("")."';\" />\n".
            $this->FormClose();


The dictionary_class.php file (provided at phpclasses) was modified to the following (and placed in the directory "spellchecker" in the wiki root).
<?php
//////////////////////////////// Dictionary class originally written by Reza Salehi.     ////////////////////////////////////////////
//////////////////////////////// General Public License.                    ////////////////////////////////////////////
//////////////////////////////// zaalion@yahoo.com, http://zaalion.com          ////////////////////////////////////////////
//////////////////////////////// tel : +98 912 2345463                         ////////////////////////////////////////////
//////////////////////////////// Dec. 2004.                            ////////////////////////////////////////////
//////////////////////////////// JSearchString by Johan De Klerk from PHPCLASSES.org     ////////////////////////////////////////////
//////////////////////////////// Modified by GMBowen for Wikka Wiki Dec. 2004 under GPL  ////////////////////////////////////////////

/**
 * Position-weighted string hash used to pick a bucket of the spellchecker's
 * lookup table.
 */
class hashing
{
        /**
         * Hash a word into the range 0 .. $len-1.
         *
         * Each byte value is multiplied by the square of its 1-based position
         * and summed; the sum is multiplied by the value of the first byte and
         * reduced modulo $len.
         *
         * @param string $str Word to hash (treated as raw bytes).
         * @param int    $len Number of buckets; must be non-zero.
         * @return int Bucket index. The empty string always hashes to 0.
         */
        function hashFunction($str, $len)
        {
                $str = (string) $str;
                $length = strlen($str);

                // An empty word has no first byte: reading $str[0] would raise
                // an "Uninitialized string offset" notice/warning, so return
                // the value the formula yields for it (0) directly.
                if ($length === 0)
                        return(0);

                //step 1: position-weighted sum of the byte values
                $hash = 0;
                for ($i = 0; $i < $length; $i++)
                        $hash += ord($str[$i]) * ($i + 1) * ($i + 1);

                $hash *= ord($str[0]);

                //step 2: fold into the table size
                $hash %= $len;

                return($hash);
        }
}

/**
 * In-memory dictionary: a fixed number of buckets, selected with
 * hashing::hashFunction(), each holding the words that hash to it.
 */
class lookupTable
{
        // Number of buckets in the table.
        var $MAX=6967;//6967
        // Length passed to fgets(); a dictionary line is read in pieces of at
        // most $MaxWordLength-1 bytes.
        var $MaxWordLength=45;
        // Path of the word list, one word per line.
        var $dictionaryFile="spellchecker/dictionary.txt";
        // Bucket array: $lookupTable[bucket][slot] is a word or NULL.
        var $lookupTable;
        // Number of slots pre-allocated per bucket (a bucket may grow past it).
        var $MAXLen=16;

        /**
         * Reset the table to $MAX buckets of $MAXLen empty (NULL) slots.
         */
        function initializeArray()
        {
            $this->lookupTable = array_fill(
                0,
                $this->MAX,
                array_fill(0, $this->MAXLen, NULL)
            );
        }

        /**
         * Load every non-empty line of $dictionaryFile into the table.
         *
         * Blank lines are skipped rather than ending the load, and the file
         * named by $dictionaryFile is the one that is read.
         *
         * @return bool true when the file was read, false when it could not
         *              be opened (the table is then left empty).
         */
        function readToTable()
        {
                $this->initializeArray();

                if (!is_readable($this->dictionaryFile))
                        return(false);

                $fp = fopen($this->dictionaryFile, "r");
                if ($fp === false)
                        return(false);

                $hash = new hashing();

                // Compare against false explicitly: a line holding "0" or a
                // blank line is falsy but is not the end of the file.
                while (($line = fgets($fp, $this->MaxWordLength)) !== false)
                {
                        $word = trim($line);
                        if ($word === "")
                                continue;

                        $index = $hash->hashFunction($word, $this->MAX);

                        // First free slot of the bucket; isset() is false both
                        // for NULL slots and for slots past the allocated end.
                        $slot = 0;
                        while (isset($this->lookupTable[$index][$slot]))
                                $slot++;
                        $this->lookupTable[$index][$slot] = $word;
                }

                fclose($fp);
                return(true);
        }

        /**
         * Tell whether a word is present in the table.
         *
         * @param string $str Word to look up; surrounding whitespace is ignored.
         * @return bool true when the trimmed word is stored in the table, or
         *              when it is empty (an empty token is never reported as a
         *              misspelling); false otherwise.
         */
        function isIn($str)
        {
                $str = trim((string) $str);
                if ($str === "")
                        return(true);

                $h = new hashing();
                $index = $h->hashFunction($str, $this->MAX);

                // Nothing loaded, or no such bucket.
                if (!isset($this->lookupTable[$index]))
                        return(false);

                foreach ($this->lookupTable[$index] as $entry)
                {
                        // The first NULL slot marks the end of the used part.
                        if ($entry === NULL)
                                break;
                        // Strict comparison: with == numeric-looking strings
                        // such as "1e3" and "1000" would count as the same word.
                        if ($entry === $str)
                                return(true);
                }

                return(false);
        }

        /**
         * Print, separated by " , ", every dictionary word obtained from $str
         * by replacing exactly one byte with a letter 'a'..'z'.
         *
         * @param string $str Word to find suggestions for.
         * @return int Number of suggestions printed.
         */
        function suggestion($str)
        {
                $str = (string) $str;
                $count = 0;
                $length = strlen($str);

                for ($i = 0; $i < $length; $i++)
                {
                        // Start again from the unmodified word for each position.
                        $candidate = $str;

                        // 97..122 are the byte values of 'a'..'z'.
                        for ($code = 97; $code < 123; $code++)
                        {
                                $candidate[$i] = chr($code);
                                if ($this->isIn($candidate))
                                {
                                        if ($count > 0) print(" , ");
                                        // The output goes into an HTML page. ISO-8859-1
                                        // makes the escaping byte-wise, so it never
                                        // rejects the input whatever its encoding.
                                        print(htmlspecialchars($candidate, ENT_QUOTES, 'ISO-8859-1'));
                                        $count++;
                                }
                        }
                }
                return($count);
        }
}

// modified by GMB from JsearchString
// obtained from phpclasses.org, written by Johan De Klerk released as "Freely Distributable"

/**
 * Strips wiki markup words and punctuation from a text so that only plain
 * words are left for the spellchecker.
 */
class jSearchString {

    // Markup/page names replaced by a single space. More entries can be added
    // here (all actions would probably be a good idea, perhaps wiki page names too).
    var $wikiwords = array("{{google}}", "{{chat}}", "{{showcode}}", "{{hiddenpages}}", "CategoryWiki");

    // Characters that are deleted outright (no space is put in their place).
    var $symbols = array('/','\\','\'','"',',','.','<','>','?',';',':','[',']','{','}','|','=','+','-','_',')','(','*','&','^','%','$','#','@','!','~','`');

    /**
     * Clean a text: wiki words first, then symbols.
     *
     * @param string $string Raw text.
     * @return string Text without the listed wiki words and symbols, trimmed.
     */
    function parseString($string) {
        $padded = ' '.$string.' ';
        return $this->removeSymbols($this->removeWikiwords($padded));
    }

    /**
     * Replace every entry of $wikiwords with one space and trim the result.
     * Runs of spaces left behind are not collapsed.
     *
     * @param string $string Text to process.
     * @return string Processed text.
     */
    function removeWikiwords($string) {
        // With an array of needles str_replace() applies them one after
        // another, in array order, each on the result of the previous one.
        return trim(str_replace($this->wikiwords, ' ', $string));
    }

    /**
     * Delete every entry of $symbols from the text and trim the result.
     *
     * @param string $string Text to process.
     * @return string Processed text.
     */
    function removeSymbols($string) {
        return trim(str_replace($this->symbols, '', $string));
    }
}

/**
 * Front end of the spellchecker: loads the dictionary and prints an HTML
 * report of the words it does not contain.
 */
class dataReader
{
        /**
         * Print the spellcheck report for a text.
         *
         * The dictionary is loaded through lookupTable::readToTable(); the
         * text is split into words on any run of whitespace, and every word
         * for which lookupTable::isIn() is false is printed, followed by the
         * output of lookupTable::suggestion() for it.
         *
         * @param string $strToCheck Text to check; punctuation removal and
         *                           lower-casing are left to the caller.
         * @return void
         */
        function reader($strToCheck)
        {
                $lt = new lookupTable();
                print("</p>Spellcheck....");
                $lt->readToTable();
                print("Completed!<br><br>");

                // Split on any whitespace, not only on a single space: with
                // explode(' ') the last word of one line and the first word of
                // the next were reported as one run-together word.
                // PREG_SPLIT_NO_EMPTY drops the empty tokens produced by
                // consecutive separators.
                $words = preg_split('/\s+/', (string) $strToCheck, -1, PREG_SPLIT_NO_EMPTY);
                if ($words === false)
                        $words = array();

                print("<b>These words are mis-spelled, suggestions for correct spellings are provided :<br></b>");
                foreach ($words as $word)
                {
                        if ($lt->isIn($word))
                                continue;

                        // The word comes from user-edited text and goes into an
                        // HTML page, so escape it. ISO-8859-1 makes the escaping
                        // byte-wise, so it never rejects the input whatever its
                        // encoding.
                        print("<b>&nbsp;&nbsp;&nbsp;".htmlspecialchars($word, ENT_QUOTES, 'ISO-8859-1')." : </b>");
                        $lt->suggestion($word);
                        print("<br>");
                }
        }
}
?>


The "dictionary.txt" file may either be downloaded from phpclasses, or is available from my server here (at least for now); it must also be placed in the "spellchecker" directory in the wikka root.
Valid XHTML :: Valid CSS: :: Powered by WikkaWiki