My RebelWiki has been getting a lot of comment spam. I installed BadBehavior about a year ago, and that helped for a while. When that started failing, I installed SpamBlacklist, which again helped for some time, but it was a struggle to keep the list updated, especially as I travel. In October an even bigger wave of spam came, and some pages got so big and took so long to output that my site started hitting CPU quotas. In addition to comment spam, the pages themselves were now getting spammed. So I had to come up with a new solution. I cleaned up the pages with DeleteSpamAction, then installed FreeCap, and finally added my own IP filter.

Here's a brief description of the IP filter:
  1. Added an action, MarkSpam, that shows the admin the number of comments made from each IP and allows blacklisting of IPs
  1. Modified SpamBlacklist to log spam to the DB instead of a file
  1. Modified SpamBlacklist to block IPs that have been blacklisted or that have previously hit a blacklisted word

A few days now with no spam. I received a single comment that said something like: "I've accessed your site 730 times" and promptly blacklisted it.

DB Changes

Added the following fields to wikka_comments:
ip, varchar(16)
spam, tinyint(1)
spamrule, varchar(64)

MarkSpam

  <?php

  // actions/commentsspamlog.php
  // written by AlexBernstein
  //
  // Admin-only action that manages the comment-spam IP blacklist in three steps:
  //   1. (default)      list every IP that has comments with spam = '0',
  //                     together with its comment count and latest comment time;
  //   2. (POST "show")  preview the comments of the selected IPs;
  //   3. (POST "mark")  set spam = '1' on all comments of the selected IPs and
  //                     redirect back to this page.
  // Non-admins get no output at all.

  // Start from an empty buffer so the final echo never reads an undefined
  // variable (non-admin visitors, or the "mark" branch if redirect() returns).
  $wikitext = '';

  if ($this->IsAdmin()) {
      $commentsTable = $this->config["table_prefix"]."comments";

      // The selection comes straight from the request: accept only string
      // entries so a crafted POST (scalar or nested arrays) cannot trigger
      // warnings further down.
      $spammers = array();
      if (isset($_POST['spammers'])) {
          foreach ((array) $_POST['spammers'] as $candidate) {
              if (is_string($candidate)) {
                  $spammers[] = $candidate;
              }
          }
      }

      if (isset($_POST['show']) && isset($_POST['spammers'])) {
          // Step 2: show comments about to be marked as spam
          $wikitext = "<b>Mark following comments as SPAM?</b><br><br>\n";
          $wikitext .= $this->FormOpen();

          foreach ($spammers as $spammer) {
              $query = "SELECT * FROM ".$commentsTable.
                  " WHERE ip = '".mysql_real_escape_string($spammer)."'".
                  " AND spam = '0' ORDER BY time";
              $comments = $this->LoadAll($query);

              if (!$comments) {
                  continue;
              }

              // The IP is request data echoed back into markup: escape it.
              $safeIp = htmlspecialchars($spammer, ENT_QUOTES);
              $wikitext .= "<input type=\"checkbox\" name=\"spammers[]\" value=\"".$safeIp."\" checked><b>".$safeIp."</b><br>\n";

              foreach ($comments as $comment) {
                  // NOTE(review): comment text and user name are emitted exactly
                  // as stored, as before - presumably they are HTML-escaped when
                  // saved; confirm against the addcomment handler.
                  $wikitext .= '<div class="comment">'."\n".
                      '<span id="comment_'.$comment['id'].'"></span>'.$comment['comment']."\n".
                      "\t".'<div class="commentinfo">'."\n-- ";
                  $wikitext .= $comment['user'];
                  $wikitext .= ' ('.$comment['time'].')'."\n";
                  $wikitext .= "\n\t".'</div>'."\n";
                  $wikitext .= '</div>'."\n";
              }
          }

          $wikitext .= "<input type=\"submit\" value=\"Mark As Spam\" name=\"mark\">";
          $wikitext .= $this->FormClose();
      } elseif (isset($_POST['mark']) && isset($_POST['spammers'])) {
          // Step 3: FINALIZE marking as SPAM
          foreach ($spammers as $spammer) {
              // Escaped here as well: this value is posted by the browser and
              // must not reach the query verbatim.
              $query = "UPDATE ".$commentsTable." SET spam ='1'".
                  " WHERE ip ='".mysql_real_escape_string($spammer)."'";
              $this->Query($query);
          }

          // redirect to page
          $this->redirect($this->Href());
      } else {
          // Step 1: FIRST page that displays potential spammers
          $wikitext = "This page manages blacklist for the " .
              "comment spam IP filter.<br><br>\n";
          $wikitext .= "<h1>Frequent Commenters not yet blacklisted</h1><hr>";

          $ipentries = $this->LoadAll(
              "SELECT ip, MAX(time) AS lastdate, COUNT(*) AS count FROM ".$commentsTable.
              " WHERE spam = '0' GROUP BY ip ORDER BY count DESC;"
          );

          if ($ipentries) {
              $wikitext .= $this->FormOpen();
              foreach ($ipentries as $ipentry) {
                  $safeIp = htmlspecialchars($ipentry['ip'], ENT_QUOTES);
                  // The count is the number of comments not marked as spam,
                  // so the label says "commented" rather than "blocked".
                  $wikitext .= "<input type=\"checkbox\" name=\"spammers[]\" value=\"".$safeIp."\"><b>".$safeIp."</b> commented ".$ipentry['count']." times, last was ".$ipentry['lastdate']."<br>\n";
              }
              $wikitext .= "<input type=\"submit\" value=\"Show Comments\" name=\"show\">";
              $wikitext .= $this->FormClose();
          } else {
              $wikitext .= "<em>No entries found.</em>";
          }
      }
  }

  // show result
  echo trim($wikitext);
  ?>


Modified SpamBlacklist

  1. <?php
  2. // Spam Blacklisting Plugin for Wikka Wiki
  3. // Copyright (C) Manuel Reimer (Manuel _dot_ Reimer _at_ gmx _dot_ de)
  4. // This program is free software; you can redistribute it and/or
  5. // modify it under the terms of the GNU General Public License
  6. // version 2 as published by the Free Software Foundation
  7.  
  8. // More information about SpamBlacklist here: http://wikkawiki.org/SpamBlacklist
  9.  
  // Main spam detection routine. If the message has been spam, then this
  // one will call "sb_do_output_magic" and will *exit* the script immediately!
  //
  // $wikkaref  the Wikka object (provides config, tag, LoadSingle, LoadPage,
  //            GetUser and SaveComment)
  // $body      the submitted comment text
  //
  // Returns normally (no value) only when the comment is not considered spam.
  // Rejected comments are stored through SaveComment with one of these codes:
  //   2  body matched an expression of the blacklist page (expression stored
  //      as the spam rule)
  //   3  sender IP already has a comment with spam = '1' (blocked by admin)
  //   4  sender IP already has a comment with spam = '2' (hit the RegExp
  //      blacklist before)
  function sb_checkit($wikkaref, $body) {
    // empty() instead of a bare read: an unconfigured plugin has no such key,
    // and that is exactly the case this check is meant to catch.
    if (empty($wikkaref->config["sbl_page"]))
      die("SpamBlacklist: Please configure the plugin first!");

    $sb_table = $wikkaref->config["table_prefix"]."comments";
    $sb_ip = mysql_real_escape_string($_SERVER["REMOTE_ADDR"]);

    // spam value already on record for this IP => code stored with the new,
    // rejected comment. Checked in this order: admin block first, then an
    // earlier RegExp hit.
    $sb_repeat_codes = array(1 => 3, 2 => 4);
    foreach ($sb_repeat_codes as $sb_seen => $sb_code) {
      $sb_query = "SELECT spam FROM ".$sb_table." WHERE ip = '".$sb_ip."' AND spam = '".$sb_seen."' LIMIT 1";
      if ($wikkaref->LoadSingle($sb_query)) {
        // store new comment
        $wikkaref->SaveComment($wikkaref->tag, $body, $sb_code, "");
        sb_do_output_magic($wikkaref);
        exit();
      }
    }

    // With sbl_only_anon set, logged-in users skip the RegExp blacklist;
    // decide that before loading the blacklist page or decoding the body.
    if ($wikkaref->GetUser() && !empty($wikkaref->config["sbl_only_anon"]))
      return;

    $sb_blacklist = $wikkaref->LoadPage($wikkaref->config["sbl_page"]);
    if (!$sb_blacklist || !isset($sb_blacklist["body"]))
      return;

    // Match against the entity-decoded text so that "&#118;iagra"-style
    // obfuscation cannot slip past the expressions.
    $body = sb_unhtmlentities(trim($body));

    foreach (explode("\n", $sb_blacklist["body"]) as $sb_expression) {
      // drop surrounding whitespace (e.g. the "\r" of CRLF line ends) so it
      // does not end up in the stored rule
      $sb_expression = trim($sb_expression);

      // skip blank lines and "#" comment lines
      if ($sb_expression == '' || $sb_expression[0] == '#')
        continue;

      if (preg_match($sb_expression, $body)) {
        // store new comment; spam is logged to the comments table (code 2
        // plus the matching expression) instead of a log file.
        // NOTE(review): the decoded body is what gets stored here, as before.
        $wikkaref->SaveComment($wikkaref->tag, $body, 2, $sb_expression);
        sb_do_output_magic($wikkaref);
        exit();
      }
    }
  }
  61.  
  // Function for decoding all html entities
  // http://www.php.net/manual/en/function.html-entity-decode.php
  //
  // $string  text that may contain named or numeric HTML entities
  // Returns the text after html_entity_decode() followed by two extra passes
  // that turn any remaining "&#xHH;" (hex) and "&#DDD;" (decimal) references
  // into single bytes. The hex pass runs first, then the decimal pass, so the
  // result of the first can feed the second.
  //
  // The passes use preg_replace_callback(): the "/e" modifier of
  // preg_replace() is deprecated as of PHP 5.5 and removed in PHP 7.0.
  function sb_unhtmlentities($string) {
    $string = html_entity_decode($string);
    $string = preg_replace_callback('~&#x([0-9a-f]+);~i', 'sb_hex_entity_to_char', $string);
    $string = preg_replace_callback('~&#([0-9]+);~', 'sb_dec_entity_to_char', $string);
    return $string;
  }

  // Callback: converts the hex digits captured in $match[1] to one byte.
  // Only the last two hex digits are used, i.e. the value modulo 256, so
  // arbitrarily long digit runs cannot overflow.
  function sb_hex_entity_to_char($match) {
    return chr(hexdec(substr($match[1], -2)));
  }

  // Callback: converts the decimal digits captured in $match[1] to one byte
  // (value masked to 0-255 before chr()).
  function sb_dec_entity_to_char($match) {
    return chr(((int) $match[1]) & 0xFF);
  }
  70.  
  // Function for doing the output magic
  // Will send the user a message first
  // Then a short definition of "spam" is sent *really* slow, to slow down
  // the spammer (teergrubing). With one-second pauses after the message, after
  // each of the 18 words and three more around the closing notice, the whole
  // process takes a little over 20 seconds.
  // This should be within the "max_execution_time" of most providers.
  //
  // $wikkaref  the Wikka object; its config["sbl_message"] is printed and it
  //            stands in for $this while the header/footer actions run
  // Returns nothing. The caller is expected to exit afterwards.
  function sb_do_output_magic($wikkaref) {
    $slow_message = array("Spamming", "is", "the", "abuse", "of", "electronic", "messaging", "systems", "to", "send", "unsolicited", "bulk", "messages,", "which", "are", "almost", "universally", "undesired.");

    // throw away everything buffered so far, whatever the nesting depth
    while(@ob_end_clean());

    // The header/footer actions are written against $this, which does not
    // exist inside a plain function, so their source is rewritten to use
    // $wikkaref and evaluated here.
    // NOTE(review): eval() of rewritten source is fragile; the files are
    // local, but a dedicated rendering method on the Wikka object would be
    // the safer long-term replacement.
    $headercode = file_get_contents("actions/header.php");
    if ($headercode !== false) {
      $headercode = str_replace('$this->', '$wikkaref->', $headercode);
      eval("?>" . $headercode);
    }

    print("<div class=\"page\">");
    print $wikkaref->config["sbl_message"] . "<br/>\n<br/>\n";
    flush();
    sleep(1);
    foreach ($slow_message as $word) {
      // one word per second, flushed so the client really has to wait
      print $word . " ";
      flush();
      sleep(1);
    }
    print "</div>";

    $footercode = file_get_contents("actions/footer.php");
    if ($footercode !== false) {
      $footercode = str_replace('$this->', '$wikkaref->', $footercode);
      eval("?>" . $footercode);
    }

    flush();
    sleep(1);
    print "<div class=\"smallprint\">Spam notice was generated in > 20 seconds. ";
    flush();
    sleep(1);
    // the SpamBlacklist link is closed with </a> (it used to open a second anchor)
    print "Spam filtering powered by <a href=\"http://www.wikkawiki.org/SpamBlacklist\">SpamBlacklist</a>. <a href=\"http://en.wikipedia.org/wiki/teergrubing\">Teergrubing</a> ends here ;-)</div>\n</body>\n</html>";
    flush();
    sleep(1);
  }
  109. ?>



Changes to Wakka.class.php

  1.     // COMMENTS
  // Loads the comments of page $tag that are not marked as spam
  // (spam = '0'), ordered by time. Returns whatever LoadAll() returns.
  function LoadComments($tag)
  {
      $comments_table = $this->config["table_prefix"]."comments";
      $escaped_tag = mysql_real_escape_string($tag);

      $sql = "SELECT * FROM ".$comments_table
          ." WHERE page_tag = '".$escaped_tag."' AND spam = '0' ORDER BY time";

      return $this->LoadAll($sql);
  }
  // Loads the most recent comments that are not marked as spam (spam = '0'),
  // newest first.
  // $limit  maximum number of rows; forced to a non-negative integer because
  //         it is concatenated into the LIMIT clause.
  // Returns whatever LoadAll() returns.
  function LoadRecentComments($limit = 50)
  {
      $limit = max(0, intval($limit));

      return $this->LoadAll("SELECT * FROM ".$this->config["table_prefix"]."comments WHERE spam = '0' ORDER BY time DESC LIMIT ".$limit);
  }
  // Loads, for each page, its latest comment that is not marked as spam,
  // newest first.
  // $limit  maximum number of rows; forced to a non-negative integer because
  //         it is concatenated into the LIMIT clause.
  // Returns whatever LoadAll() returns.
  function LoadRecentlyCommented($limit = 50)
  {
      $limit = max(0, intval($limit));

      // Self-join: c2 is any later non-spam comment (higher id) on the same
      // page. Rows for which no such c2 exists are the latest comment of
      // their page.
      $sql = "SELECT comments.id, comments.page_tag, comments.time, comments.comment, comments.user"
          . " FROM ".$this->config["table_prefix"]."comments AS comments"
          . " LEFT JOIN ".$this->config["table_prefix"]."comments AS c2 ON comments.page_tag = c2.page_tag AND comments.spam = '0' AND c2.spam = '0' AND comments.id < c2.id"
          . " WHERE c2.page_tag IS NULL AND comments.spam = '0' "
          . " ORDER BY comments.time DESC "
          . " LIMIT ".$limit;
      return $this->LoadAll($sql);
  }
  // Stores a comment for page $page_tag, recording the current user name and
  // the sender's IP address.
  // $page_tag  tag of the page being commented on
  // $comment   comment text
  // $spam      numeric spam marker (0 = regular comment); stored as integer
  // $spamrule  the blacklist expression that matched, if any
  function SaveComment($page_tag, $comment, $spam = 0, $spamrule= "")
  {
      // get current user
      $user = $this->GetUserName();
      $ip = $_SERVER["REMOTE_ADDR"];

      // add new comment
      // $spamrule is a regular expression taken from the blacklist page and
      // may contain quotes or backslashes, so it is escaped like every other
      // string value; $spam is forced to an integer.
      $this->Query("INSERT INTO ".$this->config["table_prefix"]."comments SET ".
          "page_tag = '".mysql_real_escape_string($page_tag)."', ".
          "time = now(), ".
          "comment = '".mysql_real_escape_string($comment)."', ".
          "user = '".mysql_real_escape_string($user)."', ".
          "ip = '".mysql_real_escape_string($ip)."', ".
          "spam = '".intval($spam)."', ".
          "spamrule = '".mysql_real_escape_string($spamrule)."'");
  }
There are no comments on this page.
Valid XHTML :: Valid CSS: :: Powered by WikkaWiki