Consider text-decoration:none to be a spam indicator as well
Tue Jun 28 08:04:46 UTC 2011 pix@kepibu.org
* Consider text-decoration:none to be a spam indicator as well
diff -rN -u old-claki/claki.lisp new-claki/claki.lisp
--- old-claki/claki.lisp 2013-07-08 13:39:21.000000000 +0000
+++ new-claki/claki.lisp 2013-07-08 13:39:21.000000000 +0000
@@ -78,12 +78,16 @@
(defun auto-classify (link)
"Auto-classify URLs based upon traits common to spammers."
(let ((rel (oh-ducks.traversal:element-attribute :rel link))
+ (style (oh-ducks.traversal:element-attribute :style link)) ; inline style attribute of the link
(url (oh-ducks.traversal:element-attribute :href link)))
(cond
((and (stringp rel)
(or (string-equal "follow" rel)
(string-equal "dofollow" rel)))
(setf (gethash url *spam-urls*) t))
+ ((and (stringp style) ; CSS is case-insensitive and allows any whitespace around the colon
+ (cl-ppcre:scan "(?i)text-decoration\\s*:\\s*none" style))
+ (setf (gethash url *spam-urls*) t)) ; same marking as the rel-based rule above
(t nil))))
(defun request-classification (url &optional page version)