Mon Jun 13 22:58:13 UTC 2011 pix@kepibu.org
* Move automatic URL classification into its own function.
hunk ./claki.lisp 55
- (let ((url (oh-ducks.traversal:element-attribute :href link))
- (rel (oh-ducks.traversal:element-attribute :rel link)))
+ (let ((url (oh-ducks.traversal:element-attribute :href link)))
hunk ./claki.lisp 61
- (gethash (url-domain url) *spam-urls*))
- (pushnew (list page-url current-version) *has-spam* :test #'equal))
- ((and (stringp rel)
- (or (string-equal "follow" rel)
- (string-equal "dofollow" rel)))
- (setf (gethash url *spam-urls*) t)
+ (gethash (url-domain url) *spam-urls*)
+ (auto-classify link))
hunk ./claki.lisp 69
+(defun auto-classify (link)
+  "Mark LINK's :href as spam when its :rel attribute is one spammers favour.
+
+A string :rel equal (ignoring case) to `follow' or `dofollow' records the
+:href value as a key in *SPAM-URLS* and returns T.  Otherwise returns NIL
+and leaves *SPAM-URLS* untouched."
+  (let ((rel-value (oh-ducks.traversal:element-attribute :rel link))
+        (href (oh-ducks.traversal:element-attribute :href link)))
+    ;; STRINGP guards against a missing (non-string) :rel before comparing.
+    (when (and (stringp rel-value)
+               (member rel-value '("follow" "dofollow") :test #'string-equal))
+      (setf (gethash href *spam-urls*) t))))
+