Split out an unattended and an attended portion
Mon Jun 13 22:35:27 UTC 2011 pix@kepibu.org
* Split out an unattended and an attended portion
In preparation for the time I let it loose without me watching over it.
hunk ./claki.lisp 41
+(defvar *urls-to-classify* (list) "list of (url-to-classify cliki-page-where-it-was-found page-version).")
+(defvar *has-spam* (list) "list of (cliki-page version) known to have spam.")
hunk ./claki.lisp 55
- (let ((url (oh-ducks.traversal:element-attribute :href link)))
- (tagbody
- :handle-url
- (cond
- ((or (gethash url *okay-urls*)
- (gethash (url-domain url) *okay-urls*))
- #+(or) (do-nothing))
- ((or (gethash url *spam-urls*)
- (gethash (url-domain url) *spam-urls*))
- (maybe-request-last-known-good page-url)
- (return-from parse-page (revert-page page-url current-version (gethash page-url *last-known-good*))))
- (t
- (request-classification url)
- (go :handle-url))))))))))
+ (let ((url (oh-ducks.traversal:element-attribute :href link))
+ (rel (oh-ducks.traversal:element-attribute :rel link)))
+ (cond
+ ((or (gethash url *okay-urls*)
+ (gethash (url-domain url) *okay-urls*))
+ #+(or) (do-nothing))
+ ((or (gethash url *spam-urls*)
+ (gethash (url-domain url) *spam-urls*))
+ (pushnew (list page-url current-version) *has-spam* :test #'equal))
+ ((and (stringp rel)
+ (or (string-equal "follow" rel)
+ (string-equal "dofollow" rel)))
+ (setf (gethash url *spam-urls*) t)
+ (pushnew (list page-url current-version) *has-spam* :test #'equal))
+ (t
+ (pushnew (list url page-url current-version) *urls-to-classify* :test #'equal)))))))))
hunk ./claki.lisp 88
- (setf (gethash (url-domain url) *spam-urls*) t))))
+ (setf (gethash (url-domain url) *spam-urls*) t))
+ (classify-later ()
+ :report "Don't classify this URL yet."
+ nil)))
+
+(defun classify-unknown-urls ()
+  "Drop queued URLs now in a table or for which REQUEST-CLASSIFICATION is true."
+  (flet ((listed-p (link table)
+           (or (gethash link table) (gethash (url-domain link) table))))
+    (setf *urls-to-classify*
+          (loop :for (link found-on revision) :in *urls-to-classify*
+                :when (not (or (listed-p link *okay-urls*) (listed-p link *spam-urls*)
+                               (request-classification link)))
+                  :collect (list link found-on revision)))))
+
+(defun mark-known-goods ()
+  "Call MAYBE-REQUEST-LAST-KNOWN-GOOD on the page of every *HAS-SPAM* entry."
+  (dolist (entry *has-spam*) (maybe-request-last-known-good (first entry))))
+
+(defun revert-spam ()
+  "Revert each *HAS-SPAM* page that has a last-known-good; keep entries not reverted."
+  (flet ((reverted-p (page revision)
+           (let ((good (gethash page *last-known-good*))) (and good (revert-page page revision good)))))
+    (setf *has-spam* (loop :for (page revision) :in *has-spam*
+                           :unless (reverted-p page revision) :collect (list page revision)))))
hunk ./claki.lisp 131
-(defun revert-page (page current-version to-version)
- (drakma:http-request (format nil "http://cliki.net/edit/~a" page)
- :method :post
- :parameters `(("version" . ,current-version)
- ("T0" . "BODY")
- ("E0" . ,(get-cliki-source page to-version))
- ("summary" . "Spam detected, reverting to Known-Good.")
- ("captcha" . "lisp")
- ("name" . "Claki (Revertobot Alpha)"))))
+(defun revert-page (url current-version to-version)
+  "POST (GET-CLIKI-SOURCE URL TO-VERSION) to the cliki edit form for URL.
+Returns the response body when the status is 200 and the body does not
+contain \"rejected\" (compared case-insensitively); otherwise returns NIL."
+  ;; Only the body and status are used, so the headers value is not bound.
+  (multiple-value-bind (page status)
+      (drakma:http-request (format nil "http://cliki.net/edit/~a" url)
+                           :method :post
+                           :parameters `(("version" . ,current-version)
+                                         ("T0" . "BODY")
+                                         ("E0" . ,(get-cliki-source url to-version))
+                                         ("summary" . "Spam detected, reverting to Known-Good.")
+                                         ("captcha" . "lisp")
+                                         ("name" . "Claki (Revertobot Alpha)")))
+    (and (= status 200) (not (search "rejected" page :test #'char-equal)) page)))
hunk ./claki.lisp 156
-(defun revert-new-spam ()
- (let ((modified-pages (get-recent-changes)))
- (loop :for page :in modified-pages
- :do (parse-page page))))
+(defun attented-revert-new-spam () ; PARSE-PAGE the recent changes, then run ATTENDANT
+  (mapc #'parse-page (get-recent-changes)) ; MAPC: called for effect, no result list built
+  (attendant))
+
+(defun attendant () ; runs the three follow-up steps below, in order
+ (classify-unknown-urls) ; prune *urls-to-classify*, calling REQUEST-CLASSIFICATION on unknown URLs
+ (mark-known-goods) ; MAYBE-REQUEST-LAST-KNOWN-GOOD for each page in *has-spam*
+ (revert-spam)) ; revert pages that have a *last-known-good* entry; its value is returned
+
+(defun unattented-revert-new-spam () ; PARSE-PAGE the recent changes, then REVERT-SPAM only
+  (mapc #'parse-page (get-recent-changes)) ; MAPC: called for effect, no result list built
+  (revert-spam))
hunk ./claki.lisp 169
-#+(or) (revert-new-spam)
+#+(or) (attented-revert-new-spam)
hunk ./claki.lisp 171
-#+(or) (loop (sleep (* 60 60)) (revert-new-spam))
+#+(or) (loop (sleep (* 60 60)) (unattented-revert-new-spam))