Initial import
Mon Jun 13 19:08:27 UTC 2011 pix@kepibu.org
* Initial import
diff -rN -u old-claki/claki.asd new-claki/claki.asd
--- old-claki/claki.asd 1970-01-01 00:00:00.000000000 +0000
+++ new-claki/claki.asd 2013-07-08 10:16:08.000000000 +0000
@@ -0,0 +1,3 @@
+(defsystem :claki
+ :depends-on (:oh-ducks :closure-html :cl-unification :drakma :alexandria)
+ :components ((:file "claki")))
diff -rN -u old-claki/claki.lisp new-claki/claki.lisp
--- old-claki/claki.lisp 1970-01-01 00:00:00.000000000 +0000
+++ new-claki/claki.lisp 2013-07-08 10:16:08.000000000 +0000
@@ -0,0 +1,129 @@
+(defpackage #:claki
+ (:use #:cl #:unify :alexandria))
+(in-package #:claki)
+
+(defvar *last-modified* (make-hash-table :test 'equal) "hash table of cliki pages and the last-modified header we last received.")
+
+#+(or) (clrhash *last-modified*)
+
+(defun get-cliki-page (url)
+ "Returns a page from cliki if it has been modified since we last saw it or nil
+if it has not been modified. Signals an error otherwise."
+ (multiple-value-bind (page status headers)
+ (drakma:http-request (format nil "http://cliki.net/~a" url)
+ :additional-headers (when (gethash url *last-modified*)
+ `((:if-modified-since (gethash url *last-modified*)))))
+ (cond
+ ;; If the page hasn't been modified, no need to update
+ ((= 304 status) nil)
+ ;; The "Recent Changes" page doesn't return 304s, but does return a last-modified.
+ ((and (gethash url *last-modified*)
+ (cdr (assoc :last-modified headers))
+ (string= (cdr (assoc :last-modified headers)) (gethash url *last-modified*)))
+ nil)
+ ((= 200 status)
+ (setf (gethash url *last-modified*) (cdr (assoc :last-modified headers)))
+ page)
+ (t (error "crap!")))))
+
+(defun get-recent-changes ()
+ (when-let ((page (get-cliki-page "Recent%20Changes")))
+ (match (#t(oh-ducks:html ("blockquote > b > a.internal" . ?links)) page)
+ (loop :for link :in (remove-duplicates links :test #'equalp
+ :key (lambda (x) (oh-ducks.traversal:element-attribute :href x))
+ :from-end t)
+ :collect (oh-ducks.traversal:element-attribute :href link)))))
+
+#+(or) (get-recent-changes)
+
+(defvar *spam-urls* (make-hash-table :test 'equal) "hash table of spam urls.")
+(defvar *okay-urls* (make-hash-table :test 'equal) "hash table of acceptable urls.")
+
+#+(or) (clrhash *okay-urls*)
+
+(defun url-domain (url)
+ (puri:uri-host (puri:parse-uri url)))
+
+(defun parse-page (page-url)
+ (when-let ((page (get-cliki-page page-url)))
+ (match (#t(oh-ducks:html ("a[href^=http]" . ?a)
+ ("#footer > b" . #t(list ?b))) page)
+ (let ((current-version (oh-ducks.traversal:element-content b)))
+ (dolist (link a)
+ (let ((url (oh-ducks.traversal:element-attribute :href link)))
+ (tagbody
+ :handle-url
+ (cond
+ ((or (gethash url *okay-urls*)
+ (gethash (url-domain url) *okay-urls*))
+ #+(or) (do-nothing))
+ ((or (gethash url *spam-urls*)
+ (gethash (url-domain url) *spam-urls*))
+ (maybe-request-last-known-good page-url)
+ (return-from parse-page (revert-page page-url current-version (gethash page-url *last-known-good*))))
+ (t
+ (request-classification url)
+ (go :handle-url))))))))))
+
+#+(or) (parse-page "araneida")
+
+(defun request-classification (url)
+ (restart-case (error 'simple-error :format-control "Please classify the URL ~s."
+ :format-arguments (list url))
+ (mark-url-okay ()
+ :report "Mark this URL as acceptable."
+ (setf (gethash url *okay-urls*) t))
+ (mark-domain-okay ()
+ :report "Mark the domain as acceptable."
+ (setf (gethash (url-domain url) *okay-urls*) t))
+ (mark-url-spam ()
+ :report "Mark this URL as spam."
+ (setf (gethash url *spam-urls*) t))
+ (mark-domain-spam ()
+ :report "Mark the domain as spam."
+ (setf (gethash (url-domain url) *spam-urls*) t))))
+
+(defvar *last-known-good* (make-hash-table :test 'equal) "hash table of cliki pages and the last-known \"good\" revision.")
+
+(defun read-number ()
+ (format t "Enter a version: ")
+ (list (format nil "~a" (parse-integer (read-line)))))
+
+(defun maybe-request-last-known-good (page)
+ (unless (gethash page *last-known-good*)
+ (restart-case (error 'simple-error :format-control "Do not know of a good version of cliki page ~s."
+ :format-arguments (list page))
+ (specify-version (version)
+ :interactive read-number
+ :report "Specify a known-good version."
+ (setf (gethash page *last-known-good*) version)))))
+
+#+(or) (maybe-request-last-known-good "araneida")
+
+(defun revert-page (page current-version to-version)
+ (drakma:http-request (format nil "http://cliki.net/edit/~a" page)
+ :method :post
+ :parameters `(("version" . ,current-version)
+ ("T0" . "BODY")
+ ("E0" . ,(get-cliki-source page to-version))
+ ("summary" . "Spam detected, reverting to Known-Good.")
+ ("captcha" . "lisp")
+ ("name" . "Claki (Revertobot Alpha)"))))
+
+(defun get-cliki-source (url version)
+ "Fetches the source text of a given version of a cliki page. That is, it
+returns the text you should POST to revert a cliki page to the given version."
+ (multiple-value-bind (page status headers)
+ (drakma:http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
+ (cond
+ ((= 200 status) page)
+ (t (error "crap!")))))
+
+(defun revert-new-spam ()
+ (let ((modified-pages (get-recent-changes)))
+ (loop :for page :in modified-pages
+ :do (parse-page page))))
+
+#+(or) (revert-new-spam)
+
+#+(or) (loop (sleep (* 60 60)) (revert-new-spam))