Sun Jul  3 08:26:55 UTC 2011  pix@kepibu.org
  * Fix some problems discovered on fresh-load
hunk ./claki.asd 2
-  :depends-on (:oh-ducks :closure-html :cl-unification :drakma :alexandria :rucksack)
+  :depends-on (:oh-ducks :closure-html :cl-unification :drakma :alexandria :rucksack :local-time)
hunk ./claki.lisp 266
-(defvar *state-file* (merge-pathnames #p"state/" (directory-namestring (or #.*load-pathname* #p"/home/pixel/repos/"))))
+;; Location handed to rucksack as its storage directory (despite the "-file" in
+;; the name, the value is a directory pathname ending in "state/").
+;; *LOAD-PATHNAME* is captured at read time via #., and its directory part is
+;; used as the base; when it is NIL at read time the hard-coded checkout
+;; directory below is used instead.
+(defvar *state-file* (merge-pathnames #p"state/" (directory-namestring (or #.*load-pathname* #p"/home/pixel/repos/claki/"))))
Fri Jul  1 22:52:58 UTC 2011  pix@kepibu.org
  * Expand the timing arguments run-for-a-while takes
hunk ./claki.lisp 246
+(defun days (d)
+  "Return the duration of D days, expressed in the same unit HOURS returns."
+  (* d (hours 24)))
hunk ./claki.lisp 252
-(defun run-for-a-while (times)
+(defun run-for-a-while (how-long how-often variance)
hunk ./claki.lisp 254
-  (dotimes (i times)
-    (sleep (minutes (plus-or-minus 30 5)))
+  (dotimes (i (floor how-long how-often))
+    (sleep (plus-or-minus how-often variance))
hunk ./claki.lisp 263
-   (sb-thread:make-thread (lambda () (let ((*standard-output* stdout)) (run-for-a-while (* 2 48))))
+   (sb-thread:make-thread (lambda () (let ((*standard-output* stdout)) (run-for-a-while (days 2) (minutes 30) (minutes 5))))
Wed Jun 29 03:08:25 UTC 2011  pix@kepibu.org
  * Wrap drakma:http-request so we can have automatic retries on timeouts
hunk ./claki.lisp 9
+(defun http-request (&rest drakma-args)
+  "Call drakma:http-request with DRAKMA-ARGS, retrying for as long as it times out."
+  ;; Each usocket:timeout-error is followed by a pause of 60 * 2^n seconds,
+  ;; where n is the number of timeouts seen so far, before the request is
+  ;; attempted again.  Any other condition propagates to the caller.  All
+  ;; values returned by drakma:http-request are passed through.
+  (loop :for times-failed :from 0
+        :do (handler-case
+                (return-from http-request (apply #'drakma:http-request drakma-args))
+              (usocket:timeout-error ()
+                (sleep (* 60 (expt 2 times-failed)))))))
+
hunk ./claki.lisp 25
-      (drakma:http-request (format nil "http://cliki.net/~a" url)
-                           :additional-headers (when (gethash url *last-modified*)
-                                                 `((:if-modified-since (gethash url *last-modified*)))))
+      ;; Send If-Modified-Since only when we have a stored timestamp for URL.
+      ;; The value must be unquoted (,) so the stored timestamp is sent rather
+      ;; than the literal form, and the entry must be a (name . value) pair
+      ;; because :additional-headers is an alist.
+      (http-request (format nil "http://cliki.net/~a" url)
+                    :additional-headers (when (gethash url *last-modified*)
+                                          `((:if-modified-since . ,(gethash url *last-modified*)))))
hunk ./claki.lisp 191
-      (drakma:http-request (format nil "http://cliki.net/edit/~a" url)
-                           :method :post
-                           :parameters `(("version" . ,current-version)
-                                         ("T0"      . "BODY")
-                                         ("E0"      . ,(get-cliki-source url to-version))
-                                         ("summary" . "Spam detected, reverting to Known-Good.")
-                                         ("captcha" . "lisp")
-                                         ("name"    . "Claki (Revertobot Beta)")))
+      (http-request (format nil "http://cliki.net/edit/~a" url)
+                    :method :post
+                    :parameters `(("version" . ,current-version)
+                                  ("T0"      . "BODY")
+                                  ("E0"      . ,(get-cliki-source url to-version))
+                                  ("summary" . "Spam detected, reverting to Known-Good.")
+                                  ("captcha" . "lisp")
+                                  ("name"    . "Claki (Revertobot Beta)")))
hunk ./claki.lisp 213
-          (drakma:http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
+          (http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
Tue Jun 28 17:03:09 UTC 2011  pix@kepibu.org
  * Make #'run-for-a-while take the number of iterations as an argument
hunk ./claki.lisp 239
-(defun run-for-a-while ()
+(defun run-for-a-while (times)
hunk ./claki.lisp 241
-  (dotimes (i (* 2 24))
+  (dotimes (i times)
hunk ./claki.lisp 250
-   (sb-thread:make-thread (lambda () (let ((*standard-output* stdout)) (run-for-a-while)))
+   (sb-thread:make-thread (lambda () (let ((*standard-output* stdout)) (run-for-a-while (* 2 48))))
Tue Jun 28 08:05:37 UTC 2011  pix@kepibu.org
  * Padding for time numbers, so we get 01 instead of 1
hunk ./claki.lisp 236
-(defconstant +simple-time+ '(:year #\- :month #\- :day #\Space :hour #\: :min #\: :sec))
+;; Timestamp layout passed as the :format argument of
+;; local-time:format-timestring: "YYYY-MM-DD HH:MM:SS", with every field after
+;; the year padded to two digits.
+;;
+;; The value is a list, so a plain DEFCONSTANT would see a new, non-EQL value
+;; every time the file is reloaded, which the standard leaves undefined and
+;; SBCL reports as an error.  DEFINE-CONSTANT with an EQUAL test accepts the
+;; reload as long as the list is structurally unchanged.
+(alexandria:define-constant +simple-time+
+    '(:year #\- (:month 2) #\- (:day 2) #\Space (:hour 2) #\: (:min 2) #\: (:sec 2))
+  :test #'equal)
Tue Jun 28 08:05:09 UTC 2011  pix@kepibu.org
  * Fix state-file pathname when *load-pathname* actually exists
hunk ./claki.lisp 253
-(defvar *state-file* (or #.*load-pathname* #p"/home/pixel/repos/claki/state/"))
+(defvar *state-file* (merge-pathnames #p"state/" (directory-namestring (or #.*load-pathname* #p"/home/pixel/repos/"))))
Tue Jun 28 08:04:46 UTC 2011  pix@kepibu.org
  * Consider text-decoration:none to be a spam indicator as well
hunk ./claki.lisp 81
+        (style (oh-ducks.traversal:element-attribute :style link))
hunk ./claki.lisp 88
+      ((and (stringp style)
+            (cl-ppcre:scan "text-decoration[ ]*:[ ]*none" style))
+       (setf (gethash url *spam-urls*) t))
Tue Jun 28 08:04:14 UTC 2011  pix@kepibu.org
  * Don't consider a page updated if the last-known good is the same as the current version
hunk ./claki.lisp 60
+          (when (string= (gethash page-url *last-known-good*) current-version)
+            (return-from parse-page nil))
Sat Jun 18 23:16:27 UTC 2011  pix@kepibu.org
  * Send cliki reverter thread output to swank's standard-io (the repl in emacs)
hunk ./claki.lisp 242
-#+(or) (sb-thread:make-thread #'run-for-a-while :name "cliki reverter")
+#+(or)
+ (let ((stdout *standard-output*))
+   (sb-thread:make-thread (lambda () (let ((*standard-output* stdout)) (run-for-a-while)))
+                          :name "cliki reverter"))
Sat Jun 18 23:15:57 UTC 2011  pix@kepibu.org
  * Add FIXME noting a behavior of cliki I wasn't expecting
hunk ./claki.lisp 16
+    ;; FIXME: this doesn't work all that well: it turns out cliki uses a single
+    ;; last-modified header for the entire wiki, rather than one per page, so we
+    ;; aren't saving ourselves (or cliki) much with this.
Sat Jun 18 23:15:27 UTC 2011  pix@kepibu.org
  * Print when we start and stop a run.
hunk ./claki.lisp 228
+(defun now ()
+  "Return the current time as a string laid out according to +SIMPLE-TIME+."
+  (let ((timestamp (local-time:now)))
+    (local-time:format-timestring nil timestamp :format +simple-time+)))
hunk ./claki.lisp 231
-  (dotimes (i 20)
+  (format t "; Beginning run at ~a~%" (now))
+  (dotimes (i (* 2 24))
hunk ./claki.lisp 234
-    (format t "; Unattented run at ~a~%" (local-time:format-timestring nil (local-time:now) :format +simple-time+))
+    (format t "; Unattended run at ~a~%" (now))
hunk ./claki.lisp 236
-    (save-state)))
+    (save-state))
+  (format t "; Run ended at ~a~%~%" (now)))
Sat Jun 18 23:14:59 UTC 2011  pix@kepibu.org
  * Bugfix: if we manually classify an URL as spam, don't then mark the page that
  had that URL as known-good.
hunk ./claki.lisp 84
-(defun request-classification (url)
+(defun request-classification (url &optional page version)
hunk ./claki.lisp 95
+      (when (and page version)
+        (pushnew (list page version) *has-spam* :test #'equal))
hunk ./claki.lisp 100
+      (when (and page version)
+        (pushnew (list page version) *has-spam* :test #'equal))
hunk ./claki.lisp 114
-                          (request-classification url))
+                          (request-classification url page version))
Thu Jun 16 05:58:14 UTC 2011  pix@kepibu.org
  * Automagical updating of last-known good if a non-spammy update was made.
hunk ./claki.lisp 42
-(defvar *has-spam* (list) "list of (cliki-page . version) known to have spam.")
+(defvar *has-spam* (list) "List of (cliki-page version) entries known to have spam.")
+(defvar *updated-pages* (list) "List of (cliki-page version) entries for pages that have been updated.")
+(defvar *ignore-update* (list) "List of (cliki-page version) entries whose update should not result in updating the last-known good version (e.g., because we made the update ourselves and it was a reversion).")
hunk ./claki.lisp 56
+          (format t "; Page ~s modified, now at version ~a.~%" page-url current-version)
+          (pushnew (list page-url current-version) *updated-pages* :test #'equal)
hunk ./claki.lisp 141
+(defun update-last-known-good ()
+  "Promote pending page updates to last-known good, keeping back any that still await classification."
+  (flet ((forget-update (page-url version)
+           ;; Drop the (page-url version) entry from the pending updates.
+           (removef *updated-pages* (list page-url version) :test #'equal))
+         (awaiting-classification-p (page-url version)
+           ;; True when some url queued for classification came from this
+           ;; exact page version (it could still turn out to be spam).
+           (some (lambda (entry)
+                   (and (string= page-url (second entry))
+                        (string= version (third entry))))
+                 *urls-to-classify*)))
+    ;; Versions known to contain spam must never become known-good.
+    (loop :for (page-url version) :in *has-spam*
+          :do (forget-update page-url version))
+    ;; Updates we made ourselves are skipped too, so the previously cached
+    ;; known-good stays in use and cliki is spared a refetch.
+    (loop :for (page-url version) :in *ignore-update*
+          :do (forget-update page-url version))
+    (let ((still-pending '()))
+      (loop :for (page-url version) :in *updated-pages*
+            :do (cond
+                  ((awaiting-classification-p page-url version)
+                   (push (list page-url version) still-pending))
+                  (t
+                   (format t "; Updating last-known good of ~s from ~a to ~a~%" page-url (gethash page-url *last-known-good*) version)
+                   (setf (gethash page-url *last-known-good*) version))))
+      ;; Only the held-back entries remain pending, in their original order.
+      (setf *updated-pages* (nreverse still-pending)))))
+
+(defun numstring+1 (numstring)
+  "Parse NUMSTRING as an integer and return the printed representation of its successor."
+  (let ((n (parse-integer numstring)))
+    (write-to-string (+ n 1))))
+
hunk ./claki.lisp 173
-                                         ("name"    . "Claki (Revertobot Alpha)")))
+                                         ("name"    . "Claki (Revertobot Beta)")))
hunk ./claki.lisp 179
+       (pushnew (list url (numstring+1 current-version)) *ignore-update* :test #'equal)
hunk ./claki.lisp 206
+  (update-last-known-good)
hunk ./claki.lisp 212
+  (update-last-known-good)
hunk ./claki.lisp 259
-    (rs:btree-insert btree 'known-good *last-known-good*)))
+    (rs:btree-insert btree 'known-good *last-known-good*)
+    (rs:btree-insert btree 'updated-pages *updated-pages*)
+    (rs:btree-insert btree 'our-updates *ignore-update*)))
hunk ./claki.lisp 269
-          *last-known-good* (rs:btree-search btree 'known-good))))
+          *last-known-good* (rs:btree-search btree 'known-good)
+          *updated-pages* (rs:btree-search btree 'updated-pages)
+          *ignore-update* (rs:btree-search btree 'our-updates))))
Wed Jun 15 09:22:12 UTC 2011  pix@kepibu.org
  * More declarative way of saying "every x minutes, give or take"
hunk ./claki.lisp 191
+(defun plus-or-minus (x y)
+  "Return a random number from the interval [X - Y, X + Y); X itself when Y is zero."
+  ;; RANDOM requires a positive limit, so a zero variance has to be handled
+  ;; separately instead of being passed through as (random 0).
+  (if (zerop y)
+      x
+      (+ (- x y) (random (* 2 y)))))
hunk ./claki.lisp 197
-    (sleep (minutes (+ 25 (random 10))))
+    (sleep (minutes (plus-or-minus 30 5)))
Wed Jun 15 09:21:48 UTC 2011  pix@kepibu.org
  * Cache known-good copies; simplify working with rucksack
hunk ./claki.lisp 155
-;; TODO?: persist this, so we don't have to care whether cliki remembers it
hunk ./claki.lisp 158
-  (multiple-value-bind (page status headers)
-      (drakma:http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
-    (cond
-      ((= 200 status) page)
-      (t (error "crap!")))))
+  (or (find-in-cache url version)
+      (multiple-value-bind (page status headers)
+          (drakma:http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
+        (declare (ignore headers))
+        (cond
+          ((= 200 status)
+           ;; We have to coerce to a simple-string because rucksack mistakenly
+           ;; assumes non-simple strings have a fill pointer
+           (cache-known-good url version (coerce page 'simple-string))
+           page)
+          (t (error "crap!"))))))
+
+#+(or) (get-cliki-source "araneida" "281")
hunk ./claki.lisp 205
+(defmacro with-rucksack-and-transaction ((rucksack) (&rest root-vars) &body body)
+  "Run BODY with the rucksack at *STATE-FILE* open, inside a transaction, with ROOT-VARS bound to its roots."
+  ;; RUCKSACK is the variable name handed to RUCKSACK:WITH-RUCKSACK.
+  ;; ROOT-VARS are bound positionally to the list returned by ORDERED-ROOTS.
+  ;; They are &optional, so a variable without a matching root is NIL, and
+  ;; any surplus roots are swallowed by an ignored &rest variable.
+  (with-unique-names (rest)
+    `(rucksack:with-rucksack (,rucksack *state-file*)
+       (rucksack:with-transaction ()
+         (destructuring-bind (&optional ,@root-vars &rest ,rest) (ordered-roots rs:*rucksack*)
+           ;; A root variable named _ is a placeholder for a root the caller
+           ;; wants to skip; declare it ignored so it causes no warning.
+           (declare (ignore ,rest) ,@(when (member '_ root-vars) `((ignore _))))
+           ,@body)))))
+
+(defun ordered-roots (sack)
+  "Return the roots of SACK sorted by ascending object id."
+  (let ((roots (rs:rucksack-roots sack)))
+    (sort roots #'< :key #'rs:object-id)))
+
+(defun initialize-rucksack ()
+  "Add string<-keyed btree roots to the rucksack at *STATE-FILE* until it has two."
+  ;; Two roots are needed: the state-saving code binds the first root and the
+  ;; page-cache functions bind the second.  Nothing is added when the
+  ;; rucksack already has two or more roots.
+  (with-rucksack-and-transaction (rs) ()
+    (let ((missing (- 2 (length (rs:rucksack-roots rs:*rucksack*)))))
+      (loop :repeat missing
+            :do (rs:add-rucksack-root (make-instance 'rs:btree :key< 'string<)
+                                      rs:*rucksack*)))))
+
+#+(or) (initialize-rucksack)
+
hunk ./claki.lisp 225
-  (rucksack:with-rucksack (rs *state-file*)
-    (rucksack:with-transaction ()
-      (unless (rs:rucksack-roots rs:*rucksack*)
-        (rs:add-rucksack-root (make-instance 'rs:btree :key< 'string<) rs:*rucksack*))
-      (let ((btree (first (rs:rucksack-roots rs:*rucksack*))))
-        (rs:btree-insert btree 'spam-urls *spam-urls*)
-        (rs:btree-insert btree 'ham-urls *okay-urls*)
-        (rs:btree-insert btree 'known-good *last-known-good*)))))
+  (with-rucksack-and-transaction (rs) (btree)
+    (rs:btree-insert btree 'spam-urls *spam-urls*)
+    (rs:btree-insert btree 'ham-urls *okay-urls*)
+    (rs:btree-insert btree 'known-good *last-known-good*)))
hunk ./claki.lisp 233
-  (rs:with-rucksack (rs *state-file*)
-    (rs:with-transaction ()
-      (let ((btree (first (rs:rucksack-roots rs:*rucksack*))))
-        (setf *spam-urls* (rs:btree-search btree 'spam-urls)
-              *okay-urls* (rs:btree-search btree 'ham-urls)
-              *last-known-good* (rs:btree-search btree 'known-good))))))
+  (with-rucksack-and-transaction (rs) (btree)
+    (setf *spam-urls* (rs:btree-search btree 'spam-urls)
+          *okay-urls* (rs:btree-search btree 'ham-urls)
+          *last-known-good* (rs:btree-search btree 'known-good))))
hunk ./claki.lisp 239
+#+(or) (with-rucksack-and-transaction (rs) ()
+         (ordered-roots rs:*rucksack*))
+
+(defun cache-known-good (cliki-page version content)
+  "Record (VERSION CONTENT) under the key CLIKI-PAGE in the second rucksack root."
+  (with-rucksack-and-transaction (rs) (_ btree)
+    (let ((entry (list version content)))
+      (rs:btree-insert btree cliki-page entry))))
+
+(defun find-in-cache (cliki-page version)
+  "Return the cached content for CLIKI-PAGE if it was cached at VERSION, otherwise NIL."
+  (with-rucksack-and-transaction (rs) (_ btree)
+    ;; A missing key yields NIL rather than an error, which destructures to
+    ;; two NIL variables below.
+    (let ((entry (rs:btree-search btree cliki-page :default-value nil :errorp nil)))
+      (destructuring-bind (&optional cached-version cached-content) entry
+        (when (and cached-version
+                   (string= cached-version version))
+          cached-content)))))
+
+#+(or) (find-in-cache "araneida" "281")