repos
/
claki
/ annotate_shade
summary
|
shortlog
|
log
|
tree
|
commit
|
commitdiff
|
headdiff
|
annotate
|
headblob
|
headfilediff
|
filehistory
normal
|
plain
|
shade
|
zebra
Initial import
Annotate for file claki.lisp
2011-06-13 pix
1
(defpackage #:claki
19:08:27 '
2
(:use #:cl #:unify :alexandria))
'
3
(in-package #:claki)
'
4
'
5
(defvar *last-modified* (make-hash-table :test 'equal) "hash table of cliki pages and the last-modified header we last received.")
'
6
'
7
#+(or) (clrhash *last-modified*)
'
8
'
9
(defun get-cliki-page (url)
'
10
"Returns a page from cliki if it has been modified since we last saw it or nil
'
11
if it has not been modified. Signals an error otherwise."
'
12
(multiple-value-bind (page status headers)
'
13
(drakma:http-request (format nil "http://cliki.net/~a" url)
'
14
:additional-headers (when (gethash url *last-modified*)
'
15
`((:if-modified-since (gethash url *last-modified*)))))
'
16
(cond
'
17
;; If the page hasn't been modified, no need to update
'
18
((= 304 status) nil)
'
19
;; The "Recent Changes" page doesn't return 304s, but does return a last-modified.
'
20
((and (gethash url *last-modified*)
'
21
(cdr (assoc :last-modified headers))
'
22
(string= (cdr (assoc :last-modified headers)) (gethash url *last-modified*)))
'
23
nil)
'
24
((= 200 status)
'
25
(setf (gethash url *last-modified*) (cdr (assoc :last-modified headers)))
'
26
page)
'
27
(t (error "crap!")))))
'
28
'
29
(defun get-recent-changes ()
'
30
(when-let ((page (get-cliki-page "Recent%20Changes")))
'
31
(match (#t(oh-ducks:html ("blockquote > b > a.internal" . ?links)) page)
'
32
(loop :for link :in (remove-duplicates links :test #'equalp
'
33
:key (lambda (x) (oh-ducks.traversal:element-attribute :href x))
'
34
:from-end t)
'
35
:collect (oh-ducks.traversal:element-attribute :href link)))))
'
36
'
37
#+(or) (get-recent-changes)
'
38
'
39
(defvar *spam-urls* (make-hash-table :test 'equal) "hash table of spam urls.")
'
40
(defvar *okay-urls* (make-hash-table :test 'equal) "hash table of acceptable urls.")
'
41
'
42
#+(or) (clrhash *okay-urls*)
'
43
'
44
(defun url-domain (url)
'
45
(puri:uri-host (puri:parse-uri url)))
'
46
'
47
(defun parse-page (page-url)
'
48
(when-let ((page (get-cliki-page page-url)))
'
49
(match (#t(oh-ducks:html ("a[href^=http]" . ?a)
'
50
("#footer > b" . #t(list ?b))) page)
'
51
(let ((current-version (oh-ducks.traversal:element-content b)))
'
52
(dolist (link a)
'
53
(let ((url (oh-ducks.traversal:element-attribute :href link)))
'
54
(tagbody
'
55
:handle-url
'
56
(cond
'
57
((or (gethash url *okay-urls*)
'
58
(gethash (url-domain url) *okay-urls*))
'
59
#+(or) (do-nothing))
'
60
((or (gethash url *spam-urls*)
'
61
(gethash (url-domain url) *spam-urls*))
'
62
(maybe-request-last-known-good page-url)
'
63
(return-from parse-page (revert-page page-url current-version (gethash page-url *last-known-good*))))
'
64
(t
'
65
(request-classification url)
'
66
(go :handle-url))))))))))
'
67
'
68
#+(or) (parse-page "araneida")
'
69
'
70
(defun request-classification (url)
'
71
(restart-case (error 'simple-error :format-control "Please classify the URL ~s."
'
72
:format-arguments (list url))
'
73
(mark-url-okay ()
'
74
:report "Mark this URL as acceptable."
'
75
(setf (gethash url *okay-urls*) t))
'
76
(mark-domain-okay ()
'
77
:report "Mark the domain as acceptable."
'
78
(setf (gethash (url-domain url) *okay-urls*) t))
'
79
(mark-url-spam ()
'
80
:report "Mark this URL as spam."
'
81
(setf (gethash url *spam-urls*) t))
'
82
(mark-domain-spam ()
'
83
:report "Mark the domain as spam."
'
84
(setf (gethash (url-domain url) *spam-urls*) t))))
'
85
'
86
(defvar *last-known-good* (make-hash-table :test 'equal) "hash table of cliki pages and the last-known \"good\" revision.")
'
87
'
88
(defun read-number ()
'
89
(format t "Enter a version: ")
'
90
(list (format nil "~a" (parse-integer (read-line)))))
'
91
'
92
(defun maybe-request-last-known-good (page)
'
93
(unless (gethash page *last-known-good*)
'
94
(restart-case (error 'simple-error :format-control "Do not know of a good version of cliki page ~s."
'
95
:format-arguments (list page))
'
96
(specify-version (version)
'
97
:interactive read-number
'
98
:report "Specify a known-good version."
'
99
(setf (gethash page *last-known-good*) version)))))
'
100
'
101
#+(or) (maybe-request-last-known-good "araneida")
'
102
'
103
(defun revert-page (page current-version to-version)
'
104
(drakma:http-request (format nil "http://cliki.net/edit/~a" page)
'
105
:method :post
'
106
:parameters `(("version" . ,current-version)
'
107
("T0" . "BODY")
'
108
("E0" . ,(get-cliki-source page to-version))
'
109
("summary" . "Spam detected, reverting to Known-Good.")
'
110
("captcha" . "lisp")
'
111
("name" . "Claki (Revertobot Alpha)"))))
'
112
'
113
(defun get-cliki-source (url version)
'
114
"Fetches the source text of a given version of a cliki page. That is, it
'
115
returns the text you should POST to revert a cliki page to the given version."
'
116
(multiple-value-bind (page status headers)
'
117
(drakma:http-request (format nil "http://cliki.net/~a?source&v=~a" url version))
'
118
(cond
'
119
((= 200 status) page)
'
120
(t (error "crap!")))))
'
121
'
122
(defun revert-new-spam ()
'
123
(let ((modified-pages (get-recent-changes)))
'
124
(loop :for page :in modified-pages
'
125
:do (parse-page page))))
'
126
'
127
#+(or) (revert-new-spam)
'
128
'
129
#+(or) (loop (sleep (* 60 60)) (revert-new-spam))