/
selectors.lisp
  1 (in-package #:oh-ducks)
  2 
  3 (defvar *implicit-element* nil
  4   "The element to be considered as an implicit element to be matched by combinators without a leading qualifier.  E.g., \"> a\" will match <a> tags directly under *implicit-element*, and \"+ a\" will match <a> tags directly following *implicit-element*.")
  5 
  6 #.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|)
  7 
  8 (defclass selector (unify::string-template)
  9   ((matcher :reader matcher :initarg :matcher)))
 10 
 11 (defgeneric selector-p (object)
 12   (:method ((ob selector)) t)
 13   (:method ((ob t)) nil))
 14 
 15 (defclass simple-selector (selector)
 16   ((arg :reader selector-arg :initarg :arg)))
 17 
 18 (defmethod print-object ((selector simple-selector) stream)
 19   (format stream "#<selector ~s>" (selector-arg selector)))
 20 
 21 (defclass combinator (selector) ())
 22 
 23 (defgeneric combinator-p (object)
 24   (:method ((ob combinator)) t)
 25   (:method ((ob t)) nil))
 26 
 27 (defmethod print-object ((selector combinator) stream)
 28   (format stream "#<~s ~s>" (class-name (class-of selector)) (matcher selector)))
 29 
 30 (defclass child-combinator (combinator) ())
 31 (defclass descendant-combinator (combinator) ())
 32 (defclass adjacent-combinator (combinator) ())
 33 (defclass sibling-combinator (combinator) ())
 34 
 35 (defclass universal-selector (simple-selector) ())
 36 (defclass type-selector  (simple-selector) ())
 37 (defclass id-selector    (simple-selector) ())
 38 (defclass class-selector (simple-selector) ())
 39 (defclass nth-child-selector (simple-selector) ())
 40 (defclass nth-last-child-selector (nth-child-selector) ())
 41 (defclass nth-of-type-selector (nth-child-selector) ())
 42 (defclass nth-last-of-type-selector (nth-of-type-selector) ())
 43 (defclass empty-selector (simple-selector) ())
 44 
 45 (defclass attribute-selector (simple-selector)
 46   ((val :reader attribute-value :initarg :value)))
 47 (defclass attribute-present-selector (attribute-selector) ())
 48 (defclass attribute-equal-selector (attribute-selector) ())
 49 (defclass attribute-starts-with-selector (attribute-selector) ())
 50 
 51 (defmethod initialize-instance :after ((selector nth-child-selector)
 52                                        &key (asign "+") a
 53                                             (bsign "+") b
 54                                             namedp)
 55   (setf (slot-value selector 'arg)
 56         (if namedp
 57             (cons 2 (if (string-equal "odd" b) 1 0))
 58             (cons (parse-integer (format nil "~a~a" asign (or a 1)))
 59                   (parse-integer (format nil "~a~a" bsign (or b 0)))))))
 60 
 61 (defmethod print-object ((selector universal-selector) stream)
 62   (format stream "#<universal-selector>"))
 63 
 64 (defmethod initialize-instance :after ((template combinator) &key)
 65   (unless (slot-boundp template 'matcher)
 66     (let ((selector (template-spec template)))
 67       (setf (slot-value template 'matcher) (parse-selector (string-trim " " selector))))))
 68 
 69 (defclass %implicit-element-selector (selector) ())
 70 (defparameter %implicit-element-selector (make-instance '%implicit-element-selector))
 71 
 72 (defmethod print-object ((selector %implicit-element-selector) stream)
 73   (print-unreadable-object (selector stream :type t)))
 74 
 75 (cl-ppcre:define-parse-tree-synonym \s*
 76   (:non-greedy-repetition 0 nil :whitespace-char-class))
 77 (cl-ppcre:define-parse-tree-synonym \s+
 78   (:greedy-repetition 1 nil :whitespace-char-class))
 79 (cl-ppcre:define-parse-tree-synonym sign
 80   (:char-class #\+ #\-))
 81 (cl-ppcre:define-parse-tree-synonym sign?
 82   (:greedy-repetition 0 1 sign))
 83 (cl-ppcre:define-parse-tree-synonym integer
 84   (:greedy-repetition 1 nil :digit-class))
 85 (cl-ppcre:define-parse-tree-synonym name
 86   (:greedy-repetition 1 nil (:char-class :word-char-class #\-)))
 87 (cl-ppcre:define-parse-tree-synonym $name
 88   (:register name))
 89 (cl-ppcre:define-parse-tree-synonym an+b
 90   (:sequence
 91    (:register sign?) (:greedy-repetition 0 1 (:register integer))
 92    #\n \s*
 93    (:register sign?) \s* (:greedy-repetition 0 1 (:register integer))))
 94 (cl-ppcre:define-parse-tree-synonym b
 95   (:register (:sequence sign? integer)))
 96 (cl-ppcre:define-parse-tree-synonym odd/even
 97   (:register (:alternation "odd" "even")))
 98 
 99 ;; FIXME: proper parsing (e.g., by using the W3C's provided FLEX and YACC bits).
100 (defun parse-selector (selector)
101   (match-case (selector)
102     ;; combinators
103     (#T(regexp$ (\s* #\~ \s*) ())
104      (list (make-instance 'sibling-combinator    :matcher (or (parse-selector &rest) %implicit-element-selector))))
105     (#T(regexp$ (\s* #\+ \s*) ())
106      (list (make-instance 'adjacent-combinator   :matcher (or (parse-selector &rest) %implicit-element-selector))))
107     (#T(regexp$ (\s* #\> \s*) ())
108      (list (make-instance 'child-combinator      :matcher (or (parse-selector &rest) %implicit-element-selector))))
109     (#T(regexp$ (\s+)         ())
110      (list (make-instance 'descendant-combinator :matcher (or (parse-selector &rest) %implicit-element-selector))))
111     ;; simple selectors
112     ;; attribute selectors
113     (#T(regexp$ ("[" $name "]") (?attribute))
114      (cons (make-instance 'attribute-present-selector :arg attribute)
115            (parse-selector &rest)))
116     (#T(regexp$ ("[" $name "=" $name "]") (?attribute ?value))
117      (cons (make-instance 'attribute-equal-selector :arg attribute :value value)
118            (parse-selector &rest)))
119     (#T(regexp$ ("[" $name "^=" $name "]") (?attribute ?value))
120      (cons (make-instance 'attribute-starts-with-selector :arg attribute :value value)
121            (parse-selector &rest)))
122     ;; cyclic (An+B, n+B)
123     (#T(regexp$ (":nth-child(" \s* an+b \s* ")")
124                 (?asign ?a ?bsign ?b))
125      (cons (make-instance 'nth-child-selector
126                           :asign asign :a a
127                           :bsign bsign :b b)
128            (parse-selector &rest)))
129     (#T(regexp$ (":nth-last-child(" \s* an+b \s* ")")
130                 (?asign ?a ?bsign ?b))
131      (cons (make-instance 'nth-last-child-selector
132                           :asign asign :a a
133                           :bsign bsign :b b)
134            (parse-selector &rest)))
135     (#T(regexp$ (":nth-of-type(" \s* an+b \s* ")")
136                 (?asign ?a ?bsign ?b))
137      (cons (make-instance 'nth-of-type-selector
138                           :asign asign :a a
139                           :bsign bsign :b b)
140            (parse-selector &rest)))
141     (#T(regexp$ (":nth-last-of-type(" \s* an+b \s* ")")
142                 (?asign ?a ?bsign ?b))
143      (cons (make-instance 'nth-last-of-type-selector
144                           :asign asign :a a
145                           :bsign bsign :b b)
146            (parse-selector &rest)))
147     ;; absolute (B)
148     (#T(regexp$ (":nth-child(" \s* b \s* ")")
149                 (?b))
150      (cons (make-instance 'nth-child-selector :a 0 :b b)
151            (parse-selector &rest)))
152     (#T(regexp$ (":nth-last-child(" \s* b \s* ")")
153                 (?b))
154      (cons (make-instance 'nth-last-child-selector :a 0 :b b)
155            (parse-selector &rest)))
156     (#T(regexp$ (":nth-of-type(" \s* b \s* ")")
157                 (?b))
158      (cons (make-instance 'nth-of-type-selector :a 0 :b b)
159            (parse-selector &rest)))
160     (#T(regexp$ (":nth-last-of-type(" \s* b \s* ")")
161                 (?b))
162      (cons (make-instance 'nth-last-of-type-selector :a 0 :b b)
163            (parse-selector &rest)))
164     ;; named (odd, even)
165     (#T(regexp$ (":nth-child(" \s* odd/even \s* ")")
166                 (?which))
167      (cons (make-instance 'nth-child-selector :namedp t :b which)
168            (parse-selector &rest)))
169     (#T(regexp$ (":nth-last-child(" \s* odd/even \s* ")")
170                 (?which))
171      (cons (make-instance 'nth-last-child-selector :namedp t :b which)
172            (parse-selector &rest)))
173     (#T(regexp$ (":nth-of-type(" \s* odd/even \s* ")")
174                 (?which))
175      (cons (make-instance 'nth-of-type-selector :namedp t :b which)
176            (parse-selector &rest)))
177     (#T(regexp$ (":nth-last-of-type(" \s* odd/even \s* ")")
178                 (?which))
179      (cons (make-instance 'nth-last-of-type-selector :namedp t :b which)
180            (parse-selector &rest)))
181     ;; Everybody else
182     (#T(regexp$ (":first-child") ())
183      (cons (make-instance 'nth-child-selector :a 0 :b 1)
184            (parse-selector &rest)))
185     (#T(regexp$ (":last-child")  ())
186      (cons (make-instance 'nth-last-child-selector :a 0 :b 1)
187            (parse-selector &rest)))
188     (#T(regexp$ (":only-child")  ())
189       (list* (make-instance 'nth-child-selector      :a 0 :b 1)
190              (make-instance 'nth-last-child-selector :a 0 :b 1)
191              (parse-selector &rest)))
192     (#T(regexp$ (":first-of-type") ())
193      (cons (make-instance 'nth-of-type-selector :a 0 :b 1)
194            (parse-selector &rest)))
195     (#T(regexp$ (":last-of-type")  ())
196      (cons (make-instance 'nth-last-of-type-selector :a 0 :b 1)
197            (parse-selector &rest)))
198     (#T(regexp$ (":only-of-type")  ())
199       (list* (make-instance 'nth-of-type-selector      :a 0 :b 1)
200              (make-instance 'nth-last-of-type-selector :a 0 :b 1)
201              (parse-selector &rest)))
202     (#T(regexp$ (":empty")       ())
203       (cons (make-instance 'empty-selector)           (parse-selector &rest)))
204     (#T(regexp$ (#\# $name)      (?id))
205      (cons (make-instance 'id-selector :arg id)       (parse-selector &rest)))
206     (#T(regexp$ (#\. $name)      (?class))
207      (cons (make-instance 'class-selector :arg class) (parse-selector &rest)))
208     (#T(regexp$ ($name)          (?type))
209      (cons (make-instance 'type-selector :arg type)   (parse-selector &rest)))
210     (#T(regexp$ (#\*)            ())
211      (cons (make-instance 'universal-selector)        (parse-selector &rest)))
212     (t (unless (string= selector "")
213          (error "Unable to to parse selector: ~s" selector)))))
214 
215 (defun subjects-in-list (selector element-list)
216   (reduce #'nconc
217           (mapcar (curry #'subjects-of selector)
218                   element-list)))
219 
220 (defun subjects-of (selector element)
221   (nconc
222    (when (subject-p selector element) (list element))
223    (subjects-in-list selector (element-children element))))
224 
225 (defgeneric subject-p (selector element))
226 
227 (defmethod subject-p ((selector type-selector) element)
228   (element-type-equal element (selector-arg selector)))
229 
230 (defmethod subject-p ((selector id-selector) element)
231   (string= (element-id element) (selector-arg selector)))
232 
233 (defun an+b? (a b element siblings)
234   (when-let* ((pos (1+ (position element siblings :test #'eq))))
235     ;; pos = An + B
236     (cond
237       ;; pos = 0n + B
238       ((= 0 a) (= b pos))
239       ;; (pos - B)/A = n
240       (t (and (zerop (mod (- pos b) a))
241               (not (minusp (/ (- pos b) a))))))))
242 
243 (defmethod subject-p ((selector nth-child-selector) element)
244   (when-let* ((arg (selector-arg selector))
245               (parent (element-parent element)))
246     (an+b? (car arg) (cdr arg) element (element-children parent))))
247 
248 (defmethod subject-p ((selector nth-last-child-selector) element)
249   (when-let* ((arg (selector-arg selector))
250               (parent (element-parent element)))
251     (an+b? (car arg) (cdr arg) element (reverse (element-children parent)))))
252 
253 (defmethod subject-p ((selector nth-of-type-selector) element)
254   (when-let* ((arg (selector-arg selector))
255               (parent (element-parent element)))
256     (an+b? (car arg) (cdr arg) element
257            (remove-if-not (rcurry #'element-type-equal (element-type element))
258                           (element-children parent)))))
259 
260 (defmethod subject-p ((selector nth-last-of-type-selector) element)
261   (when-let* ((arg (selector-arg selector))
262               (parent (element-parent element)))
263     (an+b? (car arg) (cdr arg) element
264            (reverse
265             (remove-if-not (rcurry #'element-type-equal (element-type element))
266                            (element-children parent))))))
267 
268 (defmethod subject-p ((selector empty-selector) element)
269   (= 0 (length (element-children element))))
270 
271 (defmethod subject-p ((selector class-selector) element)
272   (member (selector-arg selector)
273           (element-classes element)
274           :test #'string=))
275 
276 (defmethod subject-p ((selector universal-selector) element)
277   (declare (ignore element selector))
278   t)
279 
280 (defmethod subject-p ((selector attribute-present-selector) element)
281   (element-attribute (selector-arg selector) element))
282 
283 (defmethod subject-p ((selector attribute-equal-selector) element)
284   (when-let* ((val (element-attribute (selector-arg selector) element)))
285     (string= val (attribute-value selector))))
286 
287 (defmethod subject-p ((selector attribute-starts-with-selector) element)
288   (when-let* ((val (element-attribute (selector-arg selector) element)))
289     (alexandria:starts-with-subseq (string-downcase (attribute-value selector)) (string-downcase val))))
290 
291 (defmethod subject-p ((selector %implicit-element-selector) element)
292   (eq element *implicit-element*))
293 
294 (defmethod subject-p ((selector list) element)
295   (every (rcurry #'subject-p element) selector))
296 
297 (defmethod subject-p ((selector child-combinator) element)
298   (subject-p (matcher selector) (element-parent element)))
299 
300 (defmethod subject-p ((selector descendant-combinator) element)
301   (some (curry #'subject-p (matcher selector)) (element-ancestors element)))
302 
303 (defmethod subject-p ((selector adjacent-combinator) element)
304   (let* ((parent (element-parent element))
305          (siblings (element-children parent))
306          (ourpos (position element siblings :test #'eq)))
307     (and ourpos
308          (> ourpos 0)
309          (subject-p (matcher selector) (elt siblings (1- ourpos))))))
310 
311 (defmethod subject-p ((selector sibling-combinator) element)
312   (let* ((parent (element-parent element))
313          (siblings (element-children parent))
314          (ourpos (position element siblings :test #'eq)))
315     (and ourpos
316          (> ourpos 0)
317          (find-if (curry #'subject-p (matcher selector)) siblings :end ourpos))))
318 
319 ;; Hello excessively long name
320 (defun terminating-implicit-sibling-combinator-p (selector)
321   (typecase selector
322     ((or sibling-combinator adjacent-combinator)
323      (typecase (matcher selector)
324        (%implicit-element-selector t)
325        (list (terminating-implicit-sibling-combinator-p (car (last (matcher selector)))))))
326     (combinator (terminating-implicit-sibling-combinator-p (matcher selector)))
327     (selector   nil)
328     (null       nil)
329     (list       (terminating-implicit-sibling-combinator-p (car (last selector))))
330     (t          nil)))