Mon Jan 4 05:59:48 UTC 2010 pix@kepibu.org * "lispier" regexps, l*last-child stuff Probably against best practices to commit monolithic patches, but this is still an unreleased library, so I don't care. Not really sure I care for the sexp-based regexps, but they do make it easy to use the same regexp bits across several places, and I don't have a lexer/parser handy, so they'll have to do for now. diff -rN -u old-Oh, Ducks!/notes new-Oh, Ducks!/notes --- old-Oh, Ducks!/notes 2015-10-18 17:29:08.000000000 +0000 +++ new-Oh, Ducks!/notes 2015-10-18 17:29:08.000000000 +0000 @@ -134,18 +134,24 @@ * [ ] selectors involving descendants ** write documentation ** improve selector support -*** positional selectors [3/13] - * [X] :nth-child(n) - * [X] :nth-child(xn+y) - * [ ] :nth-last-child - * [ ] :nth-last-child(xn+y) +*** positional selectors [9/19] + * [X] :nth-child(an+b) + * [X] :nth-child(b) + * [X] :nth-child(odd|even) + * [X] :nth-last-child(an+b) + * [X] :nth-last-child(b) + * [X] :nth-last-child(odd|even) * [X] :first-child - * [ ] :last-child - * [ ] :nth-of-type - * [ ] :nth-last-of-type + * [X] :last-child + * [ ] :nth-of-type(an+b) + * [ ] :nth-of-type(b) + * [ ] :nth-of-type(odd|even) + * [ ] :nth-last-of-type(an+b) + * [ ] :nth-last-of-type(b) + * [ ] :nth-last-of-type(odd|even) * [ ] :first-of-type * [ ] :last-of-type - * [ ] :only-child + * [X] :only-child * [ ] :only-of-type * [ ] :empty *** attribute selectors [0/7] diff -rN -u old-Oh, Ducks!/regexp-template.lisp new-Oh, Ducks!/regexp-template.lisp --- old-Oh, Ducks!/regexp-template.lisp 2015-10-18 17:29:08.000000000 +0000 +++ new-Oh, Ducks!/regexp-template.lisp 2015-10-18 17:29:08.000000000 +0000 @@ -31,11 +31,18 @@ (declare (ignore re-kwd)) (make-instance 'unify::regular-expression-template :spec (list* 'unify::regexp - (concatenate 'string "^(.*?)" regexp "$") + (cond + ((stringp regexp) + (concatenate 'string "^(.*?)" regexp "$")) + ((listp regexp) + `(:sequence :start-anchor + (:register (:non-greedy-repetition 0 nil :everything)) + ,@regexp + :end-anchor)) + (t (error "Unknown regexp format."))) (append '(?&rest) vars) keys)))) - ;; (match (#t(regexp+ "^f(o+)" (?o)) "fooooooobar") (values o &rest)) ;; => "ooooooo", "bar" diff -rN -u old-Oh, Ducks!/selectors.lisp new-Oh, Ducks!/selectors.lisp --- old-Oh, Ducks!/selectors.lisp 2015-10-18 17:29:08.000000000 +0000 +++ new-Oh, Ducks!/selectors.lisp 2015-10-18 17:29:08.000000000 +0000 @@ -39,6 +39,16 @@ (defclass nth-child-selector (simple-selector) ()) (defclass nth-last-child-selector (nth-child-selector) ()) +(defmethod initialize-instance :after ((selector nth-child-selector) + &key (asign "+") a + (bsign "+") b + namedp) + (setf (slot-value selector 'arg) + (if namedp + (cons 2 (if (string-equal "odd" b) 1 0)) + (cons (parse-integer (format nil "~a~a" asign (or a 1))) + (parse-integer (format nil "~a~a" bsign (or b 0))))))) + (defmethod print-object ((selector universal-selector) stream) (format stream "#")) @@ -53,52 +63,96 @@ (defmethod print-object ((selector %implicit-element-selector) stream) (print-unreadable-object (selector stream :type t))) +(cl-ppcre:define-parse-tree-synonym \s* + (:non-greedy-repetition 0 nil :whitespace-char-class)) +(cl-ppcre:define-parse-tree-synonym \s+ + (:greedy-repetition 1 nil :whitespace-char-class)) +(cl-ppcre:define-parse-tree-synonym sign + (:char-class #\+ #\-)) +(cl-ppcre:define-parse-tree-synonym sign? + (:greedy-repetition 0 1 sign)) +(cl-ppcre:define-parse-tree-synonym integer + (:greedy-repetition 1 nil :digit-class)) +(cl-ppcre:define-parse-tree-synonym name + (:greedy-repetition 1 nil (:char-class :word-char-class #\-))) +(cl-ppcre:define-parse-tree-synonym $name + (:register name)) +(cl-ppcre:define-parse-tree-synonym an+b + (:sequence + (:register sign?) (:greedy-repetition 0 1 (:register integer)) + #\n \s* + (:register sign?) \s* (:greedy-repetition 0 1 (:register integer)))) +(cl-ppcre:define-parse-tree-synonym b + (:register (:sequence sign? integer))) +(cl-ppcre:define-parse-tree-synonym odd/even + (:register (:alternation "odd" "even"))) + ;; FIXME: proper parsing (e.g., by using the W3C's provided FLEX and YACC bits). (defun parse-selector (selector) (match-case (selector) ;; combinators - (#T(regexp$ "[ ]*[~][ ]*" ()) + (#T(regexp$ (\s* #\~ \s*) ()) (list (make-instance 'sibling-combinator :matcher (or (parse-selector &rest) %implicit-element-selector)))) - (#T(regexp$ "[ ]*[+][ ]*" ()) + (#T(regexp$ (\s* #\+ \s*) ()) (list (make-instance 'adjacent-combinator :matcher (or (parse-selector &rest) %implicit-element-selector)))) - (#T(regexp$ "[ ]*[>][ ]*" ()) + (#T(regexp$ (\s* #\> \s*) ()) (list (make-instance 'child-combinator :matcher (or (parse-selector &rest) %implicit-element-selector)))) - (#T(regexp$ "[ ]+" ()) + (#T(regexp$ (\s+) ()) (list (make-instance 'descendant-combinator :matcher (or (parse-selector &rest) %implicit-element-selector)))) ;; simple selectors ;; cyclic (An+B, n+B) - (#T(regexp$ ":nth-child\\([ ]*([+-]?)([0-9]+)?n[ ]*([+-])[ ]*([0-9]+)?[ ]*\\)" (?asign ?a ?bsign ?b)) + (#T(regexp$ (":nth-child(" \s* an+b \s* ")") + (?asign ?a ?bsign ?b)) (cons (make-instance 'nth-child-selector - :arg (cons (funcall (if (string= "-" asign) #'- #'+) - (if (stringp a) (parse-integer a) 1)) - (funcall (if (string= "-" bsign) #'- #'+) - (if (stringp b) (parse-integer b) 0)))) + :asign asign :a a + :bsign bsign :b b) + (parse-selector &rest))) + (#T(regexp$ (":nth-last-child(" \s* an+b \s* ")") + (?asign ?a ?bsign ?b)) + (cons (make-instance 'nth-last-child-selector + :asign asign :a a + :bsign bsign :b b) (parse-selector &rest))) ;; absolute (B) - (#T(regexp$ ":nth-child\\([ ]*([+-]?[0-9]+)[ ]*\\)" (?b)) - (cons (make-instance 'nth-child-selector :arg (cons 0 (parse-integer b))) (parse-selector &rest))) + (#T(regexp$ (":nth-child(" \s* b \s* ")") + (?b)) + (cons (make-instance 'nth-child-selector :a 0 :b b) + (parse-selector &rest))) + (#T(regexp$ (":nth-last-child(" \s* b \s* ")") + (?b)) + (cons (make-instance 'nth-last-child-selector :a 0 :b b) + (parse-selector &rest))) ;; named (odd, even) - (#T(regexp$ ":nth-child\\([ ]*(odd|even)[ ]*\\)" (?which)) - (cons (make-instance 'nth-child-selector :arg (cons 2 (if (string-equal "odd" which) 1 0))) + (#T(regexp$ (":nth-child(" \s* odd/even \s* ")") + (?which)) + (cons (make-instance 'nth-child-selector :namedp t :b which) (parse-selector &rest))) - (#T(regexp$ ":first-child" ()) - (cons (make-instance 'nth-child-selector :arg (cons 0 1)) (parse-selector &rest))) - (#T(regexp$ "[#](\\w+)" (?id)) + (#T(regexp$ (":nth-last-child(" \s* odd/even \s* ")") + (?which)) + (cons (make-instance 'nth-last-child-selector :namedp t :b which) + (parse-selector &rest))) + ;; Everybody else + (#T(regexp$ (":first-child") ()) + (cons (make-instance 'nth-child-selector :a 0 :b 1) + (parse-selector &rest))) + (#T(regexp$ (":last-child") ()) + (cons (make-instance 'nth-last-child-selector :a 0 :b 1) + (parse-selector &rest))) + (#T(regexp$ (":only-child") ()) + (list* (make-instance 'nth-child-selector :a 0 :b 1) + (make-instance 'nth-last-child-selector :a 0 :b 1) + (parse-selector &rest))) + (#T(regexp$ (#\# $name) (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest))) - (#T(regexp$ "[\\.](\\w+)" (?class)) + (#T(regexp$ (#\. $name) (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest))) - (#T(regexp$ "(\\w+)" (?type)) + (#T(regexp$ ($name) (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest))) - (#T(regexp$ "\\*" ()) + (#T(regexp$ (#\*) ()) (cons (make-instance 'universal-selector) (parse-selector &rest))) (t (unless (string= selector "") (error "Unable to to parse selector: ~s" selector))))) -;; Hrm... would something like this make things more or less clear? -;#t(lex$ (":nth-child(" :s? (?a :int) "n" :s? (or #\+ #\-) :s? (?b :int) :s? ")")) -;#t(lex$ ("#" (?id :identifier))) -;#t(lex$ (?type :identifier)) - (defun subjects-in-list (selector element-list) (reduce #'nconc (mapcar (curry #'subjects-of selector) @@ -106,7 +160,7 @@ (defun subjects-of (selector element) (nconc - (when (subject-p element selector) (list element)) + (when (subject-p selector element) (list element)) (subjects-in-list selector (element-children element)))) (defgeneric subject-p (selector element)) @@ -117,27 +171,30 @@ (defmethod subject-p ((selector id-selector) element) (string= (element-id element) (selector-arg selector))) +(defun an+b? (a b element siblings) + (when-let* ((pos (1+ (position element siblings :test #'eq)))) + ;; pos = An + B + (cond + ;; pos = 0n + B + ((= 0 a) (= b pos)) + ;; (pos - B)/A = n + (t (and (zerop (mod (- pos b) a)) + (not (minusp (/ (- pos b) a)))))))) + (defmethod subject-p ((selector nth-child-selector) element) - (when-let* ((parent (element-parent element)) - (pos (position element (funcall (typecase selector - (nth-last-child-selector #'reverse) - (nth-child-selector #'identity)) - (element-children parent)) :test #'eq))) - (let ((pos (1+ pos)) - (a (car (selector-arg selector))) - (b (cdr (selector-arg selector)))) - ;; pos = An + B - (cond - ;; pos = 0n + B - ((= 0 a) (= b pos)) - ;; (pos - B)/A = n - (t (and (zerop (mod (- pos b) a)) - (not (minusp (/ (- pos b) a))))))))) + (when-let* ((arg (selector-arg selector)) + (parent (element-parent element))) + (an+b? (car arg) (cdr arg) element (element-children parent)))) + +(defmethod subject-p ((selector nth-last-child-selector) element) + (when-let* ((arg (selector-arg selector)) + (parent (element-parent element))) + (an+b? (car arg) (cdr arg) element (reverse (element-children parent))))) (defmethod subject-p ((selector class-selector) element) (member (selector-arg selector) - (element-classes element) - :test #'string=)) + (element-classes element) + :test #'string=)) (defmethod subject-p ((selector universal-selector) element) (declare (ignore element selector)) diff -rN -u old-Oh, Ducks!/tests.lisp new-Oh, Ducks!/tests.lisp --- old-Oh, Ducks!/tests.lisp 2015-10-18 17:29:08.000000000 +0000 +++ new-Oh, Ducks!/tests.lisp 2015-10-18 17:29:08.000000000 +0000 @@ -53,6 +53,17 @@ "
I do not like cheese.
I like cheese.
") (values div i)) +(match (#T(html (:model dom) + ("div:nth-last-child(1)" . ?div) + ("div:last-child" . ?d2)) + "
I do not like cheese.
I like cheese.
") + (values div d2)) + +(match (#T(html (:model dom) + ("i:only-child" . ?i)) + "
I do not like cheese.
I like cheese.
") + (values i)) + ;; throws 'unification-failure (match (#T(html (:model dom) ("q" . ?div))