"lispier" regexps, l*last-child stuff
Mon Jan 4 05:59:48 UTC 2010 pix@kepibu.org
* "lispier" regexps, l*last-child stuff
Probably against best practices to commit monolithic patches, but this
is still an unreleased library, so I don't care.
Not really sure I care for the sexp-based regexps, but they do make it
easy to use the same regexp bits across several places, and I don't
have a lexer/parser handy, so they'll have to do for now.
diff -rN -u old-Oh, Ducks!/notes new-Oh, Ducks!/notes
--- old-Oh, Ducks!/notes 2013-07-04 18:59:12.000000000 +0000
+++ new-Oh, Ducks!/notes 2013-07-04 18:59:12.000000000 +0000
@@ -134,18 +134,24 @@
* [ ] selectors involving descendants
** write documentation
** improve selector support
-*** positional selectors [3/13]
- * [X] :nth-child(n)
- * [X] :nth-child(xn+y)
- * [ ] :nth-last-child
- * [ ] :nth-last-child(xn+y)
+*** positional selectors [9/19]
+ * [X] :nth-child(an+b)
+ * [X] :nth-child(b)
+ * [X] :nth-child(odd|even)
+ * [X] :nth-last-child(an+b)
+ * [X] :nth-last-child(b)
+ * [X] :nth-last-child(odd|even)
* [X] :first-child
- * [ ] :last-child
- * [ ] :nth-of-type
- * [ ] :nth-last-of-type
+ * [X] :last-child
+ * [ ] :nth-of-type(an+b)
+ * [ ] :nth-of-type(b)
+ * [ ] :nth-of-type(odd|even)
+ * [ ] :nth-last-of-type(an+b)
+ * [ ] :nth-last-of-type(b)
+ * [ ] :nth-last-of-type(odd|even)
* [ ] :first-of-type
* [ ] :last-of-type
- * [ ] :only-child
+ * [X] :only-child
* [ ] :only-of-type
* [ ] :empty
*** attribute selectors [0/7]
diff -rN -u old-Oh, Ducks!/regexp-template.lisp new-Oh, Ducks!/regexp-template.lisp
--- old-Oh, Ducks!/regexp-template.lisp 2013-07-04 18:59:12.000000000 +0000
+++ new-Oh, Ducks!/regexp-template.lisp 2013-07-04 18:59:12.000000000 +0000
@@ -31,11 +31,18 @@
(declare (ignore re-kwd))
(make-instance 'unify::regular-expression-template
:spec (list* 'unify::regexp
- (concatenate 'string "^(.*?)" regexp "$")
+ (cond
+ ((stringp regexp)
+ (concatenate 'string "^(.*?)" regexp "$"))
+ ((listp regexp)
+ `(:sequence :start-anchor
+ (:register (:non-greedy-repetition 0 nil :everything))
+ ,@regexp
+ :end-anchor))
+ (t (error "Unknown regexp format.")))
(append '(?&rest) vars)
keys))))
-
;; (match (#t(regexp+ "^f(o+)" (?o)) "fooooooobar") (values o &rest))
;; => "ooooooo", "bar"
diff -rN -u old-Oh, Ducks!/selectors.lisp new-Oh, Ducks!/selectors.lisp
--- old-Oh, Ducks!/selectors.lisp 2013-07-04 18:59:12.000000000 +0000
+++ new-Oh, Ducks!/selectors.lisp 2013-07-04 18:59:12.000000000 +0000
@@ -39,6 +39,16 @@
(defclass nth-child-selector (simple-selector) ())
(defclass nth-last-child-selector (nth-child-selector) ())
+(defmethod initialize-instance :after ((selector nth-child-selector)
+ &key (asign "+") a
+ (bsign "+") b
+ namedp)
+ (setf (slot-value selector 'arg)
+ (if namedp
+ (cons 2 (if (string-equal "odd" b) 1 0))
+ (cons (parse-integer (format nil "~a~a" asign (or a 1)))
+ (parse-integer (format nil "~a~a" bsign (or b 0)))))))
+
(defmethod print-object ((selector universal-selector) stream)
(format stream "#<universal-selector>"))
@@ -53,52 +63,96 @@
(defmethod print-object ((selector %implicit-element-selector) stream)
(print-unreadable-object (selector stream :type t)))
+(cl-ppcre:define-parse-tree-synonym \s*
+ (:non-greedy-repetition 0 nil :whitespace-char-class))
+(cl-ppcre:define-parse-tree-synonym \s+
+ (:greedy-repetition 1 nil :whitespace-char-class))
+(cl-ppcre:define-parse-tree-synonym sign
+ (:char-class #\+ #\-))
+(cl-ppcre:define-parse-tree-synonym sign?
+ (:greedy-repetition 0 1 sign))
+(cl-ppcre:define-parse-tree-synonym integer
+ (:greedy-repetition 1 nil :digit-class))
+(cl-ppcre:define-parse-tree-synonym name
+ (:greedy-repetition 1 nil (:char-class :word-char-class #\-)))
+(cl-ppcre:define-parse-tree-synonym $name
+ (:register name))
+(cl-ppcre:define-parse-tree-synonym an+b
+ (:sequence
+ (:register sign?) (:greedy-repetition 0 1 (:register integer))
+ #\n \s*
+ (:register sign?) \s* (:greedy-repetition 0 1 (:register integer))))
+(cl-ppcre:define-parse-tree-synonym b
+ (:register (:sequence sign? integer)))
+(cl-ppcre:define-parse-tree-synonym odd/even
+ (:register (:alternation "odd" "even")))
+
;; FIXME: proper parsing (e.g., by using the W3C's provided FLEX and YACC bits).
(defun parse-selector (selector)
(match-case (selector)
;; combinators
- (#T(regexp$ "[ ]*[~][ ]*" ())
+ (#T(regexp$ (\s* #\~ \s*) ())
(list (make-instance 'sibling-combinator :matcher (or (parse-selector &rest) %implicit-element-selector))))
- (#T(regexp$ "[ ]*[+][ ]*" ())
+ (#T(regexp$ (\s* #\+ \s*) ())
(list (make-instance 'adjacent-combinator :matcher (or (parse-selector &rest) %implicit-element-selector))))
- (#T(regexp$ "[ ]*[>][ ]*" ())
+ (#T(regexp$ (\s* #\> \s*) ())
(list (make-instance 'child-combinator :matcher (or (parse-selector &rest) %implicit-element-selector))))
- (#T(regexp$ "[ ]+" ())
+ (#T(regexp$ (\s+) ())
(list (make-instance 'descendant-combinator :matcher (or (parse-selector &rest) %implicit-element-selector))))
;; simple selectors
;; cyclic (An+B, n+B)
- (#T(regexp$ ":nth-child\\([ ]*([+-]?)([0-9]+)?n[ ]*([+-])[ ]*([0-9]+)?[ ]*\\)" (?asign ?a ?bsign ?b))
+ (#T(regexp$ (":nth-child(" \s* an+b \s* ")")
+ (?asign ?a ?bsign ?b))
(cons (make-instance 'nth-child-selector
- :arg (cons (funcall (if (string= "-" asign) #'- #'+)
- (if (stringp a) (parse-integer a) 1))
- (funcall (if (string= "-" bsign) #'- #'+)
- (if (stringp b) (parse-integer b) 0))))
+ :asign asign :a a
+ :bsign bsign :b b)
+ (parse-selector &rest)))
+ (#T(regexp$ (":nth-last-child(" \s* an+b \s* ")")
+ (?asign ?a ?bsign ?b))
+ (cons (make-instance 'nth-last-child-selector
+ :asign asign :a a
+ :bsign bsign :b b)
(parse-selector &rest)))
;; absolute (B)
- (#T(regexp$ ":nth-child\\([ ]*([+-]?[0-9]+)[ ]*\\)" (?b))
- (cons (make-instance 'nth-child-selector :arg (cons 0 (parse-integer b))) (parse-selector &rest)))
+ (#T(regexp$ (":nth-child(" \s* b \s* ")")
+ (?b))
+ (cons (make-instance 'nth-child-selector :a 0 :b b)
+ (parse-selector &rest)))
+ (#T(regexp$ (":nth-last-child(" \s* b \s* ")")
+ (?b))
+ (cons (make-instance 'nth-last-child-selector :a 0 :b b)
+ (parse-selector &rest)))
;; named (odd, even)
- (#T(regexp$ ":nth-child\\([ ]*(odd|even)[ ]*\\)" (?which))
- (cons (make-instance 'nth-child-selector :arg (cons 2 (if (string-equal "odd" which) 1 0)))
+ (#T(regexp$ (":nth-child(" \s* odd/even \s* ")")
+ (?which))
+ (cons (make-instance 'nth-child-selector :namedp t :b which)
(parse-selector &rest)))
- (#T(regexp$ ":first-child" ())
- (cons (make-instance 'nth-child-selector :arg (cons 0 1)) (parse-selector &rest)))
- (#T(regexp$ "[#](\\w+)" (?id))
+ (#T(regexp$ (":nth-last-child(" \s* odd/even \s* ")")
+ (?which))
+ (cons (make-instance 'nth-last-child-selector :namedp t :b which)
+ (parse-selector &rest)))
+ ;; Everybody else
+ (#T(regexp$ (":first-child") ())
+ (cons (make-instance 'nth-child-selector :a 0 :b 1)
+ (parse-selector &rest)))
+ (#T(regexp$ (":last-child") ())
+ (cons (make-instance 'nth-last-child-selector :a 0 :b 1)
+ (parse-selector &rest)))
+ (#T(regexp$ (":only-child") ())
+ (list* (make-instance 'nth-child-selector :a 0 :b 1)
+ (make-instance 'nth-last-child-selector :a 0 :b 1)
+ (parse-selector &rest)))
+ (#T(regexp$ (#\# $name) (?id))
(cons (make-instance 'id-selector :arg id) (parse-selector &rest)))
- (#T(regexp$ "[\\.](\\w+)" (?class))
+ (#T(regexp$ (#\. $name) (?class))
(cons (make-instance 'class-selector :arg class) (parse-selector &rest)))
- (#T(regexp$ "(\\w+)" (?type))
+ (#T(regexp$ ($name) (?type))
(cons (make-instance 'type-selector :arg type) (parse-selector &rest)))
- (#T(regexp$ "\\*" ())
+ (#T(regexp$ (#\*) ())
(cons (make-instance 'universal-selector) (parse-selector &rest)))
(t (unless (string= selector "")
(error "Unable to to parse selector: ~s" selector)))))
-;; Hrm... would something like this make things more or less clear?
-;#t(lex$ (":nth-child(" :s? (?a :int) "n" :s? (or #\+ #\-) :s? (?b :int) :s? ")"))
-;#t(lex$ ("#" (?id :identifier)))
-;#t(lex$ (?type :identifier))
-
(defun subjects-in-list (selector element-list)
(reduce #'nconc
(mapcar (curry #'subjects-of selector)
@@ -106,7 +160,7 @@
(defun subjects-of (selector element)
(nconc
- (when (subject-p element selector) (list element))
+ (when (subject-p selector element) (list element))
(subjects-in-list selector (element-children element))))
(defgeneric subject-p (selector element))
@@ -117,27 +171,30 @@
(defmethod subject-p ((selector id-selector) element)
(string= (element-id element) (selector-arg selector)))
+(defun an+b? (a b element siblings)
+ (when-let* ((pos (1+ (position element siblings :test #'eq))))
+ ;; pos = An + B
+ (cond
+ ;; pos = 0n + B
+ ((= 0 a) (= b pos))
+ ;; (pos - B)/A = n
+ (t (and (zerop (mod (- pos b) a))
+ (not (minusp (/ (- pos b) a))))))))
+
(defmethod subject-p ((selector nth-child-selector) element)
- (when-let* ((parent (element-parent element))
- (pos (position element (funcall (typecase selector
- (nth-last-child-selector #'reverse)
- (nth-child-selector #'identity))
- (element-children parent)) :test #'eq)))
- (let ((pos (1+ pos))
- (a (car (selector-arg selector)))
- (b (cdr (selector-arg selector))))
- ;; pos = An + B
- (cond
- ;; pos = 0n + B
- ((= 0 a) (= b pos))
- ;; (pos - B)/A = n
- (t (and (zerop (mod (- pos b) a))
- (not (minusp (/ (- pos b) a)))))))))
+ (when-let* ((arg (selector-arg selector))
+ (parent (element-parent element)))
+ (an+b? (car arg) (cdr arg) element (element-children parent))))
+
+(defmethod subject-p ((selector nth-last-child-selector) element)
+ (when-let* ((arg (selector-arg selector))
+ (parent (element-parent element)))
+ (an+b? (car arg) (cdr arg) element (reverse (element-children parent)))))
(defmethod subject-p ((selector class-selector) element)
(member (selector-arg selector)
- (element-classes element)
- :test #'string=))
+ (element-classes element)
+ :test #'string=))
(defmethod subject-p ((selector universal-selector) element)
(declare (ignore element selector))
diff -rN -u old-Oh, Ducks!/tests.lisp new-Oh, Ducks!/tests.lisp
--- old-Oh, Ducks!/tests.lisp 2013-07-04 18:59:12.000000000 +0000
+++ new-Oh, Ducks!/tests.lisp 2013-07-04 18:59:12.000000000 +0000
@@ -53,6 +53,17 @@
"<div>I do <i>not</i> <i>like</i> cheese.</div><div><span>I like <i>cheese</i>.</span></div>")
(values div i))
+(match (#T(html (:model dom)
+ ("div:nth-last-child(1)" . ?div)
+ ("div:last-child" . ?d2))
+ "<div>I do <i>not</i> <i>like</i> cheese.</div><div><span>I like <i>cheese</i>.</span></div>")
+ (values div d2))
+
+(match (#T(html (:model dom)
+ ("i:only-child" . ?i))
+ "<div>I do <i>not</i> <i>like</i> cheese.</div><div><span>I like <i>cheese</i>.</span></div>")
+ (values i))
+
;; throws 'unification-failure
(match (#T(html (:model dom)
("q" . ?div))