Sun Nov 15 14:25:29 UTC 2009 pix@kepibu.org * Status commit Move the CSS-selector-specific stuff into its own file. Still don't have it working, of course. Bleh. diff -rN -u old-Oh, Ducks!/css-selector-unifier.asd new-Oh, Ducks!/css-selector-unifier.asd --- old-Oh, Ducks!/css-selector-unifier.asd 2018-11-21 04:59:29.000000000 +0000 +++ new-Oh, Ducks!/css-selector-unifier.asd 2018-11-21 04:59:29.000000000 +0000 @@ -22,4 +22,5 @@ ((:file "interface") (:file "lhtml" :depends-on ("interface")) (:file "pt" :depends-on ("interface")))) + (:file "selectors") (:file "unification-templates"))) diff -rN -u old-Oh, Ducks!/regexp-template.lisp new-Oh, Ducks!/regexp-template.lisp --- old-Oh, Ducks!/regexp-template.lisp 2018-11-21 04:59:29.000000000 +0000 +++ new-Oh, Ducks!/regexp-template.lisp 2018-11-21 04:59:29.000000000 +0000 @@ -13,6 +13,29 @@ (append vars '(?&rest)) keys)))) +;; for parsing front-to-back +(defmethod make-template ((kind (eql 'regexp^)) (spec cons)) + (destructuring-bind (re-kwd regexp &optional vars &rest keys) + spec + (declare (ignore re-kwd)) + (make-instance 'unify::regular-expression-template + :spec (list* 'unify::regexp + (concatenate 'string "^" regexp "(.*)$") + (append vars '(?&rest)) + keys)))) + +;; For parsing back-to-front +(defmethod make-template ((kind (eql 'regexp$)) (spec cons)) + (destructuring-bind (re-kwd regexp &optional vars &rest keys) + spec + (declare (ignore re-kwd)) + (make-instance 'unify::regular-expression-template + :spec (list* 'unify::regexp + (concatenate 'string "^(.*)" regexp "$") + (append '(?&rest) vars) + keys)))) + + ;; (match (#t(regexp+ "^f(o+)" (?o)) "fooooooobar") (values o &rest)) ;; => "ooooooo", "bar" diff -rN -u old-Oh, Ducks!/selectors.lisp new-Oh, Ducks!/selectors.lisp --- old-Oh, Ducks!/selectors.lisp 1970-01-01 00:00:00.000000000 +0000 +++ new-Oh, Ducks!/selectors.lisp 2018-11-21 04:59:29.000000000 +0000 @@ -0,0 +1,170 @@ +#|| +Okay, here's how I figure selectors should work: +* breadth-first traversal through the document +* collect nodes (elements) which match the selector(s) + +Matching selectors: +- The original plan was to start with the first selector in our + list and work our way into the document. +- Another plan might be to start with the last selector in our + list and work our way up the document tree. +- Yet another option would be to utilize the recursive structure + of the document in our search, keeping track of which nodes + match which selectors as we traverse into the document. + Though, by that description, I'm not sure I'm clever enough to + actually make it work. +We have to work our way through the entire document structure +anyway, which means starting from the outside and working our way +in won't gain us any efficiency, as I had originally thought. + +For example, given a structure of + (html + (body + (p ((class "foo")) "text") + (p () (span ((class "bar")) "more text")))) +and a selector of + html p>span.bar +we would walk the document tree asking first + "Does this element have class 'bar'?" +and only if that is true, continuing to ask + "Is this a 'span' element?" + "Is this element a child of a 'p' element?" + "Is that 'p' element a descendant of an 'html' element?" + +I note, however, that a fully-reversed ordering should not be strictly +necessary--we really only need reverse at the combinators. So we +could also ask: + "Is this a 'span' element?" + "Is it of the 'bar' class?" + "Is it a child of a 'p' element?" + "Is that 'p' element a descendant of an 'html' element?" + +Hrm... how does ScrAPI do this? Or any of the other projects which +offer element selection by CSS selector? +||# +(in-package #:css-selector-unifier) + +#.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|) + +(defclass selector (unify::string-template) + ((matcher :reader matcher :initarg :matcher))) + +(defgeneric selector-p (object) + (:method ((ob selector)) t) + (:method ((ob t)) nil)) + +(defclass simple-selector (simple-selector) + ((arg :reader selector-arg :initarg :arg))) + +(defmethod print-object ((selector simple-selector) stream) + (format stream "#" (selector-arg selector))) + +(defclass combinator (selector) ()) +(defclass child-combinator (combinator) ()) +(defclass descendant-combinator (combinator) ()) +(defclass adjacent-combinator (combinator) ()) +(defclass sibling-combinator (combinator) ()) + +(defclass type-selector (simple-selector) ()) +(defclass id-selector (simple-selector) ()) +(defclass class-selector (simple-selector) ()) + +(defmethod initialize-instance :after ((template selector) &key) + (unless (slot-boundp template 'matcher) + (let ((selector (template-spec template))) + (setf (slot-value template 'matcher) (parse-selector (string-trim " " selector)))))) + +;; forwards +(defun parse-selector (selector) + (match-case (selector) + ;; combinators + (#T(regexp+ "^[ ]*[>][ ]*" ()) (list (make-instance 'child-combinator :matcher (parse-selector &rest)))) + (#T(regexp+ "^[ ]+" ()) (list (make-instance 'descendant-combinator :matcher (parse-selector &rest)))) + ;; simple selector + (#T(regexp+ "^(\\w+)" (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest))) + (#T(regexp+ "^[#](\\w+)" (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest))) + (#T(regexp+ "^[\\.](\\w+)" (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest))))) + +;; backwards +#+(or) +(defun parse-selector (selector) + (match-case (selector) + ;; combinators + (#T(regexp$ "[ ]*[>][ ]*" ()) (list (make-instance 'child-combinator :matcher (parse-selector &rest)))) + (#T(regexp$ "[ ]+" ()) (list (make-instance 'descendant-combinator :matcher (parse-selector &rest)))) + ;; simple selector + (#T(regexp$ "(\\w+)" (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest))) + (#T(regexp$ "[#](\\w+)" (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest))) + (#T(regexp$ "[\\.](\\w+)" (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest))))) + + +;; FIXME: the find/matches split seems to be causing me some mental +;; trouble. I'm not sure how to handle combinators now. (Not that I +;; was doing very well with them before.) +;; +;; Should probably map this out on a whiteboard. I'm not doing very +;; well with just trying to hack it. + +(defgeneric find-matching-elements (selector elements)) + +(defmethod find-matching-elements ((selector simple-selector) (elements list)) + (nconc + (remove-if-not (lambda (el) (element-matches-p selector el)) elements) + (remove-if #'null + (mapcar (lambda (element) (find-matching-elements selector (element-children element))) + elements))) + #+(or) + (loop :for element :in elements + :when (element-matches-p selector element) + :collect element)) + +#+(or) ;; hrm...this doesn't seem right... doesn't handle combinators! +(defmethod find-matching-elements ((selector list) (elements list)) + (flet ((all-selectors-match (element) + (every (lambda (s) (element-matches-p s element)) selector))) + (remove-if-not #'all-selectors-match elements)) + #+(or) + (loop :for element :in elements + :when (every (lambda (s) (element-matches-p m element)) selector) + :collect element)) + +(defvar *parent*) +;; if *parent* is equal to element-parent, we've found a child element. +;; But, uh, who sets *parent*? +(defmethod find-matching-elements ((selector child-combinator) (elements list)) + ) + +(defvar *ancestor*) + + +(defgeneric element-matches-p (selector element)) + +(defmethod element-matches-p ((selector selector) (element t)) + nil) + +(defmethod element-matches-p ((selector type-selector) element) + (element-type-equal element (selector-arg selector))) + +(defmethod element-matches-p ((selector id-selector) element) + (string= (element-id element) (selector-arg selector))) + +(defmethod element-matches-p ((selector class-selector) element) + (member (selector-arg selector) + (element-classes element) + :test #'string=)) + +(defmethod element-matches-p ((selector child-combinator) element) + (plusp (length (find-matching-elements selector (element-children element))))) + +#+TODO +(defmethod element-matches-p ((selector descendant-combinator) element) + (flet ((all-match (element) (every (lambda (m) (element-matches-p m element)) (matcher selector)))) + #+nil + (when (all-match element) element) + (let ((elements (cddr element))) + (format t "el: ~s~%" elements) + (case (selector-arg selector) + (:direct (loop :for element :in elements + :when (all-match element) + :collect element)) + (t (css-collect-elements selector elements)))))) diff -rN -u old-Oh, Ducks!/unification-templates.lisp new-Oh, Ducks!/unification-templates.lisp --- old-Oh, Ducks!/unification-templates.lisp 2018-11-21 04:59:29.000000000 +0000 +++ new-Oh, Ducks!/unification-templates.lisp 2018-11-21 04:59:29.000000000 +0000 @@ -1,7 +1,5 @@ (in-package #:css-selector-unifier) -#.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|) - (defclass css-selector-template (unify::expression-template) (#+(or) (parser :reader parser) ;; subtype determines parser @@ -61,109 +59,3 @@ &optional (env (make-empty-environment)) &key &allow-other-keys) (unify template (chtml:parse document (chtml:make-lhtml-builder)) env)) - - - -;; FIXME: rename to selector -(defclass css-specifier (unify::string-template) - ((matcher :reader matcher :initarg :matcher))) - -(defgeneric specifier-p (object) - (:method ((ob css-specifier)) t) - (:method ((ob t)) nil)) - -;; FIXME: rename to simple-selector -(defclass css-selector (unify::template) - ((arg :reader selector-arg :initarg :arg))) - -#|| TODO - (defclass combinator (selector) ()) - (defclass child-combinator (combinator) ()) - (defclass descendant-combinator (combinator) ()) - (defclass adjacent-combinator (combinator) ()) - (defclass sibling-combinator (combinator) ()) -||# - -(defmethod print-object ((selector css-selector) stream) - (format stream "#" (selector-arg selector))) - -(defclass css-type-selector (css-selector) ()) -(defclass css-id-selector (css-selector) ()) -(defclass css-class-selector (css-selector) ()) - -(defclass css-descendant-selector (css-specifier css-selector) ()) - -(defmethod initialize-instance :after ((template css-specifier) &key) - (unless (slot-boundp template 'matcher) - (let ((specifier (template-spec template))) - (setf (slot-value template 'matcher) (parse-css-specifier (string-trim " " specifier)))))) - -(defun parse-css-specifier (specifier) - (match-case (specifier) - ;; combinators - (#T(regexp+ "^[ ]*[>][ ]*" ()) (list (make-instance 'css-descendant-selector :arg :direct :matcher (parse-css-specifier &rest)))) - (#T(regexp+ "^[ ]+" ()) (list (make-instance 'css-descendant-selector :arg t :matcher (parse-css-specifier &rest)))) - ;; simple selector sequence - (#T(regexp+ "^(\\w+)" (?type)) (cons (make-instance 'css-type-selector :arg type) (parse-css-specifier &rest))) - (#T(regexp+ "^[#](\\w+)" (?id)) (cons (make-instance 'css-id-selector :arg id) (parse-css-specifier &rest))) - (#T(regexp+ "^[\\.](\\w+)" (?class)) (cons (make-instance 'css-class-selector :arg class) (parse-css-specifier &rest))))) - -;; FIXME: rename to select-subject? -;; FIXME: make css-select methods use functions defined in traversal/implementation.lisp -(defgeneric css-select (specifier document)) - -;; FIXME?: move to within (css-select css-specifier cons) -;; FIXME: Should this really be this ugly? -(defun css-collect-elements (selector elements) - (flet ((last-matcher () (car (last (matcher selector)))) - (all-match (element) (every (lambda (m) (css-select m element)) (matcher selector)))) - (loop :for element :in elements - :when (all-match element) - :if (specifier-p (last-matcher)) - :nconc (css-select (last-matcher) element) - :else - :collect element - :end - :end - :when (consp element) - :nconc (css-collect-elements selector (cddr element))))) - -(defmethod css-select ((selector css-selector) (element string)) - nil) - -(defmethod css-select ((selector css-specifier) (document cons)) - (css-collect-elements selector - ;; Urg. I may be doing something wrong here. :P - (if (and (listp document) (listp (car document))) - document - (list document)))) - - -(defmethod css-select ((selector css-type-selector) (element cons)) - (when (string-equal (car element) (selector-arg selector)) - element)) - -(defun lhtml-attr (attr element) - (cadr (assoc attr (cadr element)))) - -(defmethod css-select ((selector css-id-selector) (element cons)) - (when (string= (lhtml-attr :id element) (selector-arg selector)) - element)) - -(defmethod css-select ((selector css-class-selector) (element cons)) - (when (member (selector-arg selector) - (split-sequence:split-sequence #\Space (lhtml-attr :class element) :remove-empty-subseqs t) - :test #'string=) - element)) - -(defmethod css-select ((selector css-descendant-selector) (element cons)) - (flet ((all-match (element) (every (lambda (m) (css-select m element)) (matcher selector)))) - #+nil - (when (all-match element) element) - (let ((elements (cddr element))) - (format t "el: ~s~%" elements) - (case (selector-arg selector) - (:direct (loop :for element :in elements - :when (all-match element) - :collect element)) - (t (css-collect-elements selector elements))))))