Status commit
Sun Nov 15 14:25:29 UTC 2009 pix@kepibu.org
* Status commit
Move the CSS-selector-specific stuff into its own file.
Still don't have it working, of course. Bleh.
diff -rN -u old-Oh, Ducks!/css-selector-unifier.asd new-Oh, Ducks!/css-selector-unifier.asd
--- old-Oh, Ducks!/css-selector-unifier.asd 2013-12-05 00:36:16.000000000 +0000
+++ new-Oh, Ducks!/css-selector-unifier.asd 2013-12-05 00:36:16.000000000 +0000
@@ -22,4 +22,5 @@
((:file "interface")
(:file "lhtml" :depends-on ("interface"))
(:file "pt" :depends-on ("interface"))))
+ (:file "selectors")
(:file "unification-templates")))
diff -rN -u old-Oh, Ducks!/regexp-template.lisp new-Oh, Ducks!/regexp-template.lisp
--- old-Oh, Ducks!/regexp-template.lisp 2013-12-05 00:36:16.000000000 +0000
+++ new-Oh, Ducks!/regexp-template.lisp 2013-12-05 00:36:16.000000000 +0000
@@ -13,6 +13,29 @@
(append vars '(?&rest))
keys))))
+;; for parsing front-to-back
+(defmethod make-template ((kind (eql 'regexp^)) (spec cons))
+ (destructuring-bind (re-kwd regexp &optional vars &rest keys)
+ spec
+ (declare (ignore re-kwd))
+ (make-instance 'unify::regular-expression-template
+ :spec (list* 'unify::regexp
+ (concatenate 'string "^" regexp "(.*)$")
+ (append vars '(?&rest))
+ keys))))
+
+;; For parsing back-to-front
+(defmethod make-template ((kind (eql 'regexp$)) (spec cons))
+ (destructuring-bind (re-kwd regexp &optional vars &rest keys)
+ spec
+ (declare (ignore re-kwd))
+ (make-instance 'unify::regular-expression-template
+ :spec (list* 'unify::regexp
+ (concatenate 'string "^(.*)" regexp "$")
+ (append '(?&rest) vars)
+ keys))))
+
+
;; (match (#t(regexp+ "^f(o+)" (?o)) "fooooooobar") (values o &rest))
;; => "ooooooo", "bar"
diff -rN -u old-Oh, Ducks!/selectors.lisp new-Oh, Ducks!/selectors.lisp
--- old-Oh, Ducks!/selectors.lisp 1970-01-01 00:00:00.000000000 +0000
+++ new-Oh, Ducks!/selectors.lisp 2013-12-05 00:36:16.000000000 +0000
@@ -0,0 +1,170 @@
+#||
+Okay, here's how I figure selectors should work:
+* breadth-first traversal through the document
+* collect nodes (elements) which match the selector(s)
+
+Matching selectors:
+- The original plan was to start with the first selector in our
+ list and work our way into the document.
+- Another plan might be to start with the last selector in our
+ list and work our way up the document tree.
+- Yet another option would be to utilize the recursive structure
+ of the document in our search, keeping track of which nodes
+ match which selectors as we traverse into the document.
+ Though, by that description, I'm not sure I'm clever enough to
+ actually make it work.
+We have to work our way through the entire document structure
+anyway, which means starting from the outside and working our way
+in won't gain us any efficiency, as I had originally thought.
+
+For example, given a structure of
+ (html
+ (body
+ (p ((class "foo")) "text")
+ (p () (span ((class "bar")) "more text"))))
+and a selector of
+ html p>span.bar
+we would walk the document tree asking first
+ "Does this element have class 'bar'?"
+and only if that is true, continuing to ask
+ "Is this a 'span' element?"
+ "Is this element a child of a 'p' element?"
+ "Is that 'p' element a descendant of an 'html' element?"
+
+I note, however, that a fully-reversed ordering should not be strictly
+necessary--we really only need reverse at the combinators. So we
+could also ask:
+ "Is this a 'span' element?"
+ "Is it of the 'bar' class?"
+ "Is it a child of a 'p' element?"
+ "Is that 'p' element a descendant of an 'html' element?"
+
+Hrm... how does ScrAPI do this? Or any of the other projects which
+offer element selection by CSS selector?
+||#
+(in-package #:css-selector-unifier)
+
+#.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|)
+
+(defclass selector (unify::string-template)
+ ((matcher :reader matcher :initarg :matcher)))
+
+(defgeneric selector-p (object)
+ (:method ((ob selector)) t)
+ (:method ((ob t)) nil))
+
+(defclass simple-selector (simple-selector)
+ ((arg :reader selector-arg :initarg :arg)))
+
+(defmethod print-object ((selector simple-selector) stream)
+ (format stream "#<selector ~s>" (selector-arg selector)))
+
+(defclass combinator (selector) ())
+(defclass child-combinator (combinator) ())
+(defclass descendant-combinator (combinator) ())
+(defclass adjacent-combinator (combinator) ())
+(defclass sibling-combinator (combinator) ())
+
+(defclass type-selector (simple-selector) ())
+(defclass id-selector (simple-selector) ())
+(defclass class-selector (simple-selector) ())
+
+(defmethod initialize-instance :after ((template selector) &key)
+ (unless (slot-boundp template 'matcher)
+ (let ((selector (template-spec template)))
+ (setf (slot-value template 'matcher) (parse-selector (string-trim " " selector))))))
+
+;; forwards
+(defun parse-selector (selector)
+ (match-case (selector)
+ ;; combinators
+ (#T(regexp+ "^[ ]*[>][ ]*" ()) (list (make-instance 'child-combinator :matcher (parse-selector &rest))))
+ (#T(regexp+ "^[ ]+" ()) (list (make-instance 'descendant-combinator :matcher (parse-selector &rest))))
+ ;; simple selector
+ (#T(regexp+ "^(\\w+)" (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest)))
+ (#T(regexp+ "^[#](\\w+)" (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest)))
+ (#T(regexp+ "^[\\.](\\w+)" (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest)))))
+
+;; backwards
+#+(or)
+(defun parse-selector (selector)
+ (match-case (selector)
+ ;; combinators
+ (#T(regexp$ "[ ]*[>][ ]*" ()) (list (make-instance 'child-combinator :matcher (parse-selector &rest))))
+ (#T(regexp$ "[ ]+" ()) (list (make-instance 'descendant-combinator :matcher (parse-selector &rest))))
+ ;; simple selector
+ (#T(regexp$ "(\\w+)" (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest)))
+ (#T(regexp$ "[#](\\w+)" (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest)))
+ (#T(regexp$ "[\\.](\\w+)" (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest)))))
+
+
+;; FIXME: the find/matches split seems to be causing me some mental
+;; trouble. I'm not sure how to handle combinators now. (Not that I
+;; was doing very well with them before.)
+;;
+;; Should probably map this out on a whiteboard. I'm not doing very
+;; well with just trying to hack it.
+
+(defgeneric find-matching-elements (selector elements))
+
+(defmethod find-matching-elements ((selector simple-selector) (elements list))
+ (nconc
+ (remove-if-not (lambda (el) (element-matches-p selector el)) elements)
+ (remove-if #'null
+ (mapcar (lambda (element) (find-matching-elements selector (element-children element)))
+ elements)))
+ #+(or)
+ (loop :for element :in elements
+ :when (element-matches-p selector element)
+ :collect element))
+
+#+(or) ;; hrm...this doesn't seem right... doesn't handle combinators!
+(defmethod find-matching-elements ((selector list) (elements list))
+ (flet ((all-selectors-match (element)
+ (every (lambda (s) (element-matches-p s element)) selector)))
+ (remove-if-not #'all-selectors-match elements))
+ #+(or)
+ (loop :for element :in elements
+ :when (every (lambda (s) (element-matches-p m element)) selector)
+ :collect element))
+
+(defvar *parent*)
+;; if *parent* is equal to element-parent, we've found a child element.
+;; But, uh, who sets *parent*?
+(defmethod find-matching-elements ((selector child-combinator) (elements list))
+ )
+
+(defvar *ancestor*)
+
+
+(defgeneric element-matches-p (selector element))
+
+(defmethod element-matches-p ((selector selector) (element t))
+ nil)
+
+(defmethod element-matches-p ((selector type-selector) element)
+ (element-type-equal element (selector-arg selector)))
+
+(defmethod element-matches-p ((selector id-selector) element)
+ (string= (element-id element) (selector-arg selector)))
+
+(defmethod element-matches-p ((selector class-selector) element)
+ (member (selector-arg selector)
+ (element-classes element)
+ :test #'string=))
+
+(defmethod element-matches-p ((selector child-combinator) element)
+ (plusp (length (find-matching-elements selector (element-children element)))))
+
+#+TODO
+(defmethod element-matches-p ((selector descendant-combinator) element)
+ (flet ((all-match (element) (every (lambda (m) (element-matches-p m element)) (matcher selector))))
+ #+nil
+ (when (all-match element) element)
+ (let ((elements (cddr element)))
+ (format t "el: ~s~%" elements)
+ (case (selector-arg selector)
+ (:direct (loop :for element :in elements
+ :when (all-match element)
+ :collect element))
+ (t (css-collect-elements selector elements))))))
diff -rN -u old-Oh, Ducks!/unification-templates.lisp new-Oh, Ducks!/unification-templates.lisp
--- old-Oh, Ducks!/unification-templates.lisp 2013-12-05 00:36:16.000000000 +0000
+++ new-Oh, Ducks!/unification-templates.lisp 2013-12-05 00:36:16.000000000 +0000
@@ -1,7 +1,5 @@
(in-package #:css-selector-unifier)
-#.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|)
-
(defclass css-selector-template (unify::expression-template)
(#+(or)
(parser :reader parser) ;; subtype determines parser
@@ -61,109 +59,3 @@
&optional (env (make-empty-environment))
&key &allow-other-keys)
(unify template (chtml:parse document (chtml:make-lhtml-builder)) env))
-
-
-
-;; FIXME: rename to selector
-(defclass css-specifier (unify::string-template)
- ((matcher :reader matcher :initarg :matcher)))
-
-(defgeneric specifier-p (object)
- (:method ((ob css-specifier)) t)
- (:method ((ob t)) nil))
-
-;; FIXME: rename to simple-selector
-(defclass css-selector (unify::template)
- ((arg :reader selector-arg :initarg :arg)))
-
-#|| TODO
- (defclass combinator (selector) ())
- (defclass child-combinator (combinator) ())
- (defclass descendant-combinator (combinator) ())
- (defclass adjacent-combinator (combinator) ())
- (defclass sibling-combinator (combinator) ())
-||#
-
-(defmethod print-object ((selector css-selector) stream)
- (format stream "#<selector ~s>" (selector-arg selector)))
-
-(defclass css-type-selector (css-selector) ())
-(defclass css-id-selector (css-selector) ())
-(defclass css-class-selector (css-selector) ())
-
-(defclass css-descendant-selector (css-specifier css-selector) ())
-
-(defmethod initialize-instance :after ((template css-specifier) &key)
- (unless (slot-boundp template 'matcher)
- (let ((specifier (template-spec template)))
- (setf (slot-value template 'matcher) (parse-css-specifier (string-trim " " specifier))))))
-
-(defun parse-css-specifier (specifier)
- (match-case (specifier)
- ;; combinators
- (#T(regexp+ "^[ ]*[>][ ]*" ()) (list (make-instance 'css-descendant-selector :arg :direct :matcher (parse-css-specifier &rest))))
- (#T(regexp+ "^[ ]+" ()) (list (make-instance 'css-descendant-selector :arg t :matcher (parse-css-specifier &rest))))
- ;; simple selector sequence
- (#T(regexp+ "^(\\w+)" (?type)) (cons (make-instance 'css-type-selector :arg type) (parse-css-specifier &rest)))
- (#T(regexp+ "^[#](\\w+)" (?id)) (cons (make-instance 'css-id-selector :arg id) (parse-css-specifier &rest)))
- (#T(regexp+ "^[\\.](\\w+)" (?class)) (cons (make-instance 'css-class-selector :arg class) (parse-css-specifier &rest)))))
-
-;; FIXME: rename to select-subject?
-;; FIXME: make css-select methods use functions defined in traversal/implementation.lisp
-(defgeneric css-select (specifier document))
-
-;; FIXME?: move to within (css-select css-specifier cons)
-;; FIXME: Should this really be this ugly?
-(defun css-collect-elements (selector elements)
- (flet ((last-matcher () (car (last (matcher selector))))
- (all-match (element) (every (lambda (m) (css-select m element)) (matcher selector))))
- (loop :for element :in elements
- :when (all-match element)
- :if (specifier-p (last-matcher))
- :nconc (css-select (last-matcher) element)
- :else
- :collect element
- :end
- :end
- :when (consp element)
- :nconc (css-collect-elements selector (cddr element)))))
-
-(defmethod css-select ((selector css-selector) (element string))
- nil)
-
-(defmethod css-select ((selector css-specifier) (document cons))
- (css-collect-elements selector
- ;; Urg. I may be doing something wrong here. :P
- (if (and (listp document) (listp (car document)))
- document
- (list document))))
-
-
-(defmethod css-select ((selector css-type-selector) (element cons))
- (when (string-equal (car element) (selector-arg selector))
- element))
-
-(defun lhtml-attr (attr element)
- (cadr (assoc attr (cadr element))))
-
-(defmethod css-select ((selector css-id-selector) (element cons))
- (when (string= (lhtml-attr :id element) (selector-arg selector))
- element))
-
-(defmethod css-select ((selector css-class-selector) (element cons))
- (when (member (selector-arg selector)
- (split-sequence:split-sequence #\Space (lhtml-attr :class element) :remove-empty-subseqs t)
- :test #'string=)
- element))
-
-(defmethod css-select ((selector css-descendant-selector) (element cons))
- (flet ((all-match (element) (every (lambda (m) (css-select m element)) (matcher selector))))
- #+nil
- (when (all-match element) element)
- (let ((elements (cddr element)))
- (format t "el: ~s~%" elements)
- (case (selector-arg selector)
- (:direct (loop :for element :in elements
- :when (all-match element)
- :collect element))
- (t (css-collect-elements selector elements))))))