Wed Nov 18 10:23:05 UTC 2009 pix@kepibu.org * Status commit; split to avoid absolute dependency on cxml and closure-html diff -rN -u old-Oh, Ducks!/chtml.lisp new-Oh, Ducks!/chtml.lisp --- old-Oh, Ducks!/chtml.lisp 1970-01-01 00:00:00.000000000 +0000 +++ new-Oh, Ducks!/chtml.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -0,0 +1,26 @@ +(in-package #:oh-ducks) + +;; avoid conflicting with 'sgml:pt +(eval-when (:compile-toplevel :load-toplevel :execute) + (import 'closure-html:pt)) +(eval-when (:compile-toplevel :load-toplevel :execute) + (export 'pt) + (export 'lhtml)) + +(defclass lhtml-template (html-template) ()) +(defclass pt-template (html-template) ()) + +(defmethod document-parser ((template lhtml-template)) + (lambda (document) (chtml:parse document (chtml:make-lhtml-builder)))) +(defmethod document-parser ((template pt-template)) + (lambda (document) (chtml:parse document (chtml:make-pt-builder)))) + +(defmethod make-template-for-parser ((parser (eql 'lhtml)) spec) + (make-instance 'lhtml-template :spec spec)) +(defmethod make-template-for-parser ((parser (eql :lhtml)) spec) + (make-template-for-parser 'lhtml spec)) + +(defmethod make-template-for-parser ((parser (eql 'pt)) spec) + (make-instance 'pt-template :spec spec)) +(defmethod make-template-for-parser ((parser (eql :pt)) spec) + (make-template-for-parser 'pt spec)) diff -rN -u old-Oh, Ducks!/cxml.lisp new-Oh, Ducks!/cxml.lisp --- old-Oh, Ducks!/cxml.lisp 1970-01-01 00:00:00.000000000 +0000 +++ new-Oh, Ducks!/cxml.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -0,0 +1,14 @@ +(in-package #:oh-ducks) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (export 'dom)) + +(defclass dom-template (xml-template) ()) + +(defmethod document-parser ((template dom-template)) + (lambda (document) (cxml:parse document (cxml-dom:make-dom-builder)))) + +(defmethod make-template-for-parser ((parser (eql 'dom)) spec) + (make-instance 'dom-template :spec spec)) +(defmethod make-template-for-parser ((parser (eql :dom)) spec) + (make-template-for-parser 'dom spec)) diff -rN -u old-Oh, Ducks!/oh-ducks.asd new-Oh, Ducks!/oh-ducks.asd --- old-Oh, Ducks!/oh-ducks.asd 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/oh-ducks.asd 2015-10-02 05:34:09.000000000 +0000 @@ -1,8 +1,17 @@ +#+(or fixme todo) +(cerror "Continue anyway." + "The author of \"Oh, ducks!\" tends to use #+FIXME and #+TODO to ~ + mark things as being in-progress. At least one of these exists ~ + in *features*, which may cause unusual behavior.") + +(eval-when (:compile-toplevel :load-toplevel :execute) + (asdf:operate 'asdf:load-op 'asdf-system-connections)) + (defpackage #:oh-ducks.system (:use #:cl #:asdf)) (in-package #:oh-ducks.system) -(asdf:defsystem oh-ducks +(defsystem oh-ducks :version "0" :description "cl-unification templates using CSS-style selectors" :maintainer "pinterface " @@ -11,17 +20,27 @@ ;; TODO: submit a patch for cl-unification to use ;; asdf-system-connections. Getting an unmodified version of ;; cl-unification to load the cl-ppcre stuff is a PITA. - :depends-on (:cl-unification :cl-ppcre :cxml :closure-html :split-sequence) + :depends-on (:cl-unification :cl-ppcre :split-sequence) :serial t - ;; FIXME: ordering :components ((:file "package") (:file "regexp-template") - #+(or) (:file "tests") - (:module traversal + (:module "traversal" :components - ((:file "interface") - (:file "lhtml" :depends-on ("interface")) - (:file "pt" :depends-on ("interface")) - (:file "dom" :depends-on ("interface")))) + ((:file "interface"))) (:file "selectors") - (:file "unification-templates"))) + (:file "templates") + (:file "unify") + #+FIXME (:file "tests"))) + +(defsystem-connection ducks+closure-html + :requires (:oh-ducks :closure-html) + :components ((:file "chtml") + (:module "traversal" + :components (#+FIXME (:file "lhtml") + (:file "pt"))))) + +(defsystem-connection ducks+cxml + :requires (:oh-ducks :cxml) + :components ((:file "cxml") + (:module "traversal" + :components ((:file "dom"))))) diff -rN -u old-Oh, Ducks!/package.lisp new-Oh, Ducks!/package.lisp --- old-Oh, Ducks!/package.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/package.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,3 +1,18 @@ +(defpackage #:oh-ducks.traversal + (:use #:cl) + (:export #:element-children + #:element-parent + #:element-attribute + #:element-type + + #:element-id + #:element-classes + #:element-type-equal + #:element-ancestors)) + (defpackage #:oh-ducks - (:use #:cl #:unify) - (:export #:lhtml)) + (:use #:cl #:unify #:oh-ducks.traversal) + (:export ;; template machinery + #:*default-parser* + #:html + )) diff -rN -u old-Oh, Ducks!/selectors.lisp new-Oh, Ducks!/selectors.lisp --- old-Oh, Ducks!/selectors.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/selectors.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,47 +1,3 @@ -#|| -Okay, here's how I figure selectors should work: -* breadth-first traversal through the document -* collect nodes (elements) which match the selector(s) - -Matching selectors: -- The original plan was to start with the first selector in our - list and work our way into the document. -- Another plan might be to start with the last selector in our - list and work our way up the document tree. -- Yet another option would be to utilize the recursive structure - of the document in our search, keeping track of which nodes - match which selectors as we traverse into the document. - Though, by that description, I'm not sure I'm clever enough to - actually make it work. -We have to work our way through the entire document structure -anyway, which means starting from the outside and working our way -in won't gain us any efficiency, as I had originally thought. - -For example, given a structure of - (html - (body - (p ((class "foo")) "text") - (p () (span ((class "bar")) "more text")))) -and a selector of - html p>span.bar -we would walk the document tree asking first - "Does this element have class 'bar'?" -and only if that is true, continuing to ask - "Is this a 'span' element?" - "Is this element a child of a 'p' element?" - "Is that 'p' element a descendant of an 'html' element?" - -I note, however, that a fully-reversed ordering should not be strictly -necessary--we really only need reverse at the combinators. So we -could also ask: - "Is this a 'span' element?" - "Is it of the 'bar' class?" - "Is it a child of a 'p' element?" - "Is that 'p' element a descendant of an 'html' element?" - -Hrm... how does ScrAPI do this? Or any of the other projects which -offer element selection by CSS selector? -||# (in-package #:oh-ducks) #.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|) @@ -93,7 +49,7 @@ (#T(regexp$ "[#](\\w+)" (?id)) (cons (make-instance 'id-selector :arg id) (parse-selector &rest))) (#T(regexp$ "[\\.](\\w+)" (?class)) (cons (make-instance 'class-selector :arg class) (parse-selector &rest))) (#T(regexp$ "(\\w+)" (?type)) (cons (make-instance 'type-selector :arg type) (parse-selector &rest))) - #+(or) + #+TODO (#T(regexp$ "\\*" ()) (cons (make-instance 'universal-selector) (parse-selector &rest))))) (defgeneric find-matching-elements (selector elements)) diff -rN -u old-Oh, Ducks!/templates.lisp new-Oh, Ducks!/templates.lisp --- old-Oh, Ducks!/templates.lisp 1970-01-01 00:00:00.000000000 +0000 +++ new-Oh, Ducks!/templates.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -0,0 +1,68 @@ +(in-package #:oh-ducks) + +(defclass css-selector-template (unify::expression-template) + ((parser :initarg :parser :initform nil) ;; subtype generally determines parser + (specifiers :reader specifiers) ;; list of (specifier . variable) and (specifier . template) + )) + +(defclass xml-template (css-selector-template) ()) ;; parses xml + +(defclass html-template (css-selector-template) ()) ;; parses html + + +(defvar *default-parser* nil "Determines the default parser when none is specified.") + +(defgeneric document-parser (template) + (:documentation "Returns a function which, given an unparsed document, parses that document into some sort of structure.")) + +(defmethod document-parser ((template css-selector-template)) + (slot-value template 'parser)) + +(defgeneric make-template-for-parser (parser spec) + (:documentation "Returns a template of the appropriate type for a given parser.") + (:method ((parser t) spec) + (make-instance 'css-selector-template :parser parser :spec spec)) + (:method ((parser null) spec) + (error "No parser specified."))) + +(defun %spec-includes-opts (spec) + (keywordp (first (second spec)))) + +(defmethod make-template ((kind (eql 'html)) (spec cons)) + (destructuring-bind (&key parser) + (if (%spec-includes-opts spec) + (second spec) + (list :parser *default-parser*)) + (make-template-for-parser parser spec))) + +(defun combine-selectors (selector parent) + (let ((combinator (car (last selector)))) + (cond + ((null parent) + selector) + ((combinator-p combinator) + (setf (slot-value combinator 'matcher) parent) + selector) + (t + (nconc selector (list (make-instance 'descendant-combinator :matcher parent))))))) + +(defun parse-specifiers (specs template parent) + (loop :for (css-specifier . rest) :in specs + :for selector = (combine-selectors (parse-selector css-specifier) parent) + :collect (cons selector + (cond + ((unify::template-p rest) rest) + ((unify::variablep rest) rest) + ((consp rest) + (make-instance (class-of template) + :spec (list* (first (template-spec template)) rest) + :css-specifiers rest + :parent selector)))))) + +(defmethod initialize-instance :after ((template css-selector-template) &key css-specifiers parent &allow-other-keys) + (let* ((spec (template-spec template)) + (specifiers-and-vars (or css-specifiers (if (%spec-includes-opts spec) + (cddr spec) + (rest spec))))) + (setf (slot-value template 'specifiers) + (parse-specifiers specifiers-and-vars template parent)))) diff -rN -u old-Oh, Ducks!/tests.lisp new-Oh, Ducks!/tests.lisp --- old-Oh, Ducks!/tests.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/tests.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -2,6 +2,10 @@ ;; FIXME: the switch to chtml:pt nodes means our #'equalp no longer ;; works. +#.(set-dispatch-macro-character #\# #\T 'unify::|sharp-T-reader|) + +#+(or) (setq *default-parser* 'pt) + (equalp '(:div ((:id "id")) "I " (:i () "like") " cheese.") (match (#T(html ("#id" . ?div)) "
I like cheese.
") diff -rN -u old-Oh, Ducks!/traversal/dom.lisp new-Oh, Ducks!/traversal/dom.lisp --- old-Oh, Ducks!/traversal/dom.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/traversal/dom.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,4 +1,4 @@ -(in-package #:oh-ducks) +(in-package #:oh-ducks.traversal) ;;; general accessors @@ -6,17 +6,22 @@ (coerce (dom:child-nodes element) 'list)) (defmethod element-parent ((element dom:node)) (dom:parent-node element)) -(defmethod element-attribute (attribute (element dom:node)) nil) +(defmethod element-attribute (attribute (element dom:node)) + (declare (ignore attribute element)) + nil) (defmethod element-attribute ((attribute symbol) (element dom:element)) (element-attribute (string-downcase (symbol-name attribute)) element)) (defmethod element-attribute ((attribute string) (element dom:element)) (dom:get-attribute element attribute)) -(defmethod element-type ((element dom:node)) nil) +(defmethod element-type ((element dom:node)) + (declare (ignore element)) + nil) (defmethod element-type ((element dom:element)) (dom:tag-name element)) ;;; special accessors in case something special needs to happen (defmethod element-id ((element dom:node)) + (declare (ignore element)) nil) (defmethod element-id ((element dom:element)) (element-attribute "id" element)) diff -rN -u old-Oh, Ducks!/traversal/interface.lisp new-Oh, Ducks!/traversal/interface.lisp --- old-Oh, Ducks!/traversal/interface.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/traversal/interface.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,7 +1,7 @@ ;;;; type-defines-accessors ;;;; Under this implementation strategy, elements would need only implement ;;;; accessors for traversing the node graph. -(in-package #:oh-ducks) +(in-package #:oh-ducks.traversal) ;;; general accessors diff -rN -u old-Oh, Ducks!/traversal/lhtml.lisp new-Oh, Ducks!/traversal/lhtml.lisp --- old-Oh, Ducks!/traversal/lhtml.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/traversal/lhtml.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,7 +1,7 @@ ;;; WARNING: lhtml will conflict with any handler which also uses lists. ;;; xmls, for instance (though I think that's at least ;;; structurally compatible). Sorry, but that's the way it goes. -(in-package #:oh-ducks) +(in-package #:oh-ducks.traversal) ;;; general accessors diff -rN -u old-Oh, Ducks!/traversal/pt.lisp new-Oh, Ducks!/traversal/pt.lisp --- old-Oh, Ducks!/traversal/pt.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/traversal/pt.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -1,4 +1,4 @@ -(in-package #:oh-ducks) +(in-package #:oh-ducks.traversal) ;;; general accessors diff -rN -u old-Oh, Ducks!/unification-templates.lisp new-Oh, Ducks!/unification-templates.lisp --- old-Oh, Ducks!/unification-templates.lisp 2015-10-02 05:34:09.000000000 +0000 +++ new-Oh, Ducks!/unification-templates.lisp 1970-01-01 00:00:00.000000000 +0000 @@ -1,121 +0,0 @@ -(in-package #:oh-ducks) -;; FIXME: rather than having separate -;; #t(pt-html ...) -;; #t(lhtml ...) -;; etc. -;; syntaxes for every possible parser, have a single -;; #t(html [(:parser parser-function)] ...) -;; which uses the value of :parser to handle parsing. Or, if no -;; parser is specified, requires an already-parsed document be passed -;; in. - -(defvar *default-parser* 'pt "Determines the default parser when none is specified.") - -(defclass css-selector-template (unify::expression-template) - ((parser :initarg :parser) ;; subtype generally determines parser - (specifiers :reader specifiers) ;; list of (specifier . variable) and (specifier . template) - )) - -(defclass xml-template (css-selector-template) ()) ;; parses using closure-xml - -(defclass dom-template (xml-template) ()) - -(defclass html-template (css-selector-template) ()) ;; parses using closure-html - -(defclass lhtml-template (html-template) ()) -(defclass pt-template (html-template) ()) - -(defgeneric document-parser (template) - (:documentation "Returns a function which, given an unparsed document, parses that document into some sort of structure.") - (:method ((template css-selector-template)) - (slot-value template 'parser)) - (:method ((template dom-template)) - (lambda (document) (cxml:parse document (cxml-dom:make-dom-builder)))) - (:method ((template lhtml-template)) - (lambda (document) (chtml:parse document (chtml:make-lhtml-builder)))) - (:method ((template pt-template)) - (lambda (document) (chtml:parse document (chtml:make-pt-builder))))) - -(defun %spec-includes-opts (spec) - (keywordp (first (second spec)))) - -(defmethod make-template ((kind (eql 'html)) (spec cons)) - (destructuring-bind (&key parser) - (if (%spec-includes-opts spec) - (second spec) - (list :parser *default-parser*)) - (case parser - ;; short names - ((lhtml :lhtml) (make-instance 'lhtml-template :spec spec)) - ((pt :pt) (make-instance 'pt-template :spec spec)) - ((dom :dom) (make-instance 'dom-template :spec spec)) - ;; user-specified - (t (make-instance 'css-selector-template :parser parser :spec spec))))) - -(defmethod initialize-instance :after ((template css-selector-template) &key css-specifiers parent &allow-other-keys) - (let* ((spec (template-spec template)) - (specifiers-and-vars (or css-specifiers (if (%spec-includes-opts spec) - (cddr spec) - (rest spec))))) - (setf (slot-value template 'specifiers) - (parse-specifiers specifiers-and-vars template parent)))) - -(defun combine-selectors (selector parent) - (let ((combinator (car (last selector)))) - (cond - ((null parent) - selector) - ((combinator-p combinator) - (setf (slot-value combinator 'matcher) parent) - selector) - (t - (nconc selector (list (make-instance 'descendant-combinator :matcher parent))))))) - -(defun parse-specifiers (specs template parent) - (loop :for (css-specifier . rest) :in specs - :for selector = (combine-selectors (parse-selector css-specifier) parent) - :collect (cons selector - (cond - ((unify::template-p rest) rest) - ((unify::variablep rest) rest) - ((consp rest) - (make-instance (class-of template) - :spec (list* (first (template-spec template)) rest) - :css-specifiers rest - :parent selector)))))) - -(defmethod unify ((a css-selector-template) (b css-selector-template) - &optional (env (make-empty-environment)) - &key &allow-other-keys) - (declare (ignore env)) - (error 'unification-failure - :format-control "Do not know how to unify the two css-selector-templates ~S and ~S." - :format-arguments (list a b))) - -(defmethod unify ((template css-selector-template) document - &optional (env (make-empty-environment)) - &key &allow-other-keys) - (loop :for (css-specifier . template) :in (specifiers template) - :do - (let ((val (find-matching-elements css-specifier document))) - (cond - ((unify::template-p template) (unify template val env)) - ((unify::variablep template) (unify::extend-environment template val env)) - (t (error "whoops: ~s, ~s" css-specifier template))))) - env) - -(defmethod unify (document (template css-selector-template) - &optional (env (make-empty-environment)) - &key &allow-other-keys) - (unify template document env)) - -(defmethod unify ((template css-selector-template) (document string) - &optional (env (make-empty-environment)) - &key &allow-other-keys) - (unify template (funcall (document-parser template) document) env)) - -(defmethod unify ((template css-selector-template) (document pathname) - &optional (env (make-empty-environment)) - &key &allow-other-keys) - (unify template (funcall (document-parser template) document) env)) - diff -rN -u old-Oh, Ducks!/unify.lisp new-Oh, Ducks!/unify.lisp --- old-Oh, Ducks!/unify.lisp 1970-01-01 00:00:00.000000000 +0000 +++ new-Oh, Ducks!/unify.lisp 2015-10-02 05:34:09.000000000 +0000 @@ -0,0 +1,37 @@ +(in-package #:oh-ducks) + +(defmethod unify ((a css-selector-template) (b css-selector-template) + &optional (env (make-empty-environment)) + &key &allow-other-keys) + (declare (ignore env)) + (error 'unification-failure + :format-control "Do not know how to unify the two css-selector-templates ~S and ~S." + :format-arguments (list a b))) + +(defmethod unify ((template css-selector-template) document + &optional (env (make-empty-environment)) + &key &allow-other-keys) + (loop :for (css-specifier . template) :in (specifiers template) + :do + (let ((val (find-matching-elements css-specifier document))) + (cond + ((unify::template-p template) (unify template val env)) + ((unify::variablep template) (unify::extend-environment template val env)) + (t (error "whoops: ~s, ~s" css-specifier template))))) + env) + +(defmethod unify (document (template css-selector-template) + &optional (env (make-empty-environment)) + &key &allow-other-keys) + (unify template document env)) + +(defmethod unify ((template css-selector-template) (document string) + &optional (env (make-empty-environment)) + &key &allow-other-keys) + (unify template (funcall (document-parser template) document) env)) + +(defmethod unify ((template css-selector-template) (document pathname) + &optional (env (make-empty-environment)) + &key &allow-other-keys) + (unify template (funcall (document-parser template) document) env)) +