;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; LITTLE-JAY - gensym-lookup JSON parser prototype ;;;; MichaƂ "phoe" Herda ;;;; License: MIT ;;;; (ql:quickload '(:alexandria :trivial-indent)) (uiop:define-package #:little-jay (:use #:cl) (:local-nicknames (#:a #:alexandria)) (:shadow #:get #:peek-char #:pop #:push) (:export #:key #:get #:parse)) (in-package #:little-jay) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Key cache (defvar *key-cache* (make-hash-table :test #'equal :weakness :value)) (defun key (thing) (check-type thing a:string-designator) (let ((string (string thing))) (multiple-value-bind (value foundp) (gethash string *key-cache*) (cond (foundp value) (t (setf (gethash string *key-cache*) (make-symbol string))))))) (define-compiler-macro key (&whole whole thing &environment env) (cond ((not (constantp thing env)) whole) (t (a:with-gensyms (string value foundp) `(load-time-value (let ((,string (string ,thing))) (multiple-value-bind (,value ,foundp) (gethash ,string *key-cache*) (cond (,foundp ,value) (t (setf (gethash ,string *key-cache*) (make-symbol ,string))))))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Object accessor (defun get (object name) (gethash (key name) object)) (define-compiler-macro get (object name) `(gethash (key ,name) ,object)) (defun (setf get) (newval object name) (setf (gethash (key name) object) newval)) (define-compiler-macro (setf get) (newval object name) `(setf (gethash (key ,name) ,object) ,newval)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Errors (define-condition with-char-mixin () ((character :reader invalid-character-char :initarg :char))) (define-condition json-parse-error (parse-error stream-error) ()) (define-condition internal-error (json-parse-error) ()) (define-condition unexpected-end-of-json (json-parse-error end-of-file) ()) (define-condition invalid-literal (json-parse-error) ()) (define-condition invalid-object-key (json-parse-error) ()) (define-condition junk-at-end-of-stream (json-parse-error with-char-mixin) ()) (define-condition invalid-character (json-parse-error with-char-mixin) ()) (define-condition invalid-digit (invalid-character) ()) (define-condition invalid-string-character (invalid-character) ()) (define-condition invalid-string-escape-character (invalid-string-character) ()) (define-condition invalid-escape-digit (invalid-string-character) ()) (define-condition invalid-digit (invalid-character) ()) (define-condition colon-not-found (invalid-character) ()) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Parser (eval-when (:compile-toplevel :load-toplevel :execute) (defparameter +whitespace+ '(#.(code-char #x20) ; #\Space #.(code-char #x09) ; #\Tab #.(code-char #x0A) ; #\Newline #.(code-char #x0D))) ; #\Return (defparameter +digits+ (coerce "0123456789" 'list)) (defparameter +exponents+ (coerce "Ee" 'list))) (trivial-indent:define-indentation with-state-machine (as prog)) (defmacro with-state-machine ((&key starting-state) &body body) (let ((debug t)) `(macrolet ((peek-char () `(cl:peek-char nil stream nil nil)) (pop-char () `(prog1 (setf char (read-char stream nil nil)) ,@,(when debug ``((format t "~&;; Popped character ~S" char))))) (push-char () `(progn ,@,(when debug ``((format t "~&;; Pushed character ~S" char))) (unread-char char stream))) (peek (&optional (n 1)) `(aref stack (- (fill-pointer stack) ,n))) (pop () `(progn ,@,(when debug ``((format t "~&;; Popped ~S" (peek)))) (vector-pop stack))) (push (x) `(prog1 (vector-push-extend ,x stack) ,@,(when debug ``((format t "~&;; Pushed ~S" (peek)))))) (jump (state) `(progn ,@,(when debug ``((format t "~&;; Going to state ~S" ,state))) (cl:go ,state))) (fail (type &rest args) `(error ',type :stream stream ,@args))) (symbol-macrolet ((stack-size (fill-pointer stack))) (prog ((stack (make-array initial-stack-size :adjustable t :fill-pointer 0)) char) (go ,starting-state) ,@body))))) (defun parse (stream &optional (initial-stack-size 1024)) (declare (optimize (speed 3) (space 0) (compilation-speed 0))) (with-state-machine (:starting-state :read-value) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General dispatch :read-value (case (peek-char) (#.+whitespace+ (pop-char) (jump :read-value)) (#.(code-char #x74) (jump :read-true)) ; #\t (#.(code-char #x66) (jump :read-false)) ; #\f (#.(code-char #x6E) (jump :read-null)) ; #\n (#.(code-char #x7B) (jump :read-object)) ; #\Left_Curly_Bracket (#.(code-char #x5B) (jump :read-array)) ; #\Left_Square_Bracket (#.(code-char #x22) (jump :read-string)) ; #\Quotation_Mark (#.(code-char #x2D) (jump :read-number)) ; #\Hyphen-Minus (#.+digits+ (jump :read-number)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-character :char (peek-char)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Literals :read-true (pop-char) ; #\t (if (and (char= (pop-char) #.(code-char #x72)) ; #\r (char= (pop-char) #.(code-char #x75)) ; #\u (char= (pop-char) #.(code-char #x65))) ; #\e (vector-push-extend t stack) (fail invalid-literal)) (jump :finish-value) :read-false (pop-char) ; #\f (if (and (char= (pop-char) #.(code-char #x61)) ; #\a (char= (pop-char) #.(code-char #x6C)) ; #\l (char= (pop-char) #.(code-char #x73)) ; #\s (char= (pop-char) #.(code-char #x65))) ; #\e (vector-push-extend nil stack) (fail invalid-literal)) (jump :finish-value) :read-null (pop-char) ; #\n (if (and (char= (pop-char) #.(code-char #x75)) ; #\u (char= (pop-char) #.(code-char #x6C)) ; #\l (char= (pop-char) #.(code-char #x6C))) ; #\l (vector-push-extend :null stack) (fail invalid-literal)) (jump :finish-value) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Objects :read-object (pop-char) (jump :read-first-object-element) :read-first-object-element (case (peek-char) (#.+whitespace+ (pop-char) (jump :read-first-object-element)) (#.(code-char #x7D) ; #\Right_Curly_Bracket (pop-char) (jump :finish-value)) (#.(code-char #x22) ; #\Quotation_Mark (push (make-hash-table :test #'eq)) (jump :read-next-key)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-object-key))) :read-next-key (case (peek-char) (#.+whitespace+ (pop-char) (jump :read-next-key)) (#.(code-char #x22) (jump :read-string)) ; #\Quotation_Mark ((nil) (fail unexpected-end-of-json)) (t (fail invalid-string-escape-character))) :read-colon (case (pop-char) (#.+whitespace+ (jump :read-object)) (#.(code-char #x3A) (jump :read-value)) ; #\Colon ((nil) (fail unexpected-end-of-json)) (t (fail colon-not-found :char char))) :augment-object (let ((value (pop)) (key (pop))) (setf (gethash (key key) (peek)) value)) (jump :maybe-finish-object) :maybe-finish-object (case (pop-char) (#.+whitespace+ (jump :maybe-finish-object)) (#\, (jump :read-next-key)) (#\} (jump :finish-value)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-string-escape-character))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Arrays :read-array (pop-char) (push (make-array 0 :adjustable t :fill-pointer 0)) (case (pop-char) (#.+whitespace+ (jump :read-array)) (#.(code-char #x5D) (jump :finish-value)) ; #\Right_Square_Bracket ((nil) (fail unexpected-end-of-json)) (t (push-char) (jump :read-value))) :augment-array (vector-push-extend (pop) (peek)) (jump :maybe-finish-array) :maybe-finish-array (case (pop-char) (#.+whitespace+ (jump :maybe-finish-array)) (#\, (jump :read-value)) (#\] (jump :finish-value)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-string-escape-character))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Strings :read-string (pop-char) (push (make-string-output-stream)) (jump :read-string-single-character) :read-string-single-character (case (pop-char) (#.(cdr +whitespace+) (fail invalid-string-character)) (#.(code-char #x5C) ; #\Reverse_Solidus (jump :read-string-escaped-character)) (#.(code-char #x22) ; #\Quotation_Mark (push (get-output-stream-string (pop))) (jump :finish-value)) ((nil) (fail unexpected-end-of-json)) (t (princ char (peek)) (jump :read-string-single-character))) :read-string-escaped-character (case (pop-char) (#.(code-char #x22) (princ char (peek))) ; #\Quotation_Mark (#.(code-char #x5C) (princ char (peek))) ; #\Reverse_Solidus (#.(code-char #x2F) (princ char (peek))) ; #\Solidus (#.(code-char #x62) ; #\b (princ (code-char #x08) (peek))) ; #\Backspace (#.(code-char #x66) ; #\f (princ (code-char #x0C) (peek))) ; #\Page (#.(code-char #x6E) ; #\n (princ (code-char #x0A) (peek))) ; #\Newline (#.(code-char #x72) ; #\r (princ (code-char #x0D) (peek))) ; #\Return (#.(code-char #x74) ; #\t (princ (code-char #x09) (peek))) ; #\Tab (#.(code-char #x75) ; #\u (jump :read-string-unicode-character)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-string-escape-character))) (jump :read-string-single-character) :read-string-unicode-character (let ((code 0)) (declare (type (unsigned-byte 16) code)) (dotimes (i 4) (let ((digit-char (pop-char))) (when (null digit-char) (fail unexpected-end-of-json)) (let ((digit (digit-char-p digit-char 16))) (when (null digit) (fail invalid-escape-digit :char digit-char)) (setf code (logior (ash code 4) digit))))) (princ (code-char code) (peek))) (jump :read-string-single-character) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Numbers :read-number (jump :read-sign) :read-sign (case (peek-char) ((#.(code-char #x2D)) ; #\Hyphen-Minus (pop-char) (push -1)) (t (push 1))) (jump :read-first-digit) :read-first-digit (case (peek-char) (#.(car +digits+) (push 0) (jump :read-maybe-fraction)) (#.(cdr +digits+) (push (make-string-output-stream)) (princ (pop-char) (peek)) (jump :read-digit)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-digit :char (peek-char)))) :read-digit (case (peek-char) (#.+digits+ (princ (pop-char) (peek)) (jump :read-digit)) (t (push (parse-integer (get-output-stream-string (pop)))) (jump :read-maybe-fraction))) :read-maybe-fraction (case (peek-char) (#.(code-char #x2E) ; #\Full_Stop (jump :read-fraction)) (t (push 0) (jump :read-maybe-exponent))) :read-fraction (pop-char) (push (make-string-output-stream)) (case (peek-char) (#.+digits+ (jump :read-fraction-digit)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-digit))) :read-fraction-digit (case (peek-char) (#.+digits+ (princ (pop-char) (peek)) (jump :read-fraction-digit)) (t (push (parse-integer (get-output-stream-string (pop)))) (jump :read-maybe-exponent))) :read-maybe-exponent (case (peek-char) (#.+exponents+ (pop-char) (jump :read-exponent)) (t (push 1) (push 0) (jump :finish-number))) :read-exponent (jump :read-exponent-sign) :read-exponent-sign (cond ((eql (peek-char) #.(code-char #x2D)) ; #\Hyphen-Minus (pop-char) (push -1)) ((eql (peek-char) #.(code-char #x2B)) ; #\Plus_Sign (pop-char) (push 1)) (t (push 1))) (jump :read-first-exponent-digit) :read-first-exponent-digit (case (peek-char) (#.+digits+ (push (make-string-output-stream)) (jump :read-exponent-digit)) ((nil) (fail unexpected-end-of-json)) (t (fail invalid-digit :char (peek-char)))) :read-exponent-digit (case (peek-char) (#.+digits+ (princ (pop-char) (peek)) (jump :read-exponent-digit)) (t (push (parse-integer (get-output-stream-string (pop)))) (jump :finish-number))) :finish-number (print stack) (let* ((exponent (pop)) (exponent-sign (pop)) (fraction (pop)) (integer (pop)) (sign (pop))) (declare (type integer integer)) (declare (type unsigned-byte fraction exponent)) (declare (type (member 1 -1) sign exponent-sign)) #+sbcl ; so it shuts up about GENERIC-+ (declare (sb-ext:muffle-conditions sb-ext:compiler-note)) (let ((real-exponent (* exponent-sign exponent))) (if (= fraction 0) (push (* sign integer (expt 10 real-exponent))) (let* ((fraction-expt (ceiling (log (abs fraction) 10))) (real-fraction (* fraction (expt 10 (- fraction-expt)))) (number (+ integer (float real-fraction 0.0d0)))) (push (* sign number (expt 10 real-exponent)))))) (jump :finish-value)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Epilogue :finish-value (if (= 1 stack-size) (jump :end) (let ((previous (peek 2))) (typecase previous (hash-table (jump :read-colon)) (string (jump :augment-object)) (vector (jump :augment-array)) (t (fail internal-error))))) :end (case (peek-char) (#.+whitespace+ (pop-char) (jump :end)) ((nil) (return (vector-pop stack))) (t (fail junk-at-end-of-stream :char (peek-char))))))