;;;; -*- mode:lisp;coding:utf-8 -*- ;;;;************************************************************************** ;;;;FILE: parser.lisp ;;;;LANGUAGE: Common-Lisp ;;;;SYSTEM: Common-Lisp ;;;;USER-INTERFACE: NONE ;;;;DESCRIPTION ;;;; ;;;; A simple latex parser. ;;;; ;;;;AUTHORS ;;;; Pascal J. Bourguignon ;;;;MODIFICATIONS ;;;; 2020-05-16 Corrected a few syntax (thanks phoe) and macro errors. ;;;; 2013-02-09 Created. ;;;;BUGS ;;;;LEGAL ;;;; AGPL3 ;;;; ;;;; Copyright Pascal J. Bourguignon 2013 - 2016 ;;;; ;;;; This program is free software: you can redistribute it and/or modify ;;;; it under the terms of the GNU Affero General Public License as published by ;;;; the Free Software Foundation, either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; This program is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU Affero General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Affero General Public License ;;;; along with this program. If not, see . ;;;;************************************************************************** (eval-when (:compile-toplevel :load-toplevel :execute) (setf *readtable* (copy-readtable nil))) (defpackage "COM.INFORMATIMAGO.COMMON-LISP.LATEX" (:use "COMMON-LISP") (:shadow "SET")) (in-package "COM.INFORMATIMAGO.COMMON-LISP.LATEX") (defun latex-env-name (text) (let* ((left (position #\{ text)) (right (position #\} text :start left))) (subseq text (1+ left) right))) (defun test/latex-env-name () (assert (string= (latex-env-name "{Hello} {world}!") "Hello")) (assert (string= (latex-env-name "Hello {world}!") "world")) (assert (string= (latex-env-name "{world}") "world")) (assert (string= (latex-env-name "{world") "world")) (assert (null (ignore-errors (latex-env-name "world"))))) (defun escape-quotes (text) (with-output-to-string (*standard-output*) (loop :for ch :across text :do (when (char= #\" ch) (princ "\\")) (princ ch)))) (defun test/escape-quotes () (assert (string= (escape-quotes "Hello\"World\"!") "Hello\\\"World\\\"!")) (assert (string= (escape-quotes "Hello '\"' World!") "Hello '\\\"' World!")) (assert (string= (escape-quotes "Hello 'world'!") "Hello 'world'!")) (assert (string= (escape-quotes "Hello world!") "Hello world!"))) (defun get-macro-name (text) "Return the suffix of TEXT starting after the last backslash." (subseq text (1+ (or (position #\\ text :from-end t) -1)))) (defun test/get-macro-name () (assert (string= (get-macro-name "\\Hello") "Hello")) (assert (string= (get-macro-name "\\Hello\\World") "World")) (assert (string= (get-macro-name "Hello") "Hello"))) (defparameter *max-include-depth* 10) (defun set (chars) `(set ,chars)) (defun set-not (chars) `(set-not ,chars)) (defun alt (&rest options) `(alt ,@options)) (defun opt (&rest options) `(opt ,@options)) (defun rep+ (re) `(rep+ ,re)) (defun rep* (re) `(rep* ,re)) (defun seq (&rest res) `(seq ,@res)) (defun any () `(any)) (defun scan-match-regexp (stream regex) (declare (ignore stream regex)) (cerror "Woop." "Implement me!")) (defun terminate ()) (defmacro scan-rules ((token-variable stream-variable) &body rules) (let ((vstate (gensym "state")) (vscan-buffer (gensym "buffer"))) `(let ((,vstate nil) (,token-variable nil) (,vscan-buffer (make-array 8 :element-type 'character :adjustable t :fill-pointer 0))) (flet ((goto (state) (setf ,vstate state)) (scan-current-stream () ,stream-variable) ((setf scan-current-stream) (new-stream) (setf ,stream-variable new-stream)) (scan-current-buffer () ,vscan-buffer) ((setf scan-current-buffer) (new-buffer) (assert (and (stringp new-buffer) (adjustable-array-p new-buffer) (array-has-fill-pointer-p new-buffer))) (setf ,vscan-buffer new-buffer)) (scan-reset-current-buffer () (setf ,vscan-buffer (make-array 8 :element-type 'character :adjustable t :fill-pointer 0)))) (macrolet ((when-state (state &body body) `(when (eq ,',vstate ,state) ,@body)) (rule (regexp &body body) `(when (setf ,',token-variable (scan-match-regexp ,',stream-variable ,regexp)) ,@body))) ,@rules))))) (defun scan (stream &key (read-tex-dir #P".") (external-format :default) (if-include-fails :continue) ; or :error or some value to return from SCAN. ) (let* ((token-text "") (include-stack '()) (chapter-flag nil) (section-flag nil) (subsection-flag nil) (subsubsection-flag nil) (aparagraph-flag nil) (inline-quote-flag nil) (inline-math-flag nil) (display-math-flag nil) (table-count 0) (array-count 0) (saved-math-state nil) (brace-count 0) (list-environment-count 0) (list-stack nil) (current-list nil) (states '(:math :macro :picture)) (A-Z "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") (CONTROL-M #\return) (CONTROL-L #\page) (ALPHABET (set A-Z)) (ACCENT-ACUTE "\\'") (ACCENT-GRAV "\\`") (ACCENT-UMLAUT "\\\"" ) (ACCENT-CIRCUMFLEX "\\^") (ACCENT (alt ACCENT-CIRCUMFLEX ACCENT-GRAV ACCENT-ACUTE ACCENT-UMLAUT)) (FILENAME-CHAR (set A-Z)) (LETTER (alt (set A-Z) #\- ACCENT)) (DIGIT (set "0123456789")) (NUMBER (rep+ DIGIT)) (MATH-NUMBER (seq (rep* DIGIT) (opt ".") NUMBER)) (TEXT-NUMBER (seq NUMBER (rep* (seq (opt ",") MATH-NUMBER)))) (TIME-NUMBER (seq (rep+ DIGIT) (opt ":") (rep+ digit))) (ALPHABET-STRING (rep* LETTER)) (WHITESPACE (alt #\space #\tab #\newline)) (H-SPACE (alt #\space #\tab)) (NEWLINE #\newline) (BLANK-LINE (seq (rep* H-SPACE) NEWLINE) ) (PARAGRAPH-SEPARATOR (seq NEWLINE (rep+ BLANK-LINE))) (PUNCT (set "-=_^/.,:;!?\"()@[]|<>*+'`")) (BACKSLASH #\\) (LATEX-SPECIAL-CHAR (set "#$%&~_^{}\\")) (MATH-OP (set "+*/'<>=-")) (POS-TAB-ARG (seq "[" (rep+ (set "hbtp")) "]")) (OPT-TAB-ARG (seq "{" (rep* (set "lrc|")) "}")) (BEGIN-ENV-OPEN (seq BACKSLASH "begin" (rep* H-SPACE) "{")) (BEGIN-ENV-CLOSE (seq "}"(rep* WHITESPACE))) (BEGIN-DISPLAY-MATH "$$") (END-DISPLAY-MATH "$$") ) (scan-rules (token-text stream) (labels ((get-tex-filename-without-braces (text) "Return the tex file path designated by the token-text \"filename\". Use variable READ-TEX-DIR." (let ((dirname read-tex-dir) (filename text)) (concatenate 'string (or dirname "") filename (if (find #\. filename) "" ".tex")))) (get-tex-filename (text) "Return the tex file path designated by the token-text \"{filename}\". Use variable READ-TEX-DIR." (get-tex-filename-without-braces (latex-env-name text))) (open-list (&rest prefix) (push current-list list-stack) (setf current-list (reverse prefix))) (close-list () (let ((new-list (nreverse current-list))) (setf current-list (pop list-stack)) (push new-list current-list) new-list)) (add-as-string (text) (push text current-list)) (add-sexp (sexp) (push sexp current-list)) (clean-up-quotes () (when inline-quote-flag (setf inline-quote-flag nil) (close-list))) (list-env-p (text) (member text '("list" "exercises" "Exercises" "closeitemize" "alphlist" "Alphlist" "chapterex") :test (function string=))) (push-include (filename) (if (<= (length include-stack) *max-include-depth* ) (error "Includes nested too deeply.") (let ((infile (open filename :external-format external-format :if-does-not-exist (if (eq if-include-fails :error) :error nil)))) (if infile (case if-include-fails (:continue (goto nil)) (:error (error "Could not open include file ~S" filename)) (otherwise (return-from scan if-include-fails))) (progn (push (cons (scan-current-stream) (scan-current-buffer)) include-stack) (setf (scan-current-stream) infile) (scan-reset-current-buffer) (format *trace-output* "Reading input from include file ~S~%" filename) (goto :initial)))))) (pop-include () (when include-stack (close (scan-current-stream)) (setf (scan-current-stream) (car (car include-stack)) (scan-current-buffer) (cdr (car include-stack))) (pop include-stack) t)) (terminate-includes () (loop :while include-stack :do (pop-include))) (aparagraph-closer (closer) (lambda () (when aparagraph-flag (funcall closer) (close-list) (setf aparagraph-flag nil)))) (subsubsection-closer (closer) (lambda () (when subsubsection-flag (funcall closer) (close-list) (setf subsubsection-flag nil)))) (subsection-closer (closer) (lambda () (when subsection-flag (funcall closer) (close-list) (setf subsection-flag nil)))) (section-closer (closer) (lambda () (when section-flag (funcall closer) (clean-up-quotes) (close-list) (setf section-flag nil)))) (open-group (flag tag close-subgroup) (if flag (progn (funcall close-subgroup) (close-list) (open-list tag) nil) (progn (clean-up-quotes) (open-list tag) t)))) (unwind-protect (progn (rule (seq "\\end " (rep* H-SPACE)) (error "space after \\end can cause problems")) (rule "\\input" (goto :include)) (rule "\\include" (goto :include)) (when-state :include (rule (rep* H-SPACE) #|eat the whitespace|#) (rule (seq "{" (rep+ (set-not (alt "}" WHITESPACE))) "}") (let ((filename (get-tex-filename token-text))) (push-include filename))) (rule (rep+ filename-char) (let ((filename (get-tex-filename-without-braces token-text))) (push-include filename)))) (when-state :<> (unless (pop-include) (terminate))) ;; Dectalk abbreviations: (rule (alt "Jan." "Feb." "Mar." "Apr." "Aug." "Sep." "Oct." "Nov." "Dec.") (add-as-string token-text)) (rule "i.e." (add-sexp '(cs "ie"))) (rule "e.g." (add-sexp '(cs "eg"))) ;; Misc abbreviations: (rule (alt "Corp." "etc.") (add-as-string token-text)) ;; Initials: (rule (seq ALPHABET ".") ;; A letter followed by a period is probably an initial. (add-as-string token-text)) ;; Salutations: (rule (alt "Prof." "Mr." "Mrs." "St." "PhD." "Phd." "Dr." ) (add-as-string token-text)) ;; Days of the week: (rule (alt "Sun." "Mon." "Tue." "Wed." "Thu." "Fri." "Sat.") (add-as-string token-text)) ;; tex commands that are stripped (rule (alt "\\hspace*" "\\bigskip" "\\noalign" "\\bigl" "\\bigr" "\\biggl" "\\biggr" "\\bigggl" "\\bigggr" (seq "\\thispagestyle{" (rep* (set-not #\})) "}" ) "\\medskip" "\\advance" "\\leftskip" "\\rightskip" (seq "by" (rep+ digit) "pc") (seq "by" (rep+ digit) "pt") (seq "by" (rep+ digit) "in") "\\protect" "\\smallskip" "\\displaystyle" "\\left" "\\right" "\\boldmath" "\\unboldmath" "\\raggedright" "\\vfil" "\\vfill" "\\hfil" "\\hfill" "\\scriptstyle" (seq "\\vskip" (rep* H-space) (opt (seq (rep+ digit) "pt"))) (seq "\\hspace*" (rep* H-space) (opt (seq (rep+ digit) "in"))) (seq "\\vspace*" (rep* H-space) (opt (seq (rep+ digit) "pt"))) (seq "\\vspace*" (rep* H-space) (opt (seq (rep+ digit) "in"))) (seq "\\kern*" (rep* H-space) (opt (seq (rep+ digit) "pt"))) "\\eject" "\\maketitle" "\\noindent" "\\goodbreak" "\\," "\\!" "\\:" "\\" "\\hline" "\\ " "\\/" "\\mathstrut" "\\pagebreak" "\\linebreak" ) #|nothing|#) (rule "\\@" (add-sexp '(cs "@"))) (rule "\\ul" (add-sexp '(cs "em"))) ;; Some special verb commands (rule "\\verb|[|" (add-as-string "[")) (rule "\\verb|{|" (add-as-string "{")) ;; special tex and latex characters */ (rule (alt "--" "---") (add-as-string token-text)) (rule "~" (add-as-string " ")); or ignore? (rule (alt "\\cr" "\\headrow" "\\newrow" (seq BACKSLASH BACKSLASH)) (if (or (plusp table-count) (plusp array-count)) (progn (close-list) ; End current array element. (close-list) ; End current row. (open-list) ; Now start next row (open-list))) ; and start its first element (add-sexp '(newline))) (rule (seq backslash latex-special-char) ;; print it after stripping off the backslash (add-as-string (escape-quotes (subseq token-text 1)))) (rule "&" (if (or (plusp table-count) (plusp array-count)) (progn (close-list) (open-list)) ;; output field separator so that we can handle tex matrix command (add-sexp '(field-separator)))) (when-state :math (rule "^" (add-as-string "^")) (rule "_" (add-as-string "_"))) ;; Skip pictures (rule (seq BEGIN-ENV-OPEN "picture" BEGIN-ENV-CLOSE) (goto :picture)) (rule (seq (rep* whitespace) backslash "end{picture}" (rep* h-space)) (goto nil)) (when-state :picture (rule (rep* (any)) #|ignore|#)) ;; kluges for handling macro definitions (rule (seq backslash "def" (rep* whitespace) backslash (rep* letter)) ;; handle macro definitions (add-sexp `(cs "def" ,(get-macro-name token-text))) ;; start arg list (open-list 'arglist) (goto :macro)) (rule (seq "#" digit) ;; Convert #1 to arg1 etc. (add-as-string (format nil "arg~C" (aref token-text 1)))) (when-state :macro (rule "{" ;; ending of arg list seen so mark it and remember (close-list) (open-list 'block) (goto nil))) ;; braces start groups (when-state :math (rule "{" (incf brace-count) (open-list 'subformula))) (rule "{" (incf brace-count) (open-list 'block)) (rule "}" (decf brace-count) (close-list) (when (zerop brace-count) ;; reset state to math if necessary (case saved-math-state ((nil)) ((:inline-math) (setf inline-math-flag t saved-math-state nil) (goto :math)) ((:display-math) (setf display-math-flag t saved-math-state nil) (goto :math)) (otherwise (error "Unknown saved-math-state ~S" saved-math-state))))) ;; hbox and mbox change state (rule (alt (seq backslash "fbox" (rep* h-space) "{") (seq backslash "hbox" (rep* h-space) "{") (seq backslash "mbox" (rep* h-space) "{")) (setf brace-count 1) ; reset brace count ;; When brace_count reaches 0 we have seen the ;; matching close brace and can close the hbox. ;; Both hbox and mbox marked as mbox and will be ;; processed as if they were user defined macros ;; ie: using define-text-object (clean-up-quotes) ;; first set up state (cond (display-math-flag (setf display-math-flag nil inline-math-flag nil ; kludge saved-math-state :display-math) (goto nil)) (inline-math-flag (setf display-math-flag nil ; kludge inline-math-flag nil saved-math-state :inline-math) (goto nil))) (add-sexp `(cs "mbox")) (open-list 'block)) ;; Brackets in latex (when-state :math (rule "[" (add-as-string "[")) (rule "]" (add-as-string "]"))) (rule "[" ; optional args in a block (open-list 'block)) (rule "]" (close-list)) ;; paragraph breaks (rule (alt (seq backslash "par") (seq backslash "paragraph") PARAGRAPH-SEPARATOR) ;; Don't put parbreaks inside math. (unless (or inline-math-flag display-math-flag) (clean-up-quotes) ;; Paragraph delimiter is a newline followed by ;; an arbitrary number blank lines, where a ;; blank is defined as a line with an arbitrary ;; amount of optional h-space followed by a ;; newline. Close apar if one opened. (when aparagraph-flag (close-list) (setf aparagraph-flag nil)) (add-sexp 'parbreak))) ;; Beginning and ending math mode (rule "\\[" (clean-up-quotes) (setf display-math-flag t) (open-list 'display-math) (goto :math)) (rule "\\]" (setf display-math-flag nil) (close-list) (goto nil)) (rule "$$" (when inline-math-flag (error "Display math started inside inline math? Probably an inline math was closed and immediately opened. Check the latex file.")) (if display-math-flag (progn (setf display-math-flag nil) (close-list) (goto nil)) (progn (clean-up-quotes) (setf display-math-flag t) (open-list 'display-math) (goto :math)))) (rule "\\(" (setf inline-math-flag t) (open-list 'inline-math) (goto :math)) (rule "\\)" (setf inline-math-flag nil) (close-list) (goto :math)) (rule "$" (if inline-math-flag (progn (setf inline-math-flag nil) (close-list) (goto nil)) (progn (setf inline-math-flag t) (open-list 'inline-math) (goto :math)))) ;; Math operators (when-state :math (rule "'" ; catch single quote in math mode (add-as-string "prime")) (rule (seq "''" (rep* h-space)) ; catch double prime in math mode (add-as-string "double-prime")) (rule MATH-OP (add-as-string token-text))) ;; tex comment (rule (seq "%" (rep* (any))) ; Latex comments run to the end of the line. ;; Not doing anything with comments, so throw them away. ;; (open-list 'comment) ;; (add-as-string (escape-quotes token-text)) ;; (close-list) ) ;; begin various environments (rule (seq begin-env-open (alt "document" "abstract" "center") begin-env-close) (open-list (intern (latex-env-name token-text)))) (rule (seq begin-env-open (alt "quote" "quotation" "verbatim") begin-env-close) (clean-up-quotes) (open-list (intern (latex-env-name token-text)))) (rule (seq begin-env-open (alt "description" "deflist" "enumerate" "itemize") begin-env-close) (clean-up-quotes) (incf list-environment-count) (open-list (intern (latex-env-name token-text))) ;; Generate dummy item ;; this is to allow \item to be handled cleanly (open-list 'item)) (rule (seq (rep* whitespace) backslash "item") (clean-up-quotes) ;; begin a new item after ending previous item (close-list) (when (zerop list-environment-count) (error "An item was found outside known list environment.")) ;; Note this is a quick fix, ;; and will leave a null list as the first item of each enumerated list (open-list 'item)) ;; begin equation (rule (seq begin-env-open "equation" begin-env-close) (clean-up-quotes) (open-list 'equation) (setf display-math-flag t) (goto :math)) ;; begin eqnarray (rule (seq begin-env-open (alt "eqnarray*" "eqnarray" "eqalign" "eqalign*") begin-env-close) ;; starting an eqnarray or an eqalign (incf array-count) (setf display-math-flag t) (goto :math) (open-list (intern (string-trim "*" (latex-env-name token-text)))) (open-list) ; start the first eqnarray row (open-list)) ; start the first eqnarray element (rule (seq begin-env-open (opt "tabular" "array") "}" (opt POS-TAB-ARG) (opt OPT-TAB-ARG) (rep* WHITESPACE)) (clean-up-quotes) (let ((op (intern (string-trim "*" (latex-env-name token-text))))) (if (eq op 'tabular) (incf table-count) (incf array-count)) (open-list op)) (open-list) ; start the first table row (open-list)) ; start the first table element ;; begin cases ;; Cases handled like table environment. Allow for text ;; inside math mode by saving state. If this works, use ;; similar approach for mbox hbox etc. (rule (seq begin-env-open "cases" "}" (opt POS-TAB-ARG) (rep* WHITESPACE)) (clean-up-quotes) (cond (display-math-flag (setf display-math-flag nil saved-math-state :display-math) (goto nil)) (inline-math-flag (setf inline-math-flag nil saved-math-state :inline-math) (goto nil))) (incf table-count) (open-list 'cases) (open-list) ; start the first table row (open-list)) ; start the first table element (rule (alt (seq (rep* blank-line) begin-env-open "slide}{}") (seq begin-env-open "slide" begin-env-close)) (clean-up-quotes) ;; starting a latex slide (open-list 'slide)) (rule (seq begin-env-open "displaymath" begin-env-close) (clean-up-quotes) (setf display-math-flag t) (open-list 'display-math) (goto :math)) ;; unrecognized environment (rule (alt (seq begin-env-open alphabet-string "}" (opt pos-tab-arg) (opt pos-tab-arg)) (seq begin-env-open (rep* (set-not "}")) "}")) ; environment names can have more that alphabets (clean-up-quotes) ;; Some new environments maybe declared as ;; enumerable by adding their name to ;; list_env_names. Handle this by checking if the ;; env name present in the table. (let ((name (latex-env-name token-text))) (if (list-env-p name) (progn (open-list 'new-environment name) ;; expect items in this env (incf list-environment-count) ;; generate dummy item ;; this is to allow \item to be handled cleanly (open-list 'item)) (open-list 'new-environment name)))) (rule (seq (rep* whitespace) backslash (alt "grieschapter" "chapter*" "chapterx" "chapter")) (setf chapter-flag (open-group chapter-flag 'chapter (section-closer (subsection-closer (subsubsection-closer (lambda ()))))))) (rule (seq (rep* whitespace) backslash (alt "griessection" "section*" "section")) (setf section-flag (open-group section-flag 'section (subsection-closer (subsubsection-closer (lambda ())))))) (rule (seq (rep* whitespace) backslash (alt "subsection*" "subsection")) (setf subsection-flag (open-group subsection-flag 'subsection (subsubsection-closer (lambda ()))))) (rule (seq (rep* whitespace) backslash (alt "subsubsection*" "subsubsection")) (setf subsubsection-flag (open-group subsubsection-flag 'subsubsection (lambda ())))) ;; absolute sectioning constructs (rule (seq (rep* whitespace) backslash "achapter") (setf chapter-flag (open-group chapter-flag 'achapter (section-closer (subsection-closer (subsubsection-closer (aparagraph-closer (lambda ())))))))) (rule (seq (rep* whitespace) backslash "asection") (setf section-flag (open-group section-flag 'asection (subsection-closer (subsubsection-closer (aparagraph-closer (lambda ()))))))) (rule (seq (rep* whitespace) backslash "asubsection") (setf subsection-flag (open-group subsection-flag 'asubsection (subsubsection-closer (aparagraph-closer (lambda ())))))) (rule (seq (rep* whitespace) backslash "asubsubsection") (setf subsubsection-flag (open-group subsubsection-flag 'asubsubsection (aparagraph-closer (lambda ()))))) (rule (seq (rep* whitespace) backslash "apar") (setf aparagraph-flag (open-group aparagraph-flag 'apar (lambda ())))) ;; end various environments (rule (seq (rep* whitespace) backslash "end{abstract}" (rep* h-space)) (clean-up-quotes) (when aparagraph-flag (close-list) (setf aparagraph-flag nil)) (close-list)) (rule (seq (rep* whitespace) backslash "end{" (alt "center" "quote" "quotation") "}" (rep* h-space)) (clean-up-quotes) (close-list)) (rule (seq (rep* whitespace) backslash "end{equation}" (rep* h-space)) (clean-up-quotes) (close-list) (setf display-math-flag nil) (goto nil)) (rule (seq (rep* whitespace) backslash "end{" (alt "eqnarray*" "eqnarray" "eqalign*" "eqalign") "}" (rep* h-space)) (decf array-count) (close-list) (close-list) (close-list) (setf display-math-flag nil) (goto nil)) (rule (seq (rep* whitespace) backslash "end{array}" (rep* h-space)) (decf array-count) (close-list) (close-list) (close-list)) (rule (seq (rep* whitespace) backslash "end{tabular}" (rep* h-space)) (decf table-count) (close-list) (close-list) (close-list)) ;; Cases handled like tabular (rule (seq (rep* whitespace) backslash "end{tabular}" (rep* h-space)) (decf table-count) (close-list) (close-list) (close-list) (case saved-math-state ((nil)) ((:inline-math) (setf inline-math-flag t saved-math-state nil) (goto :math)) ((:display-math) (setf display-math-flag t saved-math-state nil) (goto :math)) (otherwise (error "Unknown saved-math-state ~S" saved-math-state)))) (rule (seq (rep* whitespace) backslash "end{" (alt "enumerate" "description" "itemize") "}" (rep* h-space)) (clean-up-quotes) (decf list-environment-count) (close-list) (close-list)) (rule (seq (rep* whitespace) backslash "end{document}") (clean-up-quotes) (when aparagraph-flag (setf aparagraph-flag nil) (close-list)) (when subsubsection-flag (setf subsubsection-flag nil) (close-list)) (when subsection-flag (setf subsection-flag nil) (close-list)) (when section-flag (setf section-flag nil) (close-list)) (when chapter-flag (setf chapter-flag nil) (close-list)) (close-list)) (rule (seq (rep* whitespace) backslash "end{slide}") (clean-up-quotes) (close-list)) (rule (seq (rep* whitespace) backslash "end{displaymath}") (close-list) (setf display-math-flag nil) (goto nil)) (rule (seq backslash "end{" (alt (seq alphabet-string) (seq (rep* (set-not "}")))) (opt "*") "}") (clean-up-quotes) (let ((name (latex-env-name token-text))) (if (list-env-p name) (progn (decf list-environment-count) (close-list) (close-list)) (close-list)))) ;; tex control sequences eg macro names (rule (seq backslash (set-not a-z)) ;; handle single characters with a backslash in front eg \. etc. (add-as-string (escape-quotes (subseq token-text 1)))) (when-state :math (rule (seq backslash (rep* alphabet)) (open-list 'math-cs (subseq token-text 1)) (close-list))) (rule (seq backslash (rep* alphabet)) (open-list 'cs (subseq token-text 1)) (close-list)) ;; be smart about numbers (when-state :math (rule (seq backslash math-number) (open-list 'math-number token-text) (close-list))) (rule time-number ;; Dectalk speaks time numbers correctly (add-as-string token-text)) (rule text-number (open-list 'text-number token-text) (close-list)) ;; words handled according to mode (when-state :math (rule (seq "''" (rep* h-space)) (add-as-string "double-prime"))) (rule (alt alphabet (seq alphabet (rep* letter) (opt "'") (rep+ alphabet))) (if (or inline-math-flag display-math-flag) ;; In math mode, the string should be broken up ;; into strings of one character ie "a+b" is "a" ;; "+" "b" since TeX allows for only plain ;; single letter variables (loop :for ch :across token-text :do (add-as-string (string ch))) ;; Convert text to strings. Not escape quotes ;; since umlaut now handled as a letter if this ;; causes trouble, reintroduce escape_quote as ;; in. (add-as-string token-text))) (rule (seq (rep* h-space) "\"") (if inline-quote-flag (error "Unmatched escaped quote") (progn (setf inline-quote-flag t) (open-list 'inline-quote)))) (rule (seq (rep* h-space) "``") (unless inline-quote-flag (setf inline-quote-flag t) (open-list 'inline-quote))) (rule (seq (alt "\"" "''") (rep* h-space)) ;; matching " here is a concession (if inline-quote-flag (progn ; Marking matched inline-quote (add-as-string "''") (close-list) (setf inline-quote-flag nil)) (progn ; This does not match a quotation, so just put it in the text: (add-as-string (escape-quotes token-text))))) (rule PUNCT (add-as-string (escape-quotes token-text))) (rule (rep* h-space) (add-as-string token-text)) ;; Trap things that are not caught and echo to stderr (rule (alt control-m control-l) #|ignore|#) (rule "#" (add-as-string "#")) (rule (any) (error "This escaped ~S" token-text))) (terminate-includes)))))) (defun read-token (stream) (let ((ch (read-char stream))) (case ch ((#\\)) ((#\%)) (otherwise (loop :with text = (make-array 4 :element-type 'character :adjustable t :fill-pointer 0) :while (and (char/= #\\ ch) (char/= #\% ch)) :do (vector-push-extend ch text (length text)) :finally (unread-char ch stream) (return text)))))) #+testing (with-open-file (latex #-(and) #P"~/library/informatique/standards-and-protocol/cplusplus-draft/source/grammar.tex" #P"/Volumes/USER/srv/books/informatics/standards/cplusplus-draft/source/grammar.tex") (loop :for token = (read-token latex) :while token :do (print token))) #+testing (defun getenv (var) #+clisp (ext:getenv var) #+ccl (ccl:getenv var) #-(or clisp ccl) (error "Please implement getenv in ~A" (lisp-implementation-type))) #+testing (scan stream (getenv "READ_TEX_DIR"))