#!/bin/sh
exec ${GUILE-/usr/bin/guile} -e '(scripts twerp2texi)' -s $0 "$@" # -*- scheme -*-
!#
;;; twerp2texi --- Process .twerp to make .texi

;;	Copyright (C) 2002,2003 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: twerp2texi -I INDEX [...] [-d DEPS] [-o OUTFILE] TWERP
;;
;; Process TWERP (a Texi With Eval-Requiring Predelictions file) and
;; write the result to OUTFILE, typically named w/ .texi extension, or
;; stdout if "-o OUTFILE" is not specified.  Processing requires one or
;; more index files prepared by "twerp2texi --make-index" (see above);
;; it is an error to omit "-I INDEX".
;;
;; * Options
;;
;;   At this time there is only one additional option: "-d DEPS" means
;;   write to the file DEPS a makefile fragment suitable for "include
;;   DEPS".  This fragment is typically named w/ extension .Ptexi to
;;   indicate the prerequisites (aka dependencies) of OUTFILE determined
;;   during twerp2texi operation (and not before!).  Note that DEPS only
;;   lists source files w/ no mention of .doc files.  Dependency
;;   tracking requires you to prepare the Makefile in a special way; see
;;   below for more info on "twerp2texi --prep".
;;
;; * Twerp Processing
;;
;;   A .twerp file is mostly texinfo, w/ a some directives thrown in.
;;   At this time, the entire directive must be on one line.  This
;;   restriction will probably be lifted in the future.
;;
;;   In its simplest form a directive names a single symbol:
;;
;;      @twerpdoc (SYMBOL)
;;
;;   This is replaced by the header
;;
;;      @deffn {Scheme Procedure} SYMBOL SIG ...
;;
;;   followed by text associated with SYMBOL from the appropriate .doc
;;   file.  The text is inserted verbatim.  SIG is also taken from the
;;   .doc file.
;;
;;   The list following the "@twerpdoc" directive can name additional
;;   symbols or sublists beginning w/ `C'.  These inherit the SIG and
;;   are formatted appropriately[1], depending on whether the language
;;   is Scheme (default) or C.  For example, the directive:
;;
;;      @twerpdoc (acons (C scm_acons))
;;
;;   results in the header:
;;
;;      @deffn {Scheme Procedure} acons key value alist
;;      @deffnx {C Function} scm_acons (key, value, alist)
;;
;;   Scheme macros are likewise handled with @twerpmacdoc.  For example:
;;
;;      @twerpmacdoc (begin-thread)
;;
;;   produces header:
;;
;;      @deffn {Scheme Macro} begin-thread first . rest
;;
;;   followed by the snarfed documentation.
;;
;;   [1] Style issues like what is "appropriate" are not yet finalized.
;;       There is much room for generalization here.
;;
;;   To include commentary from a module, use a "commentary directive"
;;   followed by a sexp of the form: (STYLE MODULE-NAME).  STYLE specifies
;;   STYLE specifies how to handle the raw text of the commentary, and
;;   MODULE-NAME specifies which module to consult.  For example:
;;
;;      @twerpcommentary (example (ice-9 documentation))
;;
;;   Currently the following styles are supported, corresponding to the
;;   same-named texinfo commands for enclosing blocks of text:
;;
;;      verbatim, quotation, example, smallexample,
;;      display, smalldisplay, format, smallformat
;;
;;   For example (literally), the `example' style results in something
;;   like the following to be generated:
;;
;;      @example
;;      COMMENTARY TEXT
;;      @end example
;;
;;   All styles except for `verbatim' also perform "@"-escaping for
;;   at-sign, left-curly-brace and right-curly-brace ("@", "{" and "}",
;;   respectively) to prevent texinfo misinterpretation.  You can
;;   inhibit this processing with the name pure-STYLE, for example
;;   `pure-example'.
;;
;;   You can specify a particular path to use for module searching
;;   with a setsearchpath directive, which takes a sexp: (DIR DIR...).
;;   For example:
;;
;;      @twerpsetsearchpath ("..")
;;
;;   This sets the search path to simply the parent directory.  This
;;   path is actually used as a prefix; if the search fails for the
;;   specified path, the normal `%load-path' is then searched.
;;
;; * Errors
;;
;;   The indexes are read first.  If there is an error during this
;;   process, the output file is not opened.  After index reading, any
;;   kind of error results in the output file being deleted (if not to
;;   stdout).
;;
;; * More Twerp Processing
;;
;;   All directives beginning w/ "@twerp" are reserved for future use.
;;
;;
;; TODO: Handle multi-line directives.
;;       Clean up error handling.
;;       Make `keep-going-on-missing-tag?' optional.

;;; Code:

(define-module (scripts twerp2texi)
  :autoload (scripts PROGRAM) (HVQC-MAIN)
  :autoload (ice-9 common-list) (some)
  :autoload (ice-9 rdelim) (read-line write-line)
  :autoload (ice-9 regex) (match:suffix)
  :autoload (ice-9 rw) (read-string!/partial)
  :autoload (scripts split-string-no-nulls) (split-string-no-nulls)
  :autoload (srfi srfi-13) (string-join string-delete string-index)
  :autoload (srfi srfi-14) (char-set)
  :autoload (ice-9 documentation) (file-commentary)
  ;; nothing for now
  ;; :export (twerp2texi)
  )

(define put set-object-property!)
(define get object-property)

(define check-collision? #f)            ; for now

(define (hasherr!! dir file tag tv-cdr cur)
  (format (current-error-port)
         "ERROR: hash collision: ~A\nnew: ~A/~A ~A\ncurrent: ~A\n"
         tag
         dir file tv-cdr
         cur)
  (throw 'hash-collision))

(define (read-one-index ht dir port)
  (for-each (lambda (file-info)
              (let ((file (car file-info)))
                (for-each (lambda (tag-info)
                            (let ((tag (car tag-info))
                                  (tv-cdr (cdr tag-info)))
                              (and check-collision?
                                   (hashq-ref ht tag)
                                   (not (equal? (cdr (hashq-ref ht tag))
                                                tv-cdr))
                                   (hasherr!! dir file tag tv-cdr
                                              (hashq-ref ht tag)))
                              (hashq-set! ht tag (cons (cons dir file)
                                                       tv-cdr))))
                          (cdr file-info))))
            (read port)))

(define (read-ext-indexes ports)
  (let ((ht (make-hash-table 1031)))
    (for-each (lambda (port)
                (read-one-index ht (get port 'dirname) port))
              ports)
    ht))

;;; {tag-info extraction}

(define (tv:dir tv)
  (caar tv))

(define (tv:file tv)                    ; "tag value" field extraction
  (in-vicinity (tv:dir tv) (cdar tv)))

(define (tv:sig-raw tv)
  (cadr tv))

(define (tv:sig-parts tv)
  (split-string-no-nulls (cadr tv)))

(define (tv:offsets tv)
  (cddr tv))

;;; {dependencies}

;; Return a proc WARD that knows how to write to MKFRAG (a filename) the
;; source dependencies of CHILD (the input filename) collected over standard
;; twerp2texi processing.  The output is in standard makefile format.  WARD
;; takes a command symbol and additional args, performing different tasks
;; depending on the command.
;;
;;  note! dep -- Add DEP to the list of dependencies.
;;               This command can be called many times.
;;
;;  write!    -- Write the collected dependencies to MKFRAG.
;;               This command can only be called once; further
;;               calls signal an error.
;;
;; For more info on why things are done in this way, see Automake info pages,
;; specifically the philosophy for the "depcomp" program.  (Eventually,
;; twerp2texi or some more generalized cousin will probably be merged into
;; automake, proper.  Probably this should be made into its own module...)
;;
(define (dep-ward child mkfrag)
  (let* ((deps '())
         (note! (lambda (dep)
                  (or (member dep deps)
                      (set! deps (cons dep deps)))))
         (write! (lambda ()
                   (cond ((not deps)
                          (error "write! called multiply!"))
                         ((null? deps)) ; do nothing
                         (else
                          (let ((p (open-file mkfrag OPEN_WRITE)))
                            (format p "~A :" (basename child))
                            (for-each (lambda (dep)
                                        (format p " \\\n ~A" dep))
                                      deps)
                            (newline p))))
                   (set! deps #f))))
    ;; retval
    (lambda (command . args)
      (case command
        ((note!) (note! (car args)))
        ((write!) (write!))))))

(define file-port-cache '())            ; todo: s/alist/weak-hash/

(define (file-frag tv ward)
  (let* ((file (tv:file tv))
         (offsets (tv:offsets tv))
         (port (or (assoc-ref file-port-cache file)
                   (let ((port (open-file file OPEN_READ)))
                     (set! file-port-cache
                           (acons file port file-port-cache))
                     port))))
    (seek port (car offsets) SEEK_SET)
    (let* ((len (- (cadr offsets) (car offsets)))
           (s (make-string len)))
      (or (= len (read-string!/partial s port))
          (error "lame (non-robust) file-frag implementation!"))
      (cond (ward
             (or (char=? #\soh (read-char port)) ; control-A
                 (error "corrupt .doc file!"))
             (ward 'note!
                   (let ((mystery (car (split-string-no-nulls
                                        (read-line port) ":[]")))
                         (hint (tv:dir tv)))
                     (if (char=? #\/ (string-ref mystery 0))
                         mystery
                         (string-append hint "/" mystery))))))
      s)))

(define (read-string s)
  (or (false-if-exception (with-input-from-string s (lambda () (read))))
      (throw 'twerp2texi-error 'bad-directive-data)))

(define (mapconcat proc ls sep)
  (string-join (map proc ls) sep))

(define (>> . args)
  (for-each display args))

;; Each entry in handlers is a triple (see `add' below):
;;
;;   MATCH? ARG-DISCIPLINE BACKEND
;;
;; MATCH? is a procedure that takes a line of input (string) and
;; performs some kind of regexp-exec on it, returning that value.
;; ARG-DISCIPLINE is a symbol, either `stylized-lookup' or `sexp'.
;; If ARG-DISCIPLINE is `stylized-lookup', then BACKEND should be
;; a procedure that accepts four arguments: tv full tag ward.  If
;; ARG-DISCIPLINE is `sexp', then BACKEND should be a procedure
;; that handles the sexp read from the remainder of the line
;; (after the @twerpFOO keyword).

(define *handlers*
  '())

(define (add key arg-discipline handler)
  (let ((rx (make-regexp (format #f "^@twerp~A" key))))
    (set! *handlers* (cons (list (lambda (line)
                                   (regexp-exec rx line))
                                 arg-discipline
                                 handler)
                           *handlers*))))

(add 'doc
     'stylized-lookup
     (let* (([]-set (char-set #\[ #\]))
            (del-[] (lambda (s) (string-delete s []-set)))
            (zonk-ellipsis (lambda (ls)
                             (if (null? ls)
                                 ls
                                 (let ((r (reverse ls)))
                                   (if (string=? "..." (car r))
                                       (reverse (cdr r))
                                       ls))))))
       (lambda (tv full tag ward)
         (define (extra ls tv)
           (if (null? ls)
               ""
               (string-append
                (mapconcat
                 (lambda (spec)
                   (cond ((symbol? spec)
                          (string-append
                           "@deffnx {Scheme Procedure} "
                           (symbol->string spec)
                           " "
                           (tv:sig-raw tv)))
                         ((eq? 'C (car spec))
                          (mapconcat
                           (lambda (sym)
                             (string-append
                              "@deffnx {C Function} "
                              (symbol->string sym)
                              " ("
                              (string-join (zonk-ellipsis
                                            (map del-[] (tv:sig-parts tv)))
                                           ", ")
                              ")"))
                           (cdr spec)
                           "\n"))
                         (else
                          (error "bad make-extra stuff!"))))
                 ls
                 "\n")
                "\n")))
         (>>
          ;; be nice to humans
          "@c " (tv:file tv) "\n"
          ;; header(s)
          "@deffn {Scheme Procedure} " tag " " (tv:sig-raw tv) "\n"
          (extra (cdr full) tv)
          ;; doc
          (file-frag tv ward)
          "@end deffn\n"))))

(add 'macdoc
     'stylized-lookup
     (lambda (tv full tag ward)
       (>>
        ;; be nice to humans
        "@c " (tv:file tv) "\n"
        ;; header
        "@deffn {Scheme Macro} " tag " " (tv:sig-raw tv) "\n"
        ;; doc
        (file-frag tv ward)
        "@end deffn\n")))

(define *search-path* #f)

(add 'setsearchpath
     'sexp
     (lambda (x)
       (set! *search-path* x)))

(define (module-name->filename-fragment ls)
  (string-append                        ; yuk!
   (symbol->string (car ls))
   (if (null? (cdr ls))
       ""
       (string-append                   ; double yuk!
        "/"
        (module-name->filename-fragment (cdr ls))))))

(define texinfo-quote
  (let ((dangerous "{}@"))
    (lambda (s)
      (let* ((cs (cond ((char-set? dangerous) dangerous)
                       (else (set! dangerous (string->char-set dangerous))
                             dangerous)))
             (holes (let loop ((hole (string-index s cs 0))
                               (acc '()))
                      (if (not hole)
                          acc
                          (loop (string-index s cs (1+ hole))
                                (cons hole acc))))))
        (if (null? holes)
            s                           ; optimization retval
            (let* ((len (string-length s))
                   (hole-count (length holes))
                   (new-s (make-string (+ len hole-count))))
              (let ((end (car holes)))
                (string-copy! new-s (+ end hole-count) s end len))
              (let loop ((holes holes)
                         (offset (1- hole-count)))
                (if (> 0 offset)
                    new-s               ; normal retval
                    (let* ((end (car holes))
                           (beg (if (= 0 offset) 0 (cadr holes))))
                      (string-copy! new-s (+ beg offset) s beg end)
                      (string-set! new-s (+ end offset) #\@)
                      (loop (cdr holes)
                            (1- offset)))))))))))

(add 'commentary
     'sexp
     (lambda (form)
       (let* ((style (car form))
              (tag (let ((s (symbol->string style)))
                     (if (and (< 5 (string-length s))
                              (string=? "pure-" (substring s 0 5)))
                         (substring s 5)
                         s)))
              (name (cadr form)))
         (cond ((search-path (or *search-path* %load-path)
                             (module-name->filename-fragment name)
                             '(".scm" ""))
                => (lambda (file)
                     (>> "@" tag "\n"
                         (case style
                           ((verbatim)
                            (file-commentary file))
                           ((pure-quotation pure-example pure-smallexample
                                            pure-display pure-smalldisplay
                                            pure-format pure-smallformat)
                            (file-commentary file))
                           ((quotation example smallexample
                                       display smalldisplay
                                       format smallformat)
                            (texinfo-quote (file-commentary file)))
                           (else
                            (error "bad @twerpcommentary style:" style)))
                         "@end " tag "\n")))
               (else
                (>> "[could not find commentary for " name "\n"
                    " *search-path* => " *search-path* "\n"))))))

(define keep-going-on-missing-tag? #t)  ; for now

(define (process inport index ward)
  (let loop ((line (read-line inport)))
    (or (eof-object? line)
        (begin
          (or (some (lambda (triple)
                      (let ((match? (car triple))
                            (arg-discipline (cadr triple))
                            (handle (caddr triple)))
                        (cond ((match? line)
                               => (lambda (m)
                                    (let* ((full (read-string (match:suffix m)))
                                           (tag (car full)))
                                      ;; be nice to humans
                                      (>> "@c " line "\n")
                                      (case arg-discipline
                                        ((sexp)
                                         (handle full))
                                        ((stylized-lookup)
                                         (cond ((hashq-ref index tag)
                                                => (lambda (tv)
                                                     (handle tv full tag ward)))
                                               (keep-going-on-missing-tag?
                                                (>> "[NOTE: docs missing"
                                                    " for " tag "]\n\n"))
                                               (else
                                                (throw 'twerp2texi-error
                                                       "no such tag: ~A" tag))))))))
                              (else #f))))
                    *handlers*)
              (write-line line))
          (loop (read-line inport))))))

;; maybe later
;; (define (twerp2texi ...) ...)

(define (twerp2texi/qop qop)
  (let* ((index (let ((res (qop 'index-file)))
                  (or res (error "must specify one or more indexes!"))
                  (read-ext-indexes
                   (map (lambda (file)
                          (let ((port (open-file file OPEN_READ)))
                            (put port 'dirname (dirname file))
                            port))
                        (if (list? res) res (list res))))))
         (infile (or (false-if-exception (car (qop '())))
                     (error "no input file specified")))
         (inport (open-file infile OPEN_READ))
         (outfile (or (qop 'output-file) #f))
         (outport (if outfile
                      (open-file outfile OPEN_WRITE)
                      (current-output-port)))
         (ward (qop 'write-deps
                    (lambda (mkfrag)
                      (dep-ward outfile mkfrag)))))
    (catch #t
           (lambda ()
             (write-line "@c generated file -- do not edit!" outport)
             (set-current-output-port outport)
             (process inport index ward)
             (for-each close-port (list inport outport)))
           (lambda (type . args)
             (cond (outfile
                    (close-port outport)
                    (delete-file outfile)
                    (format (current-error-port)
                            "twerp2texi: error caught, deleted ~A\n"
                            outfile)))
             ;; re-throw
             (scm-error type #f (car args) (cdr args) #f)))
    (and ward (ward 'write!)))
  #t)

(define (main args)
  (HVQC-MAIN args twerp2texi/qop
             '(usage . commentary)
             '(package . "Guile")
             '(option-spec (index-file (single-char #\I)
                                       (merge-multiple? #t)
                                       (value #t))
                           (output-file (single-char #\o)
                                        (value #t))
                           (write-deps (single-char #\d)
                                       (value #t)))))

;;; twerp2texi ends here
