#!/bin/sh
exec ${GUILE-/usr/bin/guile} -e '(scripts doc-snarf)' -s $0 "$@" # -*- scheme -*-
!#
;;; doc-snarf --- Extract documentation from source files

;; 	Copyright (C) 2001,2003 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Martin Grabmueller

;;; Commentary:

;; Usage: doc-snarf [OPTIONS] FILE
;;
;; This program reads in a Scheme source file and extracts docstrings
;; in the format specified below.  Additionally, a procedure prototype
;; is inferred from the procedure definition line starting with
;; (define... ).
;;
;; Currently, three output modes are implemented: plaintext, texinfo
;; and GDFv1.  Default is plaintext, texinfo can be switched on with the
;; `--texinfo, -t' command line option, and GDFv1 with `--dot-doc, -D'.
;;
;; Format: A docstring can span multiple lines and a docstring line
;; begins with `;; ' (two semicolons and a space). A docstring is ended
;; by either a line beginning with (define ...) or one or more lines
;; beginning with `;;-' (two semicolons and a dash). These lines are
;; called `options' and begin with a keyword, followed by a colon and
;; a string.
;;
;; Additionally, "standard internal docstrings" (for Scheme source) are
;; recognized and output as "options".  The output formatting is likely
;; to change in the future.
;;
;; Example:

;; This procedure foos, or bars, depending on the argument @var{braz}.
;;-Author: Martin Grabmueller
(define (foo/bar braz)
  (if braz 'foo 'bar))                  ; 'foo when braz is true, else 'bar

;;; Which results in the following docstring if texinfo output is
;;; enabled:
#!
foo/bar
@deffn procedure foo/bar braz
This procedure foos, or bars, depending on the argument @var{braz}.
@c Author: Martin Grabmueller
@end deffn
!#

;;; Or in this if plaintext output is used:
#!
Procedure: foo/bar braz
This procedure foos, or bars, depending on the argument @var{braz}.
;; Author: Martin Grabmueller
^L
!#

;; The special option `sig' can be used to override the inferred
;; signature of a procedure, when GDFv1 output format is selected.
;; For example:
;;
;;   ;; Do something w/ @var{a} and optional arg @var{b}.
;;   ;;-sig: (a [b])
;;   (define (my-proc a . rest) ...)
;;
;; would yield signature: (my-proc a [b]).
;;
;; TODO: Convert option lines to alist.
;;       More parameterization.
;;       Integrate `process-file' better.

;; Version string printed by `display-version' and handed to HVQC-MAIN
;; by `main'.
;; NOTE(review): this is defined before the `define-module' form below,
;; so it is bound in whatever module is current at that point -- confirm
;; it is visible from (scripts doc-snarf).
(define doc-snarf-version "0.0.4")      ; update before publishing!

;;; Code:

(define-module (scripts doc-snarf)
  :autoload (scripts PROGRAM) (HVQC-MAIN)
  :use-module (ice-9 getopt-long)
  :use-module (ice-9 regex)
  :use-module (ice-9 string-fun)
  :use-module (ice-9 rdelim)
  :export (doc-snarf))

;; Option specification passed to `getopt-long' by `doc-snarf'.  Each
;; element names a long option, its single-character alias, and whether
;; the option takes a value.
(define command-synopsis
  '((version (single-char #\v) (value #f))
    (help    (single-char #\h) (value #f))
    (output  (single-char #\o) (value #t)) ; takes the output file name
    (texinfo (single-char #\t) (value #f))
    (dot-doc (single-char #\D) (value #f))
    (lang    (single-char #\l) (value #t)))) ; takes the language name

;; Display the program name and version on the current output port.
;; This procedure only prints; it does not exit.
;;-ttn-mod: use var
(define (display-version)
  (for-each display (list "doc-snarf " doc-snarf-version "\n")))

;; Display the usage help message on the current output port.
;; This procedure only prints; it does not exit.
;;-ttn-mod: change option "source" to "lang"
(define (display-help)
  (for-each
   display
   '("Usage: doc-snarf [options...] inputfile\n"
     "  -h, --help              Show this usage information\n"
     "  -v, --version           Show version information\n"
     "  --output=FILE, -o       Specify output file [default=stdout]\n"
     "  -t, --texinfo           Format output as texinfo\n"
     "  -D, --dot-doc           Format output as GDFv1 .doc\n"
     "  -l, --lang=[c,scheme]   Specify the input language\n")))

;; Main program.  @var{args} are the command-line arguments (strings)
;; without the program name.  Depending on the options, display the
;; version, display the usage help, or snarf the first non-option
;; argument as the input file.  The usage help is also displayed when
;; no input file is given.
;;-ttn-mod: canonicalize lang
(define (doc-snarf . args)
  (let* ((options (getopt-long (cons "doc-snarf" args) command-synopsis))
         (help-wanted    (option-ref options 'help #f))
         (version-wanted (option-ref options 'version #f))
         (texinfo-wanted (option-ref options 'texinfo #f))
         (dot-doc-wanted (option-ref options 'dot-doc #f))
         (lang   (string->symbol
                  (string-downcase (option-ref options 'lang "scheme"))))
         ;; Non-option arguments; the empty list when none were given.
         (input  (option-ref options '() #f))
         (output (option-ref options 'output #f)))
    (cond
     (version-wanted (display-version))
     (help-wanted (display-help))
     ;; Bonard B. Timmons III says `(pair? input)' alone is sufficient.
     ;; Checked before the dot-doc branch as well, so that a missing
     ;; input file shows the usage instead of failing in `car'.
     ((not (pair? input)) (display-help))
     ;; "integrate" here -- kludge
     (dot-doc-wanted
      (let ((go (lambda () (process-file (car input)))))
        (if output
            (with-output-to-file output go)
            (go))))
     (else
      (snarf-file (car input) output lang
                  (cond (texinfo-wanted format-texinfo)
                        ;; Not reachable while the kludge above handles
                        ;; dot-doc; kept for when it gets integrated.
                        (dot-doc-wanted format-dot-doc)
                        (else format-plain)))))))

;; Script entry point.  @var{args} is the full argument list; its first
;; element is dropped before the rest is applied to `doc-snarf'.
;; NOTE(review): HVQC-MAIN is autoloaded from (scripts PROGRAM) and not
;; visible here; presumably it handles --help/--version using the
;; usage/package/version pairs -- confirm.
(define (main args)
  (HVQC-MAIN args (lambda (args)
                    (apply doc-snarf (cdr args)))
             '(usage . commentary)
             '(package . "Guile")
             `(version . ,doc-snarf-version)))

;; Supported languages and their parameters.  Each element has form:
;; (LANG DOC-START DOC-END DOC-PREFIX OPT-PREFIX SIG-START STD-INT-DOC?)
;; LANG is a symbol, STD-INT-DOC? is a boolean indicating whether or not
;; LANG supports "standard internal docstring" (a string after the formals),
;; everything else is a string specifying a regexp.
;; `snarf' tests DOC-PREFIX before OPT-PREFIX and DOC-END, so DOC-PREFIX
;; must not match option lines or the docstring terminator.
;;-ttn-mod: new var
(define supported-languages
  '((c
     "^/\\*(.*)"                        ; DOC-START
     "^ \\*/"                           ; DOC-END
     "^ \\* (.*)"                       ; DOC-PREFIX
     "^ \\*-(.*)"                       ; OPT-PREFIX
     "NOTHING AT THIS TIME!!!"          ; SIG-START
     #f                                 ; STD-INT-DOC?
     )
    (scheme
     "^;; (.*)"                         ; DOC-START
     "^;;\\."                           ; DOC-END
     ;; Two semicolons and exactly one space, as documented in the
     ;; commentary.  The former "^;; *(.*)" also matched `;;-' option
     ;; lines and the `;;.' terminator.
     "^;; (.*)"                         ; DOC-PREFIX
     "^;;-(.*)"                         ; OPT-PREFIX
     "^\\(define"                       ; SIG-START
     #t                                 ; STD-INT-DOC?
     )))

;; Get @var{lang}'s @var{parameter}.  Both args are symbols.
;; @var{parm} is one of docstring-start, docstring-end, docstring-prefix,
;; option-prefix, signature-start or std-int-doc?.  Signal an error that
;; names the offending symbol if either argument is unknown.
;;-ttn-mod: new proc
(define (lang-parm lang parm)
  (let ((parms (assq-ref supported-languages lang))
        ;; Position of PARM within a `supported-languages' element,
        ;; not counting the leading LANG symbol.
        (index (case parm
                 ((docstring-start)  0)
                 ((docstring-end)    1)
                 ((docstring-prefix) 2)
                 ((option-prefix)    3)
                 ((signature-start)  4)
                 ((std-int-doc?)     5)
                 (else #f))))
    (cond ((not parms)
           (error "doc-snarf: unsupported language:" lang))
          ((not index)
           (error "doc-snarf: unknown language parameter:" parm))
          (else
           (list-ref parms index)))))

;; Snarf all docstrings from the file @var{input}, written in language
;; @var{lang} (a symbol), and write them to file @var{output} using the
;; entry formatter @var{writer}.  Signal an error if @var{lang} is not
;; listed in `supported-languages'.
;;-ttn-mod: don't use string comparison, consult table instead
(define (snarf-file input output lang writer)
  (if (not (assq lang supported-languages))
      (error "doc-snarf: input language must be c or scheme."))
  (write-output (snarf input lang) output writer))

;; fixme: this comment is required to trigger standard internal
;; docstring snarfing...  ideally, it wouldn't be necessary.
;;-ttn-mod: new proc, from snarf-docs (aren't these names fun?)
(define (find-std-int-doc line input-port)
  "Read the entire form that starts with @var{line} and return its standard
internal docstring if found.  Return #f if not.  If the form is complete on
@var{line}, @var{input-port} is left untouched; otherwise @var{line} is
unread onto @var{input-port} and the rest of the form is consumed from it."
  (let ((form
         (or
          ;; A form that fits on LINE is read from the string, so that
          ;; whatever follows it on LINE cannot leak into the next line
          ;; of INPUT-PORT.
          (catch #t
                 (lambda ()
                   (let ((f (call-with-input-string line read)))
                     (and (pair? f) f)))
                 (lambda args #f))      ; incomplete form: read from the port
          (begin
            ;; LINE comes from `read-line' without its newline.  Put the
            ;; newline back too, otherwise a trailing comment on LINE
            ;; would swallow the following line.
            (unread-string (string-append line "\n") input-port) ; ugh
            (read input-port)))))
    (cond ((and (list? form)            ; (define (PROC ARGS) "DOC" ...)
                (< 3 (length form))
                (eq? 'define (car form))
                (pair? (cadr form))
                (symbol? (caadr form))
                (string? (caddr form)))
           (caddr form))
          ((and (list? form)            ; (define VAR (lambda ARGS "DOC" ...))
                (< 2 (length form))
                (eq? 'define (car form))
                (symbol? (cadr form))
                (list? (caddr form))
                (< 3 (length (caddr form)))
                (eq? 'lambda (car (caddr form)))
                (string? (caddr (caddr form))))
           (caddr (caddr form)))
          (else #f))))

;; Split @var{string} at newline characters and return the list of
;; resulting lines, each with @var{prefix} prepended.
;;-ttn-mod: new proc
(define (split-prefixed string prefix)
  (let ((add-prefix (lambda (text) (string-append prefix text))))
    (separate-fields-discarding-char
     #\newline string
     (lambda fields (map add-prefix fields)))))

;; snarf input-file lang
;; Extract docstrings from the input file @var{input-file}, presumed
;; to be written in language @var{lang} (a symbol).  Return the list
;; of entries found, in file order.
;;-Author: Martin Grabmueller <mgrabmue@cs.tu-berlin.de>
;;-Created: 2001-02-17
;;-ttn-mod: regularize lang parm lookup, add "std int doc" snarfing (2 places)
(define (snarf input-file lang)
  (let* ((i-p (open-input-file input-file))
         (parm-regexp (lambda (parm) (make-regexp (lang-parm lang parm))))
         (docstring-start  (parm-regexp 'docstring-start))
         (docstring-end    (parm-regexp 'docstring-end))
         (docstring-prefix (parm-regexp 'docstring-prefix))
         (option-prefix    (parm-regexp 'option-prefix))
         (signature-start  (parm-regexp 'signature-start))
         ;; OPTIONS plus, for languages with standard internal
         ;; docstrings, one "internal: ..." line per docstring line of
         ;; the form starting at LINE.  Both lists are newest-first.
         (augmented-options
          (lambda (line i-p options)
            (let ((int-doc (and (lang-parm lang 'std-int-doc?)
                                (let ((d (find-std-int-doc line i-p)))
                                  (and d (split-prefixed d "internal: "))))))
              (if int-doc
                  (append (reverse int-doc) options)
                  options)))))

    ;; LINE is the line under examination and LNO its 0-based number.
    ;; DOC-STRINGS and OPTIONS collect the current comment block,
    ;; newest-first; ENTRIES collects finished entries, newest-first.
    ;; NOTE(review): LNO advances by one per iteration even when
    ;; `find-std-int-doc' consumed further lines of a define form, so
    ;; line numbers reported after such a form may lag -- confirm.
    (let lp ((line (read-line i-p)) (state 'neutral) (doc-strings '())
             (options '()) (entries '()) (lno 0))

      ;; Continue with the next input line.
      (define (next state doc-strings options entries)
        (lp (read-line i-p) state doc-strings options entries (+ lno 1)))

      ;; Close the current comment block: record an entry and go back
      ;; to 'neutral.  ALL-OPTIONS is OPTIONS, possibly augmented with
      ;; internal docstring lines.  `parse-entry' subtracts the number
      ;; of options it is given to find the first line of the block, so
      ;; LNO is raised by the number of augmented lines, which do not
      ;; occupy comment lines in the file.
      (define (finish def-line all-options)
        (let ((entry (parse-entry doc-strings all-options def-line
                                  input-file
                                  (+ lno (- (length all-options)
                                            (length options))))))
          (next 'neutral '() '() (cons entry entries))))

      ;; Shared tail of the 'doc-string and 'options states: another
      ;; option line, the define line, the docstring end marker, or
      ;; anything else (which discards the block).
      (define (option-or-end)
        (cond
         ((regexp-exec option-prefix line)
          => (lambda (m)
               (next 'options doc-strings
                     (cons (match:substring m 1) options) entries)))
         ((regexp-exec signature-start line)
          (finish line (augmented-options line i-p options))) ; ttn-mod
         ((regexp-exec docstring-end line)
          (finish #f options))
         (else
          (next 'neutral '() '() entries))))

      (cond
       ((eof-object? line)
        (close-input-port i-p)
        (reverse entries))

       ;; State 'neutral: we're currently not within a docstring or
       ;; option section
       ((eq? state 'neutral)
        (cond ((regexp-exec docstring-start line)
               => (lambda (m)
                    (next 'doc-string (list (match:substring m 1))
                          '() entries)))
              (else
               (next state '() '() entries))))

       ;; State 'doc-string: we have started reading a docstring and
       ;; are waiting for more, for options or for a define.
       ((eq? state 'doc-string)
        (cond ((regexp-exec docstring-prefix line)
               => (lambda (m)
                    (next 'doc-string
                          (cons (match:substring m 1) doc-strings)
                          '() entries)))
              (else
               (option-or-end))))

       ;; State 'options: We're waiting for more options or for a
       ;; define.
       ((eq? state 'options)
        (option-or-end))))))

;; An entry is a vector tagged with the symbol `entry':
;;   #(entry SYMBOL SIGNATURE DOCSTRINGS OPTIONS FILENAME LINE)
;; The accessors below return the field at the matching index.
(define (make-entry symbol signature docstrings options filename line)
  (vector 'entry symbol signature docstrings options filename line))

(define (entry-symbol entry)     (vector-ref entry 1))
(define (entry-signature entry)  (vector-ref entry 2))
(define (entry-docstrings entry) (vector-ref entry 3))
(define (entry-options entry)    (vector-ref entry 4))
(define (entry-filename entry)   (vector-ref entry 5))
(define (entry-line entry)
  "This docstring will not be snarfed, unfortunately..."
  (vector-ref entry 6))

;; Create a docstring entry from the docstring line list
;; @var{docstrings} and the option line list @var{options} (both
;; newest-first) and the define line @var{def-line}.  When
;; @var{def-line} is #f, the first docstring line serves as both symbol
;; and signature.  The entry's line is @var{line-no} minus the number
;; of docstring and option lines, plus one.
(define (parse-entry docstrings options def-line filename line-no)
  (let ((docs (reverse docstrings))
        (opts (reverse options))
        (first-line (+ (- line-no (length docstrings) (length options)) 1)))
    (cond
     (def-line
       (make-entry (get-symbol def-line) (make-prototype def-line)
                   docs opts filename first-line))
     ((pair? docs)
      (make-entry (string->symbol (car docs)) (car docs) (cdr docs)
                  opts filename first-line))
     (else
      (make-entry 'foo "" docs opts filename first-line)))))

;; Read the thing being defined from @var{def-line}, a line starting
;; with @code{(define...}: skip the opening paren and the defining
;; keyword, then read one datum.  Return #f if reading fails, e.g.
;; because the formals continue on the next line.
(define (read-definee def-line)
  (catch #t
         (lambda ()
           (call-with-input-string
            def-line
            (lambda (s-p)
              (read-char s-p)           ; opening paren
              (read s-p)                ; defining keyword
              (read s-p))))
         (lambda args #f)))

;; Create a string which is a procedure prototype.  The necessary
;; information for constructing the prototype is taken from the line
;; @var{def-line}, which is a line starting with @code{(define...}.
;; Return the empty string if no prototype can be read from the line.
(define (make-prototype def-line)
  (let ((definee (read-definee def-line)))
    (cond
     ((pair? definee)
      (join-symbols definee))
     ((symbol? definee)
      (symbol->string definee))
     (else
      ""))))

;; Return the symbol defined by the line @var{def-line}, which is a
;; line starting with @code{(define...}.  Return the symbol @code{foo}
;; if none can be read from the line.
(define (get-symbol def-line)
  (let ((definee (read-definee def-line)))
    (cond
     ((pair? definee)
      (car definee))
     ((symbol? definee)
      definee)
     (else
      'foo))))

;; Return a string made of the elements of the list @var{s}, separated
;; with a space character.  A dotted tail is rendered as ". TAIL".
;; Elements are normally symbols; anything else (a keyword, a nested
;; list) is rendered in its written form rather than signalling an
;; error.
(define (join-symbols s)
  (let ((->string (lambda (x)
                    (if (symbol? x)
                        (symbol->string x)
                        (object->string x)))))
    (cond ((null? s)
           "")
          ((not (pair? s))              ; dotted tail
           (string-append ". " (->string s)))
          ((null? (cdr s))
           (->string (car s)))
          (else
           (string-append (->string (car s)) " "
                          (join-symbols (cdr s)))))))

;; Write @var{entries} to @var{output-file} using @var{writer}.
;; @var{writer} is a proc that takes one entry and writes it to the
;; current output port.
;; If @var{output-file} is #f, write to stdout.
;;-ttn-mod: new proc
(define (write-output entries output-file writer)
  (let ((emit (lambda () (for-each writer entries))))
    (if output-file
        ;; `with-output-to-file' closes the port when EMIT returns,
        ;; unlike a bare `open-output-file'.
        (with-output-to-file output-file emit)
        (emit))))

;; Write an @var{entry} using Guile Documentation Format Version 2:
;; a form feed, the parenthesized signature, the docstring lines, then
;; a control-A followed by "[FILENAME:LINE]".
;; NOTE(review): the --dot-doc help text and the commentary speak of
;; GDFv1 -- confirm which version this writer is meant to produce.
(define (format-dot-doc entry)
  (display "\f\n")
  (display "(")
  (display (entry-signature entry))
  (display ")")
  (newline)
  (for-each write-line (entry-docstrings entry))
  ;; Spelled as a character name so the separator cannot get lost in
  ;; editing, as in `display-procedure-doc-entry'.
  (display #\soh)                       ; control-A
  (display "[")
  (display (entry-filename entry))
  (display ":")
  (display (entry-line entry))
  (display "]")
  (newline))

;; Write an @var{entry} using texinfo format: the symbol, a comment
;; naming the source location, then a @deffn block holding the
;; docstring lines and the options as @c comments.
;;-ttn-mod: renamed from `texinfo-output', distilled
(define (format-texinfo entry)
  (let ((emit-line (lambda parts
                     (for-each display parts)
                     (newline))))
    (display "\n\f")
    (emit-line (entry-symbol entry))
    (emit-line "@c snarfed from " (entry-filename entry)
               ":" (entry-line entry))
    (emit-line "@deffn procedure " (entry-signature entry))
    (for-each emit-line (entry-docstrings entry))
    (for-each (lambda (opt) (emit-line "@c " opt))
              (entry-options entry))
    (emit-line "@end deffn")))

;; Write an @var{entry} using plain format: the signature, the
;; docstring lines, the options prefixed with ";; ", the source
;; location, and a closing form feed line.
;;-ttn-mod: distilled
(define (format-plain entry)
  (let ((emit-line (lambda parts
                     (for-each display parts)
                     (newline))))
    (emit-line "Procedure: " (entry-signature entry))
    (for-each emit-line (entry-docstrings entry))
    (for-each (lambda (opt) (emit-line ";; " opt))
              (entry-options entry))
    (emit-line "Snarfed from " (entry-filename entry)
               ":" (entry-line entry))
    (emit-line "\f")))

;;;---------------------------------------------------------------------------
;;; stuff from snarf-docs (to be better integrated)

(use-modules ((scripts read-scheme-source)
              :select ((read-scheme-source-silently . file-forms)
                       quoted? clump-comments))
             ((srfi srfi-13) :select (string-join string-trim-both)))

;; Display INVOCATION, DOC, FILE and LINE info of a procedure on the
;; current output port.  The line number shown is LINE minus one.
;; The format is compatible with Guile Documentation Format Version 1.
;;
(define (display-procedure-doc-entry invocation doc file line)
  (for-each display
            (list "\f\n"
                  invocation "\n"
                  doc "\n"
                  #\soh                 ; control-A
                  "[" file ":" (1- line) "]\n")))

;; Matches an option line: one or more semicolons, a dash, a keyword
;; made of letters and dashes (captured as group 1), a colon, and any
;; following spaces or tabs.  Used by `clean' in `process-file', which
;; takes the rest of the line as the option's value.
(define option-prefix-rx (make-regexp "^;+-([-A-Za-z]+):[ \t]*"))

;; Snarf procedure documentation from FILE and display one entry per
;; documented procedure or syntax form on the current output port,
;; using `display-procedure-doc-entry'.
;; NOTE(review): `file-forms', `clump-comments' and `quoted?' come from
;; (scripts read-scheme-source) and are not visible here; the notes
;; below describe only how their results are used.
;;
(define (process-file file)

  ;; Accessors for the property alist that describes a form.
  (define (->line x) (assq-ref x 'line))
  (define (->sig  x) (assq-ref x 'signature))
  (define (->sid  x) (assq-ref x 'std-int-doc))
  (define (->type x) (assq-ref x 'type))

  ;; Return a procedure that measures, for a string S, the length of
  ;; the match of "[- ]*" found when searching from offset LEVEL.  A
  ;; string exactly LEVEL characters long yields 99999, so that it does
  ;; not lower the minimum computed in `clean'.
  (define (make-chop level)
    (let* ((rx (make-regexp "[- ]*")))
      (lambda (s)
        (cond ((= level (string-length s))
               99999)
              ((regexp-exec rx s level)
               => (lambda (m)
                    (- (match:end m) (match:start m))))
              (else 0)))))

  ;; Turn comment LINES into a pair (LEVEL . CHUNK).  Lines matching
  ;; `option-prefix-rx' are collected into an alist of (KEYWORD . VALUE)
  ;; and attached to CHUNK as its `opts' object property.  The other
  ;; lines lose their first BYE-BYE characters (LEVEL plus the smallest
  ;; `chop' result over all lines) and are joined with newlines to form
  ;; CHUNK.
  ;; NOTE(review): presumably LEVEL is the number of leading semicolons
  ;; of the comment block -- confirm against `clump-comments'.
  (define (clean level lines)
    (let* ((chop (make-chop level))
           (bye-bye (+ level (apply min (map chop lines)))))
      (let loop ((lines lines) (opts '()) (after '()))
        (if (null? lines)
            (let ((chunk (string-join (reverse after) "\n")))
              (or (null? opts) (set-object-property! chunk 'opts opts))
              (cons level chunk))       ; retval
            (let ((line (car lines)))
              (cond ((regexp-exec option-prefix-rx line)
                     => (lambda (m)
                          (loop (cdr lines)
                                (acons (string->symbol
                                        (match:substring m 1))
                                       (match:suffix m)
                                       opts)
                                after)))
                    (else
                     (loop (cdr lines)
                           opts
                           ;; A line exactly LEVEL characters long
                           ;; becomes the empty string.
                           (cons (make-shared-substring
                                  line (if (= level (string-length line))
                                           level
                                           bye-bye))
                                 after)))))))))

  ;; If CHUNK carries a `sig' option, read its value and return it with
  ;; TAG consed in front; otherwise return #f.
  (define (override-sig tag chunk)
    (cond ((object-property chunk 'opts)
           => (lambda (opts)
                (cond ((assq-ref opts 'sig)
                       => (lambda (sig)
                            (cons tag (read (open-input-string sig)))))
                      (else #f))))
          (else #f)))

  ;; do it!
  ;; STASH holds the chunk of the comment block seen immediately before
  ;; the current form, or #f.  It is reset after every other form.
  (let loop ((forms (clump-comments (file-forms file) clean))
             (stash #f))
    (or (null? forms)
        (let ((form (car forms)))
          (cond ((quoted? 'following-form-properties form)
                 => (lambda (alist)
                      (case (->type alist)
                        ((procedure syntax)
                         (let ((sid (cond ((->sid alist))
                                          (else #f))))
                           ;; Only forms with an internal docstring or a
                           ;; preceding comment are displayed; the
                           ;; internal docstring takes precedence.
                           (and (or sid stash)
                                (display-procedure-doc-entry
                                 (or (override-sig (car (->sig alist)) stash)
                                     (->sig alist))
                                 (string-trim-both
                                  ;; venerate tradition
                                  (cond (sid) (else stash))
                                  #\newline)
                                 file
                                 (->line alist))))))
                      (loop (cdr forms) #f)))
                ((and (pair? form) (number? (car form))) ; level
                 ;; A (LEVEL . CHUNK) pair as built by `clean'.
                 (loop (cdr forms) (cdr form)))
                (else
                 (loop (cdr forms) #f)))))))

;;; doc-snarf ends here
