;;; rtfrtf.el - Read The F* .RTF-files. ;;; Copyright (C) 1991 Per Cederqvist (ceder@lysator.liu.se) ;;; This program converts the two BASIS 15-files b15body.rtf and ;;; b15annex.rtf to human-readable ASCII. ;;; Note that much information such as italics, boldface, font ;;; changes, placement of footnotes, page references et cetera are ;;; lost in the process. ;;; This program is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 1, or (at your option) ;;; any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program; if not, write to the Free Software ;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ;;; How to use this program: ;;; ;;; Start emacs. ;;; Load this file with M-x load-file rtfrtf.el ;;; Load b15body.rtf. ;;; M-x rtfrtf. ; This takes a few minutes... Be patient! ;;; C-x b *rtf* ; Go to result buffer *rtf*. ;;; C-x C-w b15body.txt ; Save the result. ;;; Load b15annex.rtf. ; Take next file. ;;; M-x rtfrtf. ;;; C-x b *rtf* ;;; C-x C-w b15annex.txt ;;; This is not a good example of clean code. This program was created ;;; with the sole purpose to be able to read BASIS 15. It is very likely ;;; to fail i you try it on another .rtf-file. ;;; Don't ask me for a rtfrtf'd version of basis 15. There are ;;; copyright notices in that file, and I have no permission to ;;; redistribute it. Sorry. (defun rtfrtf () "Decode contents of current buffer. Place result in buffer *rtf*." (interactive) (goto-char (point-min)) (re-search-forward (regexp-quote "{\\rtf")) (backward-char 4) (setq state-stack (list (copy-state default-state))) (setq res-buf (get-buffer-create "*rtf*")) (setq src-buf (current-buffer)) (set-buffer res-buf) (erase-buffer) (set-buffer src-buf) (rtf-loop)) (defun rtf-loop () (while state-stack (let ((n (char-after (point)))) (cond ((null n) (setq state-stack nil)) ((= 92 n) ;\ (forward-char 1) (cond ((looking-at "[a-zA-Z]") (rtf-control-word)) (t (rtf-control-symbol)))) ((= ?{ n) (forward-char 1) (setq state-stack (cons (copy-state (car state-stack)) state-stack))) ((= ?} n) (pop-state) (if state-stack (forward-char 1))) ((= 13 n) (forward-char 1)) ((= 10 n) (forward-char 1)) (t (forward-char 1) (rtf-insert n)))))) (defun rtf-insert (char) "Insert a char into the current rtf-source." (set-buffer res-buf) (goto-char (point-max)) (insert char) (cond ((> (current-column) fill-column) (cond ((re-search-backward " " (save-excursion (beginning-of-line) (point)) t) (delete-char 1) (rtf-newline))) (goto-char (point-max)))) (set-buffer src-buf)) (defun rtf-newline () (cond ((state->box (car state-stack)) (while (< (current-column) fill-column) (insert " ")) (insert " +\n+ ")) (t (insert "\n")))) (defun rtf-control-word () "A backslash was found. Decode the control word." (re-search-forward "[a-zA-Z]*") (let ((word (buffer-substring (match-beginning 0) (match-end 0))) (n ) (arg nil)) ;Numeric argument? (cond ((string-match (regexp-quote (char-to-string (char-after (point)))) "-0123456789") (re-search-forward "[-0-9][0-9]*") (setq arg (read (buffer-substring (match-beginning 0) (match-end 0)))))) ;Trailing space? (cond ((eq 32 (char-after (point))) (forward-char 1))) (let ((function (assoc word rtf-control-word-list))) (cond ((null function) (error "Unknown control word: %s" word))) (if arg (funcall (car (cdr function)) arg) (funcall (car (cdr function))))))) (defun rtf-control-symbol () "Decode control symbol." (let ((symbol (char-after (point)))) (forward-char 1) (let ((sym (assoc symbol rtf-control-symbol-list))) (cond ((null sym) (error "Unknown control symbol: %s" symbol))) (eval (car (cdr sym)))))) (setq rtf-control-symbol-list '((92 (rtf-insert 92)) (?{ (rtf-insert ?{)) (?} (rtf-insert ?})) (?~ (rtf-insert 32)) (?- nil) (?_ (rtf-insert ?-)) (10 nil) (?' (rtf-insert ?')) (?* (ignore-group)))) (setq rtf-control-word-list '(("page" rtf-page-break) ("rtf" dummy) ("ansi" dummy) ("deff" dummy) ("fonttbl" ignore-group) ("info" ignore-group) ("widowctrl" dummy) ("ftnbj" dummy) ("template" ignore-group) ("sectd" rtf-sectd) ("linex" dummy) ("endnhere" rtf-endnhere) ("header" rtf-header) ("pard" rtf-pard) ("colortbl" ignore-group) ("stylesheet" ignore-group) ("plain" rtf-plain) ("s" rtf-s) ("li" rtf-li) ("sl" rtf-sl) ("brdrb" rtf-brdrb) ("brsp" dummy) ("brdrs" rtf-brdrs) ("tqc" rtf-tqc) ("tx" rtf-tx) ("fs" rtf-fs) ("tqr" rtf-tqr) ("field" dummy) ("flddirty" dummy) ("fldinst" dummy) ("fldrslt" dummy) ("b" rtf-b) ("pagebb" rtf-pagebb) ("tab" rtf-tab) ("par" rtf-par) ("footer" rtf-footer) ("brdrt" rtf-brdrt) ("sa" rtf-sa) ("keepn" rtf-keepn) ("keep" rtf-keep) ("f" rtf-f) ("fldedit" dummy) ("fldlock" dummy) ("tqdec" rtf-tqdec) ("tldot" rtf-tldot) ("sb" rtf-sb) ("bkmkstart" dummy) ("bkmkend" dummy) ("i" rtf-i) ("fi" rtf-fi) ("ri" rtf-ri) ("box" rtf-box) ("line" rtf-line) ("brdrdb" rtf-brdrdb) ("ul" rtf-ul) ("margl" dummy) ("margr" dummy) ("facingp" dummy) ("gutter" rtf-gutter) ("ftnsep" dummy) ("sbknone" dummy) ("pgnlcrm" dummy) ("chftnsep" dummy) ("titlepg" dummy) ("headerl" dummy) ("headerr" dummy) ("footerl" dummy) ("footerr" dummy) ("headerf" dummy) ("footerf" dummy) ("footnote" dummy) ("qr" dummy) ("phmrg" dummy) ("absw" dummy) ("posxr" dummy) ("qc" dummy) ("sect" rtf-sect) ("cols" dummy) ("colsx" dummy) ("sbkodd" dummy) ("vertalc" dummy) ("pgnrestart" dummy) ("up" dummy) ("qj" dummy) ("cf" dummy) ("pict" ignore-group) ("scaps" dummy) ("column" dummy) ("chftn" dummy) ("trowd" dummy) ("trleft" dummy) ("clbrdrt" dummy) ("clbrdrl" dummy) ("clbrdrb" dummy) ("clbrdrr" dummy) ("cellx" dummy) ("intbl" dummy) ("cell" dummy) ("row" dummy) ("headery" dummy) ("footery" dummy) ("v" dummy) ("expnd" dummy) ("ulw" dummy) )) (defun rtf-tab () "Same as ASCII 9" (rtf-insert 9)) (defun rtf-par () "End of paragraph." (set-buffer res-buf) (rtf-newline) (rtf-newline) (set-buffer src-buf) (rtf-insert 32) (rtf-insert 32)) (defun rtf-pard () "Reset to default para properties." (let ((s (car state-stack))) (if (state->box s) (terminate-box)))) (defun rtf-chpgn () "Current page number (as in headers)") (defun rtf-chftn () "Auto numbered footnote reference (footnote to follow in a group)") (defun rtf-chpict () "Placeholder character for picture (picture to follow in a group)") (defun rtf-chdate () "Current date (as in headers)") (defun rtf-chtime () "Current time (as in headers)") (defun rtf-page () "Required page break") (defun rtf-line () "Required line break (no paragraph break)" (set-buffer res-buf) (rtf-newline) (set-buffer src-buf)) (defun rtf-sect () "End of section and end of paragraph.") (defun rtf-pict () "The destination is a picture. The group must immediately follow a \chpict character. The plain text describes the picture as a hex dump (string of characters 0,1,... 9, a, ..., e, f.) (Formatting properties to determine data interpretation, size)") (defun rtf-footnote () "The destination is a footnote text. The group must immediately follow the footntoe reference character(s).") (defun rtf-header () "The destination is the header text for the current section.The group must precede the first plain text character in the section.") (defun rtf-headerl () "Same as above, but header for left-hand pages.") (defun rtf-headerr () "Same as above, but header for right-hand pages.") (defun rtf-headerf () "Same as above, but header for first page.") (defun rtf-footer () "Same as above, but footer.") (defun rtf-footerl () "Same as above, but footer for left-hand pages.") (defun rtf-footerr () "Same as above, but footer for right-hand pages.") (defun rtf-footerf () "Same as above, but header for first page.") (defun rtf-ftnsep () "Same as above, but text is footnote separator") (defun rtf-ftnsepc () "Same as above, but text is separator for continued footnotes.") (defun rtf-ftncn () "Same as above, but text is continued footnote notice.") (defun rtf-info () "Text is information block for the document. Parts of the text is further classified by "properties" of the text that are listed below - such as "title". These are not formatting properties, but a device to delimit and identify parts of the info from the text in the group.") (defun rtf-stylesheet () "Text is the style sheet for the document.More precisely, text between semicolons are taken to be style names which will be defined to stand for the formatting properties which are in effect.") (defun rtf-fonttbl () "Font table. See below.") (defun rtf-colortbl () "Color table. See below.") (defun rtf-comment () "Text will be ignored.") (defun rtf-paperw () "Paper width in twips 12240") (defun rtf-paperh () "Paper height 15840") (defun rtf-margl () "Left margin 1800") (defun rtf-margr () "Right margin 1800") (defun rtf-margt () "Top margin 1440") (defun rtf-margb () "Bottom margin 1440") (defun rtf-facingp () "Facing pages") (defun rtf-gutter (gutter) "Gutter width") (defun rtf-deftab () "Default tab width 720") (defun rtf-widowctrl () "Enable widow control") (defun rtf-endnotes () "Footnotes at end of section") (defun rtf-ftnbj () "Footnotes at bottom of page default") (defun rtf-ftntj () "Footnotes beneath text (top just)") (defun rtf-ftnstart () "Starting footnote number 1") (defun rtf-ftnrestart () "Restart footnote numbers each page") (defun rtf-pgnstart () "Starting page number 1") (defun rtf-linestart () "Starting line number 1") (defun rtf-landscape () "Printed in landscape format") (defun rtf-sectd () "Reset to default section properties") (defun rtf-nobreak () "Break code") (defun rtf-colbreak () "Break code default") (defun rtf-pagebreak () "Break code") (defun rtf-evenbreak () "Break code") (defun rtf-oddbreak () "Break code") (defun rtf-pgnrestart () "Restart page numbers at 1") (defun rtf-pgndec () "Page number format decimal default") (defun rtf-pgnucrm () "Page number format uc roman") (defun rtf-pgnlcrm () "Page number format lc roman") (defun rtf-pgnucltr () "Page number format uc letter") (defun rtf-pgnlcltr () "Page number format lc letter") (defun rtf-pgnx () "Auto page number x pos 720") (defun rtf-pgny () "Auto page number y pos 720") (defun rtf-linemod () "Line number modulus") (defun rtf-linex () "Line number - text distance 360") (defun rtf-linerestart () "Line number restart at 1 default") (defun rtf-lineppage () "Line number restart on each page") (defun rtf-linecont () "Line number continued from prev section") (defun rtf-headery () "Header y position from top of page 720") (defun rtf-footery () "Footer y position from bottom of page 720") (defun rtf-cols () "Number of columns 1") (defun rtf-colsx () "Space between columns 720") (defun rtf-endnhere () "Include endnotes in this section") (defun rtf-titlepg () "Title page is special") (defun rtf-s (style) "Style") (defun rtf-ql () "Quad left default") (defun rtf-qr () "Right") (defun rtf-qj () "Justified") (defun rtf-qc () "Centered") (defun rtf-fi (fi) "First line indent") (defun rtf-li (li) "Left indent") (defun rtf-ri (ri) "Right indent") (defun rtf-sb (sb) "Space before") (defun rtf-sa (sa) "Space after") (defun rtf-sl (sl) "Space between lines") (defun rtf-keep () "Keep") (defun rtf-keepn () "Keep with next para") (defun rtf-sbys () "Side by side") (defun rtf-pagebb () "Page break before" (set-buffer res-buf) (rtf-newline) (set-buffer src-buf) (rtf-insert 12) (set-buffer res-buf) (rtf-newline) (set-buffer src-buf)) (defun rtf-noline () "No line numbering") (defun rtf-brdrt () "Border top") (defun rtf-brdrb () "Border bottom") (defun rtf-brdrl () "Border left") (defun rtf-brdrr () "Border right") (defun rtf-box () "Border all around" (set-buffer res-buf) (insert "\n +----------------------------------------------------------------------+ + ") (set-buffer src-buf) (set-state->box (car state-stack) t)) (defun terminate-box () (set-buffer res-buf) (beginning-of-line) (cond ((looking-at " *+ *$") (delete-region (point) (point-max)) (insert " +\n+----------------------------------------------------------------------+")) (t (while (< (current-column) fill-column) (insert " ")) (insert " +\n+----------------------------------------------------------------------+"))) (set-buffer src-buf) (set-state->box (car state-stack) nil)) (defun rtf-brdrs () "Single thickness") (defun rtf-brdrth () "Thick") (defun rtf-brdrsh () "Shadow") (defun rtf-brdrdb () "Double") (defun rtf-tx (tx) "Tab position") (defun rtf-tqr () "Right flush tab (these apply to last specified pos)") (defun rtf-tqc () "Centered tab") (defun rtf-tqdec () "Decimal aligned tab") (defun rtf-tldot () "Leader dots") (defun rtf-tlhyph () "Leader hyphens") (defun rtf-tlul () "Leader underscore") (defun rtf-tlth () "Leader thick line") (defun rtf-plain () "Reset to default text properties.") (defun rtf-b () "Bold") (defun rtf-i () "Italic") (defun rtf-strike () "Strikethrough") (defun rtf-outl () "Outline") (defun rtf-shad () "Shadow") (defun rtf-scaps () "Small caps") (defun rtf-caps () "All caps") (defun rtf-v () "Invisible text") (defun rtf-f (n) "Font number n") (defun rtf-fs (fs) "Font size in half points 24") (defun rtf-ul () "Underline") (defun rtf-ulw () "Word underline") (defun rtf-uld () "Dotted underline") (defun rtf-uldb () "Double underline") (defun rtf-up () "Superscript in half points") (defun rtf-dn () "Subscript in half points") (defun rtf-title () "Following plain text is the title") (defun rtf-subject () "Following text is the subject") (defun rtf-doccomm () "Comments (not to be cofused with \comment )") (defun rtf-nextfile () "Following text is name of "next" file") (defun rtf-verno () "Internal version number") (defun rtf-creatim () "Creation time follows") (defun rtf-yr () "Year to be assigned to previously specified time field") (defun rtf-revtim () "Revision time follows") (defun rtf-printtim () "Print time follows") (defun rtf-buptim () "Backup time follows") (defun rtf-edmins () "Editing minutes") (defun dummy (&optional arg) "Ignore arg, if present.") (defun ignore-group () "Ignore rest of current group." (let ((level 1) (src-buf (current-buffer)) (ign-buf (get-buffer-create "*rtf-ignored*"))) (set-buffer ign-buf) (goto-char (point-max)) (set-buffer src-buf) (while (> level 0) (let ((n (char-after (point)))) (forward-char 1) (set-buffer ign-buf) (insert n) (set-buffer src-buf) (cond ((= 92 n) ;\ (forward-char 1)) ((= ?{ n) (setq level (1+ level))) ((= ?} n) (setq level (1- level)))))))) (defun state->box (s) (aref s 0)) (defun set-state->box (s bool) (aset s 0 bool)) (setq default-state [nil]) (defun copy-state (s) (copy-sequence s)) (defun pop-state () (cond ((null state-stack) (error "No state to pop!")) ((< (length state-stack) 2) (let ((s (car state-stack))) (if (state->box s) (terminate-box)))) (t (let ((s (car state-stack)) (p (car (cdr state-stack)))) (if (and (state->box s) (not (state->box p))) (terminate-box))))) (setq state-stack (cdr state-stack)))