|
|
;;; ox-hugo-pandoc-cite.el --- Pandoc Citations support for ox-hugo -*- lexical-binding: t -*-

;; Authors: Kaushal Modi <kaushal.mod@gmail.com>
;; URL: https://ox-hugo.scripter.co
|
|
|
|
|
;;; Commentary:

;; *This is NOT a stand-alone package.*
;;
;; It is used by ox-hugo to add support for parsing Pandoc Citations.
|
|
|
|
|
;;; Code: |
|
|
|
|
|
(defvar org-hugo-pandoc-cite-pandoc-args-list
  (list "-f" "markdown"
        "-t" (concat "markdown-citations"
                     "-simple_tables"
                     "+pipe_tables"
                     "-fenced_divs"
                     "-fenced_code_attributes"
                     "-bracketed_spans")
        "--atx-headers"
        "--id-prefix=fn:")
  "Arguments passed to Pandoc by `org-hugo-pandoc-cite--run-pandoc'.

Meaning of each argument:

  -f markdown
      Convert *from* Markdown.

  -t markdown<EXTENSIONS>
      Convert *to* Markdown, with the below extensions toggled:

      -citations
          Remove the \"citations\" extension.  This causes the
          citations to be expanded instead of being included as
          Markdown citations.

      -simple_tables
          Remove the \"simple_tables\" style.

      +pipe_tables
          Add the \"pipe_tables\" style instead, which Blackfriday
          understands.

      -fenced_divs
          Do not replace HTML <div> tags with Pandoc fenced divs
          \":::\".

      -fenced_code_attributes
          Create fenced code blocks like \"``` lang .. ```\"
          instead of \"``` {.lang} .. ```\".

      -bracketed_spans
          Do not replace HTML <span> tags with Pandoc bracketed
          class notation \"{.some-class}\".

  --atx-headers
      Use \"# foo\" style headings in the output Markdown.

  --id-prefix=fn:
      Create footnote ID's like \"[^fn:1]\" instead of \"[^1]\"
      to be consistent with the default ox-hugo exported Markdown
      footnote style.

The `pandoc' call receives these arguments in addition to the
\"--bibliography\", output file (\"-o\") and input file
arguments.")
|
|
|
|
|
(defvar org-hugo-pandoc-cite-pandoc-meta-data
  (list "nocite"
        "csl"
        "link-citations")
  "Names of the front-matter fields that are specific to Pandoc.

`org-hugo-pandoc-cite--remove-pandoc-meta-data' deletes the
front-matter lines that set any of these fields.")
|
|
|
|
|
(defvar org-hugo-pandoc-cite--run-pandoc-buffer
  "*Pandoc Citations*"
  "Name of the buffer that collects the `pandoc' run output and errors.")
|
|
|
|
|
(defvar org-hugo-pandoc-cite--references-header-regexp
  "^<div id=\"refs\" class=\"references\">$"
  "Regexp matching the references header line inserted by Pandoc.

Pandoc inserts this line only if it has resolved one or more
references, so its presence is used to tell whether the Pandoc
output contains any references.")
|
|
|
|
|
(defun org-hugo-pandoc-cite--restore-fm-in-orig-outfile (orig-outfile fm &optional orig-full-contents)
  "Restore the intended front-matter format in ORIG-OUTFILE.

ORIG-OUTFILE is the Org exported file name.

FM is the front-matter string in the intended format.

ORIG-FULL-CONTENTS is a string of ORIG-OUTFILE contents.  If this
is nil, the contents are read from ORIG-OUTFILE in this function.

If FM is equal to `org-hugo--fm-yaml' (i.e. the intended
front-matter is already the YAML one), this function doesn't do
anything.  Otherwise, the YAML front-matter block at the very
beginning of ORIG-OUTFILE is replaced with FM, after the Pandoc
specific meta-data has been removed from FM.

Only the leading front-matter block, up to the *first* closing
\"---\" line, is replaced.  Any later \"---\" line in the content
body (e.g. a Markdown horizontal rule) is left untouched."
  (unless (string= fm org-hugo--fm-yaml)
    (unless orig-full-contents
      (setq orig-full-contents (with-temp-buffer
                                 (insert-file-contents orig-outfile)
                                 (buffer-substring-no-properties
                                  (point-min) (point-max)))))
    (setq fm (org-hugo-pandoc-cite--remove-pandoc-meta-data fm))
    (let* ((orig-contents-only
            (replace-regexp-in-string
             ;; The front-matter in `orig-full-contents' will always
             ;; be in YAML.  Delete that first.
             ;;
             ;; The match has to be non-greedy ("+?"), else the
             ;; deletion would extend up to the *last* "---" line
             ;; in the file, eating up content.  The group is shy
             ;; as its match is not needed.
             "\\`---\n\\(?:.\\|\n\\)+?\n---\n" "" orig-full-contents))
           (toml-fm-plus-orig-contents (concat fm orig-contents-only)))
      ;; (message "[ox-hugo-pandoc-cite] orig-contents-only: %S" orig-contents-only)
      (write-region toml-fm-plus-orig-contents nil orig-outfile))))
|
|
|
|
|
(defun org-hugo-pandoc-cite--run-pandoc (orig-outfile bib-list)
  "Run the `pandoc' process and return the generated file name.

ORIG-OUTFILE is the Org exported file name.

BIB-LIST is a list of one or more bibliography files.

The `pandoc' output and errors are collected in the buffer named
`org-hugo-pandoc-cite--run-pandoc-buffer'.  That buffer is killed
first if it already exists from a previous run.

The returned file is a newly created temporary file.  If `pandoc'
cannot be run or does not exit with status 0, that temporary file
is deleted and, in the latter case, a `user-error' is signaled."
  ;; First kill the Pandoc run buffer if already exists (from a
  ;; previous run).
  (when (get-buffer org-hugo-pandoc-cite--run-pandoc-buffer)
    (kill-buffer org-hugo-pandoc-cite--run-pandoc-buffer))
  (let* ((pandoc-outfile (make-temp-file ;ORIG_FILE_BASENAME.RANDOM.md
                          (concat (file-name-base orig-outfile) ".")
                          nil ".md"))
         (bib-args (mapcar (lambda (bib-file)
                             (concat "--bibliography="
                                     bib-file))
                           bib-list))
         (pandoc-arg-list (append
                           org-hugo-pandoc-cite-pandoc-args-list
                           bib-args
                           (list "-o" pandoc-outfile orig-outfile))) ;-o <OUTPUT FILE> <INPUT FILE>
         (pandoc-arg-list-str (mapconcat #'identity pandoc-arg-list " "))
         (success nil))
    (unwind-protect
        (let (exit-code)
          ;; Pass the command as an argument to the format string;
          ;; the file names in it may contain "%" characters.
          (message "[ox-hugo] Post-processing citations using Pandoc command:\n pandoc %s"
                   pandoc-arg-list-str)

          (setq exit-code (apply #'call-process
                                 "pandoc" nil
                                 org-hugo-pandoc-cite--run-pandoc-buffer :display
                                 pandoc-arg-list))

          ;; `call-process' returns a string describing the signal
          ;; if the process got killed by one, so do not compare
          ;; the exit code using `='.
          (unless (eq 0 exit-code)
            (user-error "[ox-hugo] Pandoc execution failed. See the %S buffer"
                        org-hugo-pandoc-cite--run-pandoc-buffer))
          (setq success t)
          pandoc-outfile)
      ;; Do not leave the temporary file behind on any failure.
      (unless success
        (ignore-errors (delete-file pandoc-outfile))))))
|
|
|
|
|
(defun org-hugo-pandoc-cite--remove-pandoc-meta-data (fm)
  "Return the front-matter string FM without the Pandoc meta-data.

Each line of FM that sets one of the fields listed in
`org-hugo-pandoc-cite-pandoc-meta-data', using either the
\"FIELD: \" or the \"FIELD = \" notation, is deleted."
  (let ((pandoc-field-line-regexp
         (format "^%s\\(:\\| =\\) "
                 (regexp-opt org-hugo-pandoc-cite-pandoc-meta-data 'words))))
    (with-temp-buffer
      (insert fm)
      ;; `flush-lines' is what `delete-matching-lines' is an alias
      ;; of.  Process the whole buffer.
      (flush-lines pandoc-field-line-regexp (point-min) (point-max))
      (buffer-substring-no-properties (point-min) (point-max)))))
|
|
|
|
|
(defun org-hugo-pandoc-cite--fix-pandoc-output (content loffset)
  "Fix the Pandoc output CONTENT and return it.

CONTENT is a string.  LOFFSET is a number.

Required fixes:

- Prepend Pandoc inserted \"references\" class div with Markdown
  heading \"## References\" where the number of hashes depends on
  LOFFSET.  LOFFSET = 1 will insert 2 hashes.

- Add the Blackfriday required \"<div></div>\" hack to Pandoc
  divs with \"ref\" id's.

- Unescape the Hugo shortcodes: \"{{\\\\=< shortcode \\\\=>}}\" ->
  \"{{< shortcode >}}\".  Newlines inside the shortcode are
  replaced with spaces.  The rest of the shortcode text is
  retained verbatim, including any backslashes in it."
  (with-temp-buffer
    (insert content)
    (let ((case-fold-search nil)
          (level-mark (make-string (+ loffset 1) ?#)))
      (goto-char (point-min))

      ;; Prepend the Pandoc inserted "references" class div with
      ;; "References" heading in Markdown.
      (save-excursion
        ;; There should be at max only one replacement needed for
        ;; this.
        (when (re-search-forward org-hugo-pandoc-cite--references-header-regexp nil :noerror)
          (replace-match (concat level-mark
                                 " References {#references}\n\n"
                                 "\\&\n <div></div>\n") ;See footnote 1
                         :fixedcase)))

      ;; Add the Blackfriday required hack to Pandoc ref divs.
      (save-excursion
        (let ((regexp "^<div id=\"ref-[^\"]+\">$"))
          (while (re-search-forward regexp nil :noerror)
            (replace-match "\\&\n <div></div>" :fixedcase)))) ;See footnote 1

      ;; Fix Hugo shortcodes.
      (save-excursion
        ;; Only the shortcode body is captured (group 1); the
        ;; whitespace groups are shy so that they do not share the
        ;; group number with it.
        (let ((regexp (concat "{{\\\\<"
                              "\\(?:\\s-\\|\n\\)+"
                              "\\(?1:[[:ascii:][:nonascii:]]+?\\)"
                              "\\(?:\\s-\\|\n\\)+"
                              "\\\\>}}")))
          (while (re-search-forward regexp nil :noerror)
            (let* ((sc-body (match-string-no-properties 1))
                   (sc-body-no-newlines (replace-regexp-in-string
                                         "\n" " " sc-body)))
              ;; The replacement has to be inserted literally; the
              ;; shortcode body can contain backslashes (e.g. the
              ;; characters escaped by Pandoc), which `replace-match'
              ;; would otherwise interpret or reject.
              (replace-match (format "{{< %s >}}" sc-body-no-newlines)
                             :fixedcase :literal)))))

      (buffer-substring-no-properties (point-min) (point-max)))))
|
|
|
|
|
(defun org-hugo-pandoc-cite--parse-citations (info orig-outfile)
  "Parse Pandoc Citations in ORIG-OUTFILE and update that file.

INFO is a plist used as a communication channel.

ORIG-OUTFILE is the Org exported file name.

The bibliography files are taken from the \"EXPORT_BIBLIOGRAPHY\"
property if set, else from the `:bibliography' value in INFO.  A
`user-error' is signaled if any of those files does not exist.
If no bibliography file is specified at all, only a message is
shown and ORIG-OUTFILE is left as it is.

Otherwise Pandoc is run on ORIG-OUTFILE.  If its output contains
references, ORIG-OUTFILE is overwritten with the front-matter
from INFO followed by the fixed Pandoc output.  If not, only the
intended front-matter format is restored in ORIG-OUTFILE.  The
temporary Pandoc output file is deleted in either case, even if
an error is signaled while processing it."
  (let ((bib-list (let ((bib-raw
                         (org-string-nw-p
                          (or (org-entry-get nil "EXPORT_BIBLIOGRAPHY" :inherit)
                              ;; Do not stringify a nil value: that
                              ;; would result in a bibliography file
                              ;; named "nil".
                              (let ((bib-keyword (plist-get info :bibliography)))
                                (and bib-keyword
                                     (format "%s" bib-keyword)))))))
                    (when bib-raw
                      ;; Multiple bibliographies can be comma or
                      ;; newline separated.  The newline separated
                      ;; bibliographies work only for the
                      ;; #+bibliography keyword; example:
                      ;;
                      ;;   #+bibliography: bibliographies-1.bib
                      ;;   #+bibliography: bibliographies-2.bib
                      ;;
                      ;; If using the subtree properties they need to
                      ;; be comma-separated (now don't use commas in
                      ;; those file names, you will suffer):
                      ;;
                      ;;   :EXPORT_BIBLIOGRAPHY: bibliographies-1.bib, bibliographies-2.bib
                      (let (bib-files)
                        (dolist (bib-file (org-split-string bib-raw "[,\n]"))
                          ;; - Don't allow spaces around bib names.
                          ;; - Skip the blank names (e.g. resulting
                          ;;   from a trailing comma).
                          (let ((fname (org-trim bib-file)))
                            (unless (string= "" fname)
                              (unless (file-exists-p fname)
                                (user-error "[ox-hugo] Bibliography file %S does not exist"
                                            fname))
                              (push fname bib-files))))
                        ;; - Remove duplicate bibliographies.
                        (delete-dups (nreverse bib-files)))))))
    (if bib-list
        (let ((fm (plist-get info :front-matter))
              (loffset (string-to-number
                        (or (org-entry-get nil "EXPORT_HUGO_LEVEL_OFFSET" :inherit)
                            (plist-get info :hugo-level-offset))))
              (pandoc-outfile (org-hugo-pandoc-cite--run-pandoc orig-outfile bib-list)))
          ;; (message "[ox-hugo parse citations] fm :: %S" fm)
          ;; (message "[ox-hugo parse citations] loffset :: %S" loffset)
          ;; (message "[ox-hugo parse citations] pandoc-outfile :: %S" pandoc-outfile)

          ;; Always delete the temporary Pandoc output file, even if
          ;; an error is signaled while processing it.
          (unwind-protect
              (let* ((pandoc-outfile-contents (with-temp-buffer
                                                (insert-file-contents pandoc-outfile)
                                                (buffer-substring-no-properties
                                                 (point-min) (point-max))))
                     (content-has-references (string-match-p
                                              org-hugo-pandoc-cite--references-header-regexp
                                              pandoc-outfile-contents)))
                ;; Prepend the original ox-hugo generated front-matter to
                ;; Pandoc output, only if the Pandoc output contains
                ;; references.
                (if content-has-references
                    (let* ((contents-fixed (org-hugo-pandoc-cite--fix-pandoc-output
                                            pandoc-outfile-contents loffset))
                           (fm (org-hugo-pandoc-cite--remove-pandoc-meta-data fm))
                           (fm-plus-content (concat fm "\n" contents-fixed)))
                      (write-region fm-plus-content nil orig-outfile))
                  (org-hugo-pandoc-cite--restore-fm-in-orig-outfile orig-outfile fm)
                  (message (concat "[ox-hugo] Using the original Ox-hugo output instead "
                                   "of Pandoc output as it contained no References"))))
            (delete-file pandoc-outfile))

          (let ((pandoc-buffer (get-buffer org-hugo-pandoc-cite--run-pandoc-buffer)))
            (when pandoc-buffer
              (if (> (buffer-size pandoc-buffer) 0) ;buffer is not empty
                  ;; The buffer name is passed as an argument to the
                  ;; format string instead of pre-formatting it, as
                  ;; `message' would format the result once again.
                  (message
                   (concat "[ox-hugo] See the %S buffer for possible Pandoc warnings.\n"
                           " Review the exported Markdown file for possible missing citations.")
                   org-hugo-pandoc-cite--run-pandoc-buffer)
                ;; Kill the Pandoc run buffer if it is empty.
                (kill-buffer pandoc-buffer)))))
      (message "[ox-hugo] No bibliography file was specified"))))
|
|
|
|
|
(defun org-hugo-pandoc-cite--parse-citations-maybe (info)
  "Run Pandoc to parse the citations, but only if that is needed.

INFO is a plist used as a communication channel.

Pandoc is needed if the front-matter in INFO sets the \"nocite\"
field, or if the exported file (the `:outfile' value in INFO)
contains at least one Pandoc citation key.  In that case, a
`user-error' is signaled if the \"pandoc\" or \"pandoc-citeproc\"
executable is not found.

If Pandoc is not needed, only the intended front-matter format
is restored in the exported file."
  ;; (message "pandoc citations keyword: %S"
  ;;          (org-hugo--plist-get-true-p info :hugo-pandoc-citations))
  ;; (message "pandoc citations prop: %S"
  ;;          (org-entry-get nil "EXPORT_HUGO_PANDOC_CITATIONS" :inherit))
  (let* ((outfile (plist-get info :outfile))
         (front-matter (plist-get info :front-matter))
         (nocite-found (string-match-p "^nocite\\(:\\| =\\) " front-matter))
         (outfile-text (with-temp-buffer
                         (insert-file-contents outfile)
                         (buffer-substring-no-properties
                          (point-min) (point-max))))
         ;; From http://pandoc.org/MANUAL.html#citations:
         ;;
         ;;   Each citation must have a key, composed of `@' + the
         ;;   citation identifier from the database, and may
         ;;   optionally have a prefix, a locator, and a suffix.  The
         ;;   citation key must begin with a letter, digit, or _, and
         ;;   may contain alphanumerics, _, and internal punctuation
         ;;   characters (:.#$%&-+?<>~/).
         ;;
         ;;   A minus sign (-) before the @ will suppress mention of
         ;;   the author in the citation.
         (key-chars "a-zA-Z0-9_:.#$%&+?<>~/-")
         ;; The character before the key must not be one of the key
         ;; characters.
         (key-regexp (concat "[^" key-chars "]"
                             "\\(-?@[a-zA-Z0-9_]"
                             "[" key-chars "]+\\)"))
         (key-found (string-match-p key-regexp outfile-text)))
    (cond
     ;; Neither the nocite front-matter nor any citation key is
     ;; present: Pandoc does not need to be run.
     ((not (or nocite-found key-found))
      (org-hugo-pandoc-cite--restore-fm-in-orig-outfile
       outfile front-matter outfile-text))
     (t
      (or (executable-find "pandoc")
          (user-error "[ox-hugo] pandoc executable not found in PATH"))
      (or (executable-find "pandoc-citeproc")
          (user-error "[ox-hugo] pandoc-citeproc executable not found in PATH"))
      (org-hugo-pandoc-cite--parse-citations info outfile)))))
|
|
|
|
|
|
|
|
;; Announce the `ox-hugo-pandoc-cite' feature, so that this file can be
;; loaded using `require'.
(provide 'ox-hugo-pandoc-cite)
|
|
|
|
|
|
|
|
|
|
|
;;; Footnotes

;;;; Footnote 1
;; The empty HTML element tags like "<div></div>" are a hack to get
;; around a Blackfriday limitation.  Details:
;; https://github.com/kaushalmodi/ox-hugo/issues/93.
|
|
|
|
|
|
|
|
;;; ox-hugo-pandoc-cite.el ends here
|
|
|
|