Skip to content

Commit 5dc34c4

Browse files
committed
docs: clean xelatex PDF build (emojis, warnings, modindex)
1 parent 6bae127 commit 5dc34c4

5 files changed

Lines changed: 143 additions & 13 deletions

File tree

.github/workflows/_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ jobs:
201201
x11-utils xvfb \
202202
texlive-latex-base texlive-latex-extra texlive-fonts-recommended \
203203
texlive-fonts-extra texlive-lang-french texlive-xetex latexmk \
204-
fonts-symbola \
204+
fonts-noto-core fonts-noto-mono fonts-noto-extra \
205205
imagemagick librsvg2-bin
206206
/sbin/start-stop-daemon --start --quiet \
207207
--pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background \

doc/conf.py

Lines changed: 126 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,18 @@
55
import os
66
import os.path as osp
77
import sys
8+
import warnings
89
import zipfile
910

1011
import guidata.config as gcfg
1112

13+
# Silence Sphinx 10 deprecation warning emitted from cairocffi (third-party,
14+
# used by sphinxcontrib-svg2pdfconverter during LaTeX builds).
15+
warnings.filterwarnings(
16+
"ignore",
17+
message=r".*Sphinx 10 will drop support for representing paths as strings.*",
18+
)
19+
1220
sys.path.insert(0, os.path.abspath(".."))
1321

1422
# Importing sigima to avoid re-enabling guidata validation mode
@@ -223,20 +231,127 @@ def exclude_api_from_gettext(app):
223231
}
224232

225233
latex_elements = {
234+
# Drop the cmap package: it is a pdflatex-only helper, emits a noisy
235+
# "pdftex not detected" warning under xelatex, and the resulting PDF
236+
# remains fully searchable thanks to fontspec/XeTeX.
237+
"cmappkg": "",
226238
# Use xelatex (set via ``latex_engine`` below): pdflatex chokes on the
227239
# emoji / box-drawing / arrow glyphs sprinkled across the docs. The
228240
# ``ucharclasses`` package automatically routes whole Unicode blocks
229-
# (emoji, dingbats, box-drawing, ...) to the Symbola fallback font
230-
# (Debian/Ubuntu package ``fonts-symbola``; MiKTeX fetches it on
231-
# demand on Windows).
241+
# to the Noto fallback fonts (SIL OFL, fully redistributable):
242+
#   * Noto Sans Symbols / Noto Sans Symbols 2 -- arrows, geometric
243+
#     shapes, miscellaneous technical symbols, some pictographs.
#   * Noto Sans Mono -- box drawing.
244+
# * Noto Emoji -- monochrome emoji (XeLaTeX does not render the
245+
# COLR/CPAL color tables of Noto Color Emoji reliably).
246+
# Install:
247+
# * Debian/Ubuntu: ``apt install fonts-noto-core fonts-noto-mono
248+
# fonts-noto-extra`` (``fonts-noto-extra`` ships the monochrome
249+
# ``NotoEmoji-Regular.ttf``; ``fonts-noto-color-emoji`` is *not*
250+
# usable because XeTeX rejects CBDT/CBLC color-bitmap fonts).
251+
# * Windows: MiKTeX fetches the ``noto`` and ``noto-emoji`` packages
252+
# on demand (``xelatex -enable-installer ...``).
232253
"preamble": r"""
233254
\usepackage{amsmath}
234255
\usepackage{amssymb}
235256
\usepackage{mathrsfs}
236257
\usepackage{fontspec}
237-
\newfontfamily\unicodefallback{Symbola}[Scale=MatchLowercase]
238-
\usepackage[Symbols]{ucharclasses}
239-
\setTransitionsForSymbols{\unicodefallback}{\normalfont}
258+
% Four complementary Noto fallbacks -- a single one does NOT cover every
259+
% Unicode sub-block we hit in the docs:
260+
% * Noto Sans Symbols -- arrows, box drawing, geometric shapes,
261+
% misc. symbols (info, check mark, sparkles, ...), dingbats.
262+
% * Noto Sans Symbols 2 -- box drawing extensions, transport, misc
263+
% technical (⏱), and a number of pictographs (🛠 🏗 📁 ...) the
264+
% monochrome Noto Emoji does NOT include.
265+
% * Noto Sans Mono -- box drawing characters (─..╿), which
266+
% none of the Symbols fonts cover.
267+
% * Noto Emoji (mono) -- emoji-presentation chars (✅ ✨ ❌ ⚠ ℹ).
268+
% Noto Color Emoji is not used because XeTeX rejects CBDT/CBLC bitmap
269+
% fonts. A few extended pictographs (🧱 🧠 🧩 🧹) live ONLY in Noto
270+
% Color Emoji and remain reported as missing -- acceptable cosmetic
271+
% limitation.
272+
\newfontfamily\symbolsfallback{NotoSansSymbols-Regular.ttf}[Scale=MatchLowercase]
273+
\newfontfamily\symbolstwofallback{NotoSansSymbols2-Regular.ttf}[Scale=MatchLowercase]
274+
\newfontfamily\monofallback{NotoSansMono-Regular.ttf}[Scale=MatchLowercase]
275+
\newfontfamily\emojifallback{NotoEmoji-Regular.ttf}[Scale=MatchLowercase]
276+
\usepackage[Latin,Arrows,LetterlikeSymbols,BoxDrawing,GeometricShapes,Dingbats,MiscellaneousSymbols,MiscellaneousSymbolsAndArrows,MiscellaneousTechnical,Emoticons,MiscellaneousSymbolsAndPictographs,SupplementalSymbolsAndPictographs,TransportAndMapSymbols,SymbolsAndPictographsExtendedA]{ucharclasses}
277+
% Force an explicit transition back to the main font whenever we re-enter
278+
% a Latin block. Without this, XeTeX keeps the last font set by a
279+
% Symbols/Emoji transition for every following Latin character, producing
280+
% thousands of "Missing character: There is no <letter> in font Noto Emoji"
281+
% warnings and a broken PDF.
282+
\setTransitionsForLatin{\normalfont}{}
283+
% Route blocks to the font that actually covers them. Coverage notes:
284+
%   * Box Drawing lives in Noto Sans Mono; Block Elements / Misc Technical in Symbols 2.
285+
%   * ⚠ ✅ ✨ (Misc Symbols / Dingbats with emoji presentation)
286+
%     are only in Noto Emoji; ➝ is NOT in Noto Emoji and is overridden below.
287+
\setTransitionsFor{Arrows}{\symbolsfallback}{\normalfont}
288+
\setTransitionsFor{LetterlikeSymbols}{\emojifallback}{\normalfont}
289+
\setTransitionsFor{BoxDrawing}{\monofallback}{\normalfont}
290+
\setTransitionsFor{GeometricShapes}{\symbolsfallback}{\normalfont}
291+
\setTransitionsFor{Dingbats}{\emojifallback}{\normalfont}
292+
\setTransitionsFor{MiscellaneousSymbols}{\emojifallback}{\normalfont}
293+
\setTransitionsFor{MiscellaneousSymbolsAndArrows}{\symbolstwofallback}{\normalfont}
294+
\setTransitionsFor{MiscellaneousTechnical}{\symbolstwofallback}{\normalfont}
295+
\setTransitionsFor{Emoticons}{\emojifallback}{\normalfont}
296+
\setTransitionsFor{MiscellaneousSymbolsAndPictographs}{\emojifallback}{\normalfont}
297+
\setTransitionsFor{SupplementalSymbolsAndPictographs}{\emojifallback}{\normalfont}
298+
\setTransitionsFor{TransportAndMapSymbols}{\emojifallback}{\normalfont}
299+
\setTransitionsFor{SymbolsAndPictographsExtendedA}{\emojifallback}{\normalfont}
300+
% Individual overrides for codepoints that are NOT in the block-routed
301+
% font but exist in another installed Noto. The strategy is:
302+
% 1. Reset the codepoint's XeTeX charclass to 0 so the broader block's
303+
% \setTransitionsFor (which would switch to Noto Emoji and miss
304+
% the glyph) no longer fires for it.
305+
% 2. Use \newunicodechar to redefine the character as a macro that
306+
% locally switches to Symbols 2 and emits the glyph via \char to
307+
% avoid the infinite recursion that would occur if the macro body
308+
% contained the active character itself.
309+
%   * ➝ TRIANGLE-HEADED RIGHTWARDS ARROW, U+279D (Dingbats).
310+
% * 🛠 🏗 🗃 🖼: pictographs missing from Noto Emoji but present
311+
% in Symbols 2.
312+
\XeTeXcharclass"279D=0
313+
\XeTeXcharclass"1F6E0=0
314+
\XeTeXcharclass"1F3D7=0
315+
\XeTeXcharclass"1F5C3=0
316+
\XeTeXcharclass"1F5BC=0
317+
\usepackage{newunicodechar}
318+
\newunicodechar{➝}{{\symbolstwofallback\char"279D\relax}}
319+
\newunicodechar{🛠}{{\symbolstwofallback\char"1F6E0\relax}}
320+
\newunicodechar{🏗}{{\symbolstwofallback\char"1F3D7\relax}}
321+
\newunicodechar{🗃}{{\symbolstwofallback\char"1F5C3\relax}}
322+
\newunicodechar{🖼}{{\symbolstwofallback\char"1F5BC\relax}}
323+
% Discard U+FE0F (VARIATION SELECTOR-16) at the input layer: it has no
324+
% visible glyph, only requests the emoji presentation of the preceding
325+
% codepoint. catcode 9 means "ignored character", so XeTeX drops it before
326+
% font selection -- no more "Missing character" warnings for it.
327+
\catcode"FE0F=9\relax
328+
% Silence cosmetic xelatex/LaTeX warnings that are inherent to the
329+
% current Sphinx 9 + XeTeX + Noto fallback setup and do not affect the
330+
% visible PDF output. We deliberately keep real errors and undefined
331+
% cross-references visible.
332+
% * \tracinglostchars=0 mutes the XeTeX "Missing character" terminal
333+
% messages for the handful of color-bitmap-only emoji (🧱🧩🧠🧹)
334+
% that no monochrome Noto font ships.
335+
% * \hbadness / \vbadness raise the threshold so the engine no longer
336+
% reports Underfull \hbox/\vbox notices caused by long identifier
337+
% names in narrow table columns and code-style paragraphs.
338+
% * The silence filters drop predictable, harmless package messages.
339+
\tracinglostchars=0
340+
% Sphinx resets \hbadness/\vbadness at \begin{document}, so we re-apply
341+
% the thresholds inside an \AtBeginDocument hook to silence Underfull
342+
% \hbox/\vbox reports. Note: residual Overfull \hbox notices inside
343+
% Sphinx tabulary/varwidth cells are not suppressible from here (Sphinx
344+
% resets \hfuzz locally) and remain as informative typographic notices.
345+
\AtBeginDocument{%
346+
\hbadness=99999\relax
347+
\vbadness=99999\relax
348+
}
349+
\usepackage{silence}
350+
\WarningFilter{latexfont}{Font shape}
351+
\WarningFilter{latexfont}{Some font shapes were not available}
352+
\WarningFilter{cmap}{pdftex not detected}
353+
\WarningFilter{longtable}{Table widths have changed}
354+
\WarningFilter{rerunfilecheck}{File}
240355
% Prevent orphan section headings at the bottom of a page: force a page
241356
% break if there is not enough room for the heading plus a few lines of
242357
% its following paragraph.
@@ -249,6 +364,11 @@ def exclude_api_from_gettext(app):
249364
+ "\n".join(f"\\newcommand{{\\{cmd}}}{{{defn}}}" for cmd, defn in macros.items()),
250365
}
251366
latex_engine = "xelatex"
367+
# Sphinx 9 emits the Python Module Index with a \detokenize/\sphinxstyleindexpageref
368+
# pattern whose key does not match the corresponding \label definitions, producing
369+
# spurious "undefined reference" warnings on every entry. The HTML build keeps a
370+
# fully functional modindex; the PDF one is not worth its bogus warnings.
371+
latex_domain_indices = False
252372

253373
# -- MathJax configuration for HTML output -----------------------------------
254374
mathjax3_config = {

doc/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ DataLab has been funded, chronologically, by the following stakeholders:
7777
.. tabularcolumns:: |>{\centering\arraybackslash}m{2cm}|m{\dimexpr\linewidth-2cm-4\tabcolsep\relax}|
7878
.. list-table::
7979
:header-rows: 0
80-
:widths: 8 92
8180

8281
* - |cea_logo|
8382
- `CEA <https://www.cea.fr>`_, the French Alternative Energies and Atomic Energy Commission, is the major investor in DataLab, and is the main contributor to the project.

doc/intro/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ DataLab integrates seamlessly into your workflow thanks to three main operating
1616
.. tabularcolumns:: |>{\centering\arraybackslash}m{2cm}|m{\dimexpr\linewidth-2cm-4\tabcolsep\relax}|
1717
.. list-table::
1818
:header-rows: 0
19-
:widths: 8 92
2019

2120
* - |appmode|
2221
- **Stand-alone application**, with a graphical user interface that allows you to interact with your data and visualize the results of your analysis in real time.

scripts/build_doc.bat

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,31 @@ mkdir %MODNAME%\data\doc
2424
@REM responsibility (run scripts\update_screenshots.bat or the dedicated VS Code
2525
@REM task "??? Refresh doc screenshots") and are committed as-is, which lets the
2626
@REM CI doc workflows build the PDF without launching DataLab/Qt.
27+
setlocal enabledelayedexpansion
2728
for %%L in (fr en) do (
2829
set LANG=%%L
2930
if exist build\doc ( rmdir /s /q build\doc )
3031
sphinx-build -b latex -D language=%%L doc build\doc
3132
cd build\doc
32-
echo Building PDF documentation for %%L...
33-
xelatex -interaction=nonstopmode -quiet DataLab.tex
33+
@REM Sphinx >= 9 emits a lowercased .tex filename (``datalab.tex``)
34+
@REM regardless of ``project = "DataLab"``. Auto-discover it instead
35+
@REM of hardcoding ``DataLab.tex``: NTFS hides the breakage locally
36+
@REM (case-insensitive lookup) but a case-sensitive filesystem fails.
37+
for %%F in (*.tex) do set "MAIN_TEX=%%F"
38+
echo Building PDF documentation for %%L from !MAIN_TEX!...
39+
@REM -enable-installer: let MiKTeX silently auto-install missing TeX
40+
@REM packages (e.g. ``noto`` and ``noto-emoji`` for the Unicode
41+
@REM fallback fonts referenced by doc/conf.py) instead of popping up
42+
@REM its Qt-based installer dialog (which fails when the venv shadows
43+
@REM Qt plugins). No effect on CI (Ubuntu + TeX Live).
44+
xelatex -enable-installer -interaction=nonstopmode -halt-on-error !MAIN_TEX!
3445
@REM Build again to fix table of contents (workaround)
35-
xelatex -interaction=nonstopmode -quiet DataLab.tex
46+
xelatex -enable-installer -interaction=nonstopmode -halt-on-error !MAIN_TEX!
3647
echo Done.
3748
cd ..\..
38-
move /Y build\doc\DataLab.pdf %MODNAME%\data\doc\DataLab_%%L.pdf
49+
move /Y build\doc\!MAIN_TEX:.tex=.pdf! %MODNAME%\data\doc\DataLab_%%L.pdf
3950
)
51+
endlocal
4052

4153
@REM explorer %MODNAME%\data\doc
4254

0 commit comments

Comments
 (0)