Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pep_sphinx_extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
from docutils.writers.html5_polyglot import HTMLTranslator
from sphinx import environment

from pep_sphinx_extensions.generate_rss import (
create_rss_feed,
get_from_doctree,
pep_abstract,
)
from pep_sphinx_extensions.doctree import get_from_doctree
from pep_sphinx_extensions.generate_bibtex import create_bibtex_files
from pep_sphinx_extensions.generate_rss import create_rss_feed
from pep_sphinx_extensions.pep_processor.html import (
pep_html_builder,
pep_html_translator,
Expand Down Expand Up @@ -51,6 +49,7 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None:
if "internal_builder" not in app.tags:
create_index_file(Path(app.outdir), app.builder.name)
create_rss_feed(app.doctreedir, app.outdir)
create_bibtex_files(app.doctreedir, app.outdir)


def set_description(
Expand Down
47 changes: 47 additions & 0 deletions pep_sphinx_extensions/doctree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This file is placed in the public domain or under the
# CC0-1.0-Universal license, whichever is more permissive.

from __future__ import annotations

import pickle
from pathlib import Path

from docutils import nodes

# Module-level cache mapping a doctree path to that document's header values.
# Entries are added lazily by get_from_doctree() the first time a path is requested.
document_cache: dict[Path, dict[str, str]] = {}


def pep_abstract(document: nodes.document) -> str:
    """Return the first paragraph of the PEP abstract.

    Sections are scanned in document order. The first section titled
    "Abstract" wins: its first paragraph is returned with surrounding
    whitespace stripped and newlines replaced by spaces, or "" if that
    section contains no paragraph.

    If no "Abstract" section exists, the first paragraph of the last
    "Introduction" section that has one is returned instead, or "" if
    there is none.
    """
    introduction = ""
    for section in document.findall(nodes.section):
        title_node = section.next_node(nodes.title)
        if title_node is None:
            continue

        heading = title_node.astext()
        if heading not in ("Abstract", "Introduction"):
            continue

        para_node = section.next_node(nodes.paragraph)
        if heading == "Abstract":
            if para_node is None:
                return ""
            return para_node.astext().strip().replace("\n", " ")

        # "Introduction": next_node() returns None when the section has no
        # paragraph, so guard before dereferencing instead of raising
        # AttributeError part-way through the build.
        if para_node is not None:
            introduction = para_node.astext().strip().replace("\n", " ")

    return introduction


def get_from_doctree(full_path: Path, text: str) -> str:
    """Look up the header named *text* for the pickled doctree at *full_path*.

    The doctree is unpickled only on the first request for a given path;
    its headers (plus a computed "Abstract" entry) are then kept in
    ``document_cache``. Returns "" when the requested key is absent.
    """
    if full_path not in document_cache:
        # Cache miss: load the doctree from disk
        doctree = pickle.loads(full_path.read_bytes())
        # The headers are populated in the PEPHeaders transform
        headers = doctree.get("headers", {})
        document_cache[full_path] = headers
        # The abstract is stored next to the headers under its own key
        headers["Abstract"] = pep_abstract(doctree)

    return document_cache[full_path].get(text, "")
73 changes: 73 additions & 0 deletions pep_sphinx_extensions/generate_bibtex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This file is placed in the public domain or under the
# CC0-1.0-Universal license, whichever is more permissive.

from __future__ import annotations

import re
import textwrap
from pathlib import Path

from pep_sphinx_extensions.doctree import get_from_doctree

# LaTeX special characters that need escaping in BibTeX values.
# The single capture group holds the matched character.
# NOTE(review): the backslash itself is not part of this set -- confirm that is intended.
_BIBTEX_SPECIAL = re.compile(r"([&%$#_{}~^])")
# An angle-bracketed span such as "<user@example.com>", together with any
# whitespace immediately before it.
_EMAIL_ADDRESS = re.compile(r"\s*<[^>]+>")


def _escape_bibtex(text: str) -> str:
    """Return *text* with a backslash inserted before each special character.

    "Special" means any character matched by ``_BIBTEX_SPECIAL``; all other
    characters are passed through unchanged.
    """

    def _with_backslash(match):
        # Group 1 is the single special character that was matched.
        return "\\" + match.group(1)

    return _BIBTEX_SPECIAL.sub(_with_backslash, text)


def _parse_created(created: str) -> tuple[str, str]:
    """Split a PEP 'Created' value such as '01-Jan-2020' into (year, month).

    The value must consist of exactly three '-'-separated parts, otherwise
    the unpacking raises ValueError. The day part is discarded, the year is
    returned as-is and the month is lower-cased (e.g. 'jan').
    """
    pieces = created.split("-")
    _day, month_name, year = pieces
    return (year, month_name.lower())


def _format_authors(author_header: str) -> str:
    """Turn an Author header value into a BibTeX author list.

    Angle-bracketed email addresses are removed, the remainder is split on
    commas, each name is stripped of surrounding whitespace, empty pieces
    are dropped, and the names are joined with " and ".
    """
    without_emails = _EMAIL_ADDRESS.sub("", author_header)
    stripped_names = (piece.strip() for piece in without_emails.split(","))
    # filter(None, ...) discards the empty strings left by stray commas
    return " and ".join(filter(None, stripped_names))


def _generate_bibtex_entry(full_path: Path) -> str:
    """Generate a BibTeX entry for a single PEP from its doctree.

    Reads the "PEP", "Created", "Author" and "Title" headers through
    ``get_from_doctree`` and renders them as one ``@techreport`` entry
    keyed ``pep<number>``. The returned text has no trailing newline.

    Raises ValueError if the "PEP" header is not an integer or the
    "Created" header is not in the three-part form ``_parse_created``
    expects.
    """
    number = int(get_from_doctree(full_path, "PEP"))
    created = get_from_doctree(full_path, "Created")
    author = get_from_doctree(full_path, "Author")
    title = get_from_doctree(full_path, "Title")

    year, month = _parse_created(created)
    # The field values below are delimited by double quotes, so a literal '"'
    # inside a value would end the field early. BibTeX accepts a quote inside
    # such a field only when it is enclosed in braces. This must run after
    # _escape_bibtex(), otherwise the protective braces would themselves be
    # escaped.
    authors_bibtex = _escape_bibtex(_format_authors(author)).replace('"', '{"}')
    title_escaped = _escape_bibtex(title).replace('"', '{"}')

    return textwrap.dedent(f"""\
        @techreport{{pep{number},
        author = "{authors_bibtex}",
        title = "PEP {number} --- {title_escaped}",
        institution = "Python Software Foundation",
        year = "{year}",
        month = {month},
        type = "PEP",
        number = "{number}",
        url = "https://peps.python.org/pep-{number:0>4}/",
        }}""")


def create_bibtex_files(doctree_dir: str, output_dir: str) -> None:
    """Write one ``.bib`` file into *output_dir* per PEP doctree in *doctree_dir*.

    Every file matching ``pep-????.doctree`` is rendered with
    ``_generate_bibtex_entry`` and saved as UTF-8 under the doctree's stem
    (for example ``pep-0008.bib``), followed by a single newline.
    """
    destination = Path(output_dir)
    for doctree_path in Path(doctree_dir).glob("pep-????.doctree"):
        bibtex_text = _generate_bibtex_entry(doctree_path) + "\n"
        bib_path = destination / f"{doctree_path.stem}.bib"
        bib_path.write_text(bibtex_text, encoding="utf-8")
41 changes: 1 addition & 40 deletions pep_sphinx_extensions/generate_rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
from __future__ import annotations

import datetime as dt
import pickle
from email.utils import format_datetime, getaddresses
from html import escape
from pathlib import Path

from docutils import nodes
from pep_sphinx_extensions.doctree import get_from_doctree

RSS_DESCRIPTION = (
"Newest Python Enhancement Proposals (PEPs): "
Expand All @@ -23,24 +22,6 @@ def _format_rfc_2822(datetime: dt.datetime) -> str:
return format_datetime(datetime, usegmt=True)


# Module-level cache mapping a doctree path to that document's header values.
# Entries are added lazily by get_from_doctree() the first time a path is requested.
document_cache: dict[Path, dict[str, str]] = {}


def get_from_doctree(full_path: Path, text: str) -> str:
    """Return the header *text* of the pickled doctree at *full_path*, or "".

    Each doctree is unpickled at most once; its headers, plus an "Abstract"
    entry computed by ``pep_abstract``, are kept in ``document_cache``.
    """
    if full_path not in document_cache:
        # Not cached yet: load the doctree
        doctree = pickle.loads(full_path.read_bytes())
        # The headers are populated in the PEPHeaders transform
        headers = doctree.get("headers", {})
        document_cache[full_path] = headers
        # Keep the abstract alongside the headers
        headers["Abstract"] = pep_abstract(doctree)

    return document_cache[full_path].get(text, "")


def pep_creation(full_path: Path) -> dt.datetime:
created_str = get_from_doctree(full_path, "Created")
try:
Expand All @@ -49,26 +30,6 @@ def pep_creation(full_path: Path) -> dt.datetime:
return dt.datetime.min


def pep_abstract(document: nodes.document) -> str:
    """Return the first paragraph of the PEP abstract.

    The first section titled "Abstract" decides the result: its first
    paragraph (stripped, with newlines turned into spaces), or "" when it
    has no paragraph. Without an "Abstract" section, the first paragraph of
    the last "Introduction" section that has one is returned, or "" if
    there is none.
    """
    introduction = ""
    for section in document.findall(nodes.section):
        title_node = section.next_node(nodes.title)
        if title_node is None:
            continue

        heading = title_node.astext()
        if heading == "Abstract":
            para_node = section.next_node(nodes.paragraph)
            if para_node is not None:
                return para_node.astext().strip().replace("\n", " ")
            return ""
        if heading == "Introduction":
            para_node = section.next_node(nodes.paragraph)
            # next_node() yields None for a section without a paragraph;
            # skip it rather than fail with AttributeError on .astext().
            if para_node is not None:
                introduction = para_node.astext().strip().replace("\n", " ")

    return introduction


def _generate_items(doctree_dir: Path):
# get list of peps with creation time (from "Created:" string in pep source)
peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def apply(self) -> None:
self.document += nodes.transition()
self.document += _add_source_link(pep_source_path)
self.document += _add_commit_history_info(pep_source_path)
self.document += _add_bibtex_link(pep_source_path)


def _add_source_link(pep_source_path: Path) -> nodes.paragraph:
Expand All @@ -71,6 +72,13 @@ def _add_commit_history_info(pep_source_path: Path) -> nodes.paragraph:
return nodes.paragraph("", "Last modified: ", link_node)


def _add_bibtex_link(pep_source_path: Path) -> nodes.paragraph:
    """Build the "Cite: BibTeX" paragraph linking to the PEP's ``.bib`` file.

    The link target is the source file's stem with a ``.bib`` suffix
    (for example ``pep-0008.bib``), given as a relative URI.
    """
    # NOTE(review): a relative URI resolves against the rendered page's own
    # location -- confirm it matches where the .bib files are written for
    # every builder in use.
    bibtex_reference = nodes.reference("", "BibTeX", refuri=f"{pep_source_path.stem}.bib")
    return nodes.paragraph("", "Cite: ", bibtex_reference)


def _get_last_modified_timestamps():
# get timestamps and changed files from all commits (without paging results)
args = ("git", "--no-pager", "log", "--format=#%at", "--name-only")
Expand Down
150 changes: 150 additions & 0 deletions pep_sphinx_extensions/tests/test_generate_bibtex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from pathlib import Path
from unittest.mock import patch

import pytest

from pep_sphinx_extensions.generate_bibtex import (
_escape_bibtex,
_format_authors,
_generate_bibtex_entry,
_parse_created,
create_bibtex_files,
)

# Dotted path handed to unittest.mock.patch: the name `get_from_doctree`
# as looked up inside the generate_bibtex module.
MOCK_TARGET = "pep_sphinx_extensions.generate_bibtex.get_from_doctree"

# Baseline header values served by the mocked get_from_doctree; individual
# tests override single keys on a copy of this dict.
PEP_8_HEADERS = {
    "PEP": "8",
    "Title": "Style Guide for Python Code",
    "Author": "Guido van Rossum, Barry Warsaw, Alyssa Coghlan",
    "Created": "05-Jul-2001",
}


def _mock_doctree(headers: dict[str, str]):
    """Build a stand-in for get_from_doctree backed by the *headers* dict.

    The returned callable ignores the path argument and answers every
    lookup from *headers*, falling back to "" for unknown keys.
    """

    def fake_get_from_doctree(full_path, text):
        return headers.get(text, "")

    return fake_get_from_doctree


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        ("Hello World", "Hello World"),
        ("Tom & Jerry", r"Tom \& Jerry"),
        ("100%", r"100\%"),
        ("$x$", r"\$x\$"),
        ("C#", r"C\#"),
        ("snake_case", r"snake\_case"),
        ("{}", r"\{\}"),
        ("~tilde", r"\~tilde"),
        # The caret is part of the escaped character set as well.
        ("x^2", r"x\^2"),
        ("no specials", "no specials"),
    ],
)
def test_escape_bibtex(text: str, expected: str) -> None:
    """Each special character gets a backslash prefix; other text is unchanged."""
    assert _escape_bibtex(text) == expected


@pytest.mark.parametrize(
    ("created", "expected"),
    [
        ("01-Jan-1990", ("1990", "jan")),
        ("15-Sep-2021", ("2021", "sep")),
        ("28-Feb-2000", ("2000", "feb")),
    ],
)
def test_parse_created(created: str, expected: tuple[str, str]) -> None:
    """A 'Created' value is split into (year, lower-cased month)."""
    year_and_month = _parse_created(created)
    assert year_and_month == expected


@pytest.mark.parametrize(
    ("author_header", "expected"),
    [
        # Single author, no email
        ("Cardinal Ximénez", "Cardinal Ximénez"),
        # Email addresses are dropped
        (
            "Cardinal Ximénez <Cardinal.Ximenez@spanish.inquisition>,"
            " Cardinal Biggles <Cardinal.Biggles@spanish.inquisition>",
            "Cardinal Ximénez and Cardinal Biggles",
        ),
        # Header continued on a second line
        (
            "Cardinal Ximénez,\n Cardinal Biggles",
            "Cardinal Ximénez and Cardinal Biggles",
        ),
        # Three authors
        (
            "Cardinal Ximénez, Cardinal Biggles, Cardinal Fang",
            "Cardinal Ximénez and Cardinal Biggles and Cardinal Fang",
        ),
    ],
)
def test_format_authors(author_header: str, expected: str) -> None:
    """Author headers are reduced to names joined with " and "."""
    formatted = _format_authors(author_header)
    assert formatted == expected


def test_generate_bibtex_entry() -> None:
    """The entry built from the PEP 8 headers contains every expected field."""
    # Arrange
    expected_fragments = (
        "@techreport{pep8,",
        'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"',
        'title = "PEP 8 --- Style Guide for Python Code"',
        'year = "2001"',
        "month = jul,",
        'number = "8"',
        'url = "https://peps.python.org/pep-0008/"',
    )

    # Act
    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
        entry = _generate_bibtex_entry(Path("pep-0008.doctree"))

    # Assert
    for fragment in expected_fragments:
        assert fragment in entry


def test_generate_bibtex_entry_title_escaped() -> None:
    """Special characters in the Title header are escaped in the entry."""
    # Arrange
    overrides = {"PEP": "999", "Title": "Use of $ & % in PEPs"}
    headers = {**PEP_8_HEADERS, **overrides}
    fake_lookup = _mock_doctree(headers)

    # Act
    with patch(MOCK_TARGET, fake_lookup):
        entry = _generate_bibtex_entry(Path("pep-0999.doctree"))

    # Assert
    assert r"Use of \$ \& \% in PEPs" in entry


def test_generate_bibtex_entry_author_escaped() -> None:
    """Special characters in the Author header are escaped in the entry."""
    # Arrange
    headers = dict(PEP_8_HEADERS)
    headers["Author"] = "Tom & Jerry <tj@example.com>"
    fake_lookup = _mock_doctree(headers)

    # Act
    with patch(MOCK_TARGET, fake_lookup):
        entry = _generate_bibtex_entry(Path("pep-0008.doctree"))

    # Assert
    assert r"Tom \& Jerry" in entry


def test_create_bibtex_files(tmp_path: Path) -> None:
    """A .bib file is written next to the output for each PEP doctree found."""
    # Arrange
    doctree_dir = tmp_path / "doctrees"
    doctree_dir.mkdir()
    output_dir = tmp_path / "output"
    output_dir.mkdir()
    # An empty file is enough: the doctree lookup itself is mocked below.
    (doctree_dir / "pep-0008.doctree").touch()

    # Act
    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
        create_bibtex_files(str(doctree_dir), str(output_dir))

    # Assert
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's default locale encoding.
    bib = (output_dir / "pep-0008.bib").read_text(encoding="utf-8")
    assert "@techreport{pep8," in bib
    assert 'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"' in bib


def test_create_bibtex_files_no_doctrees(tmp_path: Path) -> None:
    """With no PEP doctrees present, no .bib files are produced."""
    # Arrange
    doctree_dir = tmp_path / "doctrees"
    output_dir = tmp_path / "output"
    for directory in (doctree_dir, output_dir):
        directory.mkdir()

    # Act
    create_bibtex_files(str(doctree_dir), str(output_dir))

    # Assert
    written = list(output_dir.glob("*.bib"))
    assert written == []
Loading