Building a blog in Django#

We launched the Datasette Cloud blog today. The Datasette Cloud site itself is a Django app - it uses Django and PostgreSQL to manage accounts, teams and soon billing and payments, then launches dedicated containers running Datasette for each customer.

It’s been a while since I’ve built a new blog implementation in Django! I decided to make notes for the next time.

Features#

Here are the features I consider to be essential for a blog in 2023 (though they haven’t changed much in over a decade):

Blog posts have a title, summary, body and publication date. Optional: author information, tags
Posts can be live or draft
The blog index page shows the most recent entries
Older entries are available via some kind of archive mechanism
The blog has an Atom feed
Entries have social media card metadata, to enhance links to them on Mastodon and Twitter
Markdown is a nice-to-have for editing the posts

The models#

Here’s the Django model for the blog (I generated the first version of this with ChatGPT, then iterated on it):

1
from django.db import models
2
from django.contrib.auth.models import User
3
from django.utils import timezone
4
from django.utils.html import strip_tags
5
import markdown
6
from django.utils.html import mark_safe
7

8

9
class Tag(models.Model):
    """A label that can be attached to blog entries."""

    # Human-readable label; also used as the string form of the tag.
    name = models.CharField(max_length=50)
    # URL-safe identifier for the tag.
    slug = models.SlugField()

    def __str__(self):
        """Return the tag's name."""
        return self.name
15

16

17
class Entry(models.Model):
    """A blog post whose summary and body are written in Markdown."""

    title = models.CharField(max_length=200)
    # Publication timestamp; defaults to the current time.
    created = models.DateTimeField(default=timezone.now)
    slug = models.SlugField()
    summary = models.TextField()
    body = models.TextField()
    card_image = models.URLField(
        help_text="URL to image for social media cards",
        blank=True,
        null=True,
    )
    authors = models.ManyToManyField(User, through="Authorship")
    tags = models.ManyToManyField(Tag, blank=True)

    is_draft = models.BooleanField(
        help_text="Draft entries do not show in index pages but can be visited directly if you know the URL",
        default=False,
    )

    class Meta:
        verbose_name_plural = "entries"

    def __str__(self):
        """Return the entry's title."""
        return self.title

    def get_absolute_url(self):
        """Return the entry's path, namespaced by publication year."""
        return f"/blog/{self.created.year:d}/{self.slug}/"

    @staticmethod
    def _markdown_to_html(source):
        """Convert Markdown source text to an HTML5 string."""
        return markdown.markdown(source, output_format="html5")

    @property
    def summary_rendered(self):
        """The summary as HTML, marked safe for template output."""
        html = self._markdown_to_html(self.summary)
        return mark_safe(html)

    @property
    def summary_text(self):
        """The summary as plain text: rendered to HTML, then tags stripped."""
        html = self._markdown_to_html(self.summary)
        return strip_tags(html)

    @property
    def body_rendered(self):
        """The body as HTML, marked safe for template output."""
        html = self._markdown_to_html(self.body)
        return mark_safe(html)
54

55

56
class Authorship(models.Model):
    """Join row linking a user to an entry they authored.

    Rows are ordered by ``order`` by default (see ``Meta.ordering``).
    """

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    entry = models.ForeignKey(Entry, on_delete=models.CASCADE)
    # Sort key used by the default ordering below.
    order = models.PositiveIntegerField(default=0)

    class Meta:
        ordering = ["order"]

It’s pretty self-explanatory. The most interesting features are the is_draft flag and the way it provides .summary_rendered and .body_rendered properties that return Markdown rendered as HTML.

The URL format for this blog is /blog/2023/welcome/ - in my experience name-spacing posts by year makes the most sense, since even the most active blogs usually only have a few posts every month.

The views#

Here are the view functions defined in the views.py module for my blog/ application:

1
from django.contrib.syndication.views import Feed
2
from django.shortcuts import render, get_object_or_404
3
from django.utils.feedgenerator import Atom1Feed
4
from .models import Entry, Tag
5

6
# Maximum number of entries listed on the blog index page.
ENTRIES_ON_HOMEPAGE = 5
7

8

9
def index(request):
    """Render the blog index with the newest published entries.

    The template receives ``entries`` (at most ENTRIES_ON_HOMEPAGE items)
    and ``has_more``, which is True when older published entries exist.
    """
    published = Entry.objects.filter(is_draft=False).order_by("-created")
    # Fetch one row more than we display so we can tell whether older
    # entries exist without running a separate COUNT query.
    window = list(published[: ENTRIES_ON_HOMEPAGE + 1])
    context = {
        "entries": window[:ENTRIES_ON_HOMEPAGE],
        "has_more": len(window) > ENTRIES_ON_HOMEPAGE,
    }
    return render(request, "blog/index.html", context)
22

23

24
def entry(request, year, slug):
    """Render one entry, looked up by publication year and slug.

    Drafts are not filtered out here, so a draft is viewable by anyone who
    knows its URL. Responds with a 404 when no entry matches.
    """
    post = get_object_or_404(Entry, slug=slug, created__year=year)
    context = {"entry": post}
    return render(request, "blog/entry.html", context)
31

32

33
def year(request, year):
    """Render the published entries created in ``year``, newest first."""
    published = Entry.objects.filter(is_draft=False, created__year=year)
    context = {"entries": published.order_by("-created"), "year": year}
    return render(request, "blog/year.html", context)
38

39

40
def archive(request):
    """Render every published entry, newest first."""
    published = Entry.objects.filter(is_draft=False)
    context = {"entries": published.order_by("-created")}
    return render(request, "blog/archive.html", context)
43

44

45
def tag(request, slug):
    """Render the published entries carrying the tag with this slug.

    Responds with a 404 when no tag has the given slug. A bare
    ``Tag.objects.get()`` would raise ``Tag.DoesNotExist`` instead, which
    surfaces as a 500 error for any mistyped or stale tag URL.
    """
    tag_obj = get_object_or_404(Tag, slug=slug)
    # Drafts stay hidden from listing pages; newest entries come first.
    entries = tag_obj.entry_set.filter(is_draft=False).order_by("-created")
    return render(request, "blog/tag.html", {"tag": tag_obj, "entries": entries})

The Atom feed#

The most interesting part of the views.py file is this bit - defining the Atom feed:

1
class BlogFeed(Feed):
    """Atom feed of the five most recent published entries."""

    feed_type = Atom1Feed
    title = "Datasette Cloud"
    link = "/blog/"

    def items(self):
        """Return the five newest non-draft entries."""
        published = Entry.objects.filter(is_draft=False)
        return published.order_by("-created")[:5]

    def item_title(self, item):
        """Use the entry title as the feed item title."""
        return item.title

    def item_description(self, item):
        """Return the rendered summary and body, separated by a newline."""
        return "\n".join((item.summary_rendered, item.body_rendered))

    def item_link(self, item):
        """Return the entry's path, as produced by its get_absolute_url()."""
        return item.get_absolute_url()

    def item_author_name(self, item):
        """Return the authors' names joined by commas, or None if empty.

        Each author is shown by full name, falling back to ``str(user)``
        when the full name is blank.
        """
        names = (user.get_full_name() or str(user) for user in item.authors.all())
        joined = ", ".join(names)
        return joined if joined else None

    def get_feed(self, obj, request):
        """Build the feed, then override its content type.

        Serving the feed as application/xml lets browsers display it
        inline rather than offering it as a download.
        """
        feed = super().get_feed(obj, request)
        feed.content_type = "application/xml; charset=utf-8"
        return feed

This is using the Django syndication feed framework. The resulting Atom feed can be found here:

https://www.datasette.cloud/blog/feed/

There’s one extra trick here: I’m overriding the default content-type header and setting it to "application/xml; charset=utf-8".

Django defaults to using application/atom+xml; charset=utf-8 which is correct… but causes most browsers to trigger a download rather than rendering the XML in the browser directly.

I like to be able to click on a feed link and see the XML before I paste the URL into my feed reader software, so I prefer to use application/xml instead.

It’s easy to forget social media card metadata, but it’s really important - with the right markup, links to posts shared on Mastodon, Twitter, LinkedIn and Facebook will look MUCH better.

Here’s a snippet from my entry.html template:

1
{% block extra_head %}
{# Twitter card: use the large-image layout only when the entry has a card image. #}
{% if entry.card_image %}
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:image" content="{{ entry.card_image }}">
{% else %}
<meta name="twitter:card" content="summary">
{% endif %}
<meta name="twitter:creator" content="@datasetteproj">
{# Open Graph metadata #}
<meta property="og:url" content="https://www.datasette.cloud{{ request.path }}">
<meta property="og:title" content="{{ entry.title }} - Datasette Cloud">
{% if entry.card_image %}<meta property="og:image" content="{{ entry.card_image }}">{% endif %}
<meta property="og:type" content="article">
<meta property="og:description" content="{{ entry.summary_text }}">
{# Ask search engines not to index draft entries. #}
{% if entry.is_draft %}
<meta name="robots" content="noindex">
{% endif %}
{% endblock %}

There’s one other detail in there: if an entry is a draft entry I serve <meta name="robots" content="noindex"> to prevent it from being accidentally indexed by search engines.

URL configuration#

Here’s the URL configuration from urls.py:

1
    # Blog
2
    path("blog/", blog_views.index),
3
    path("blog/<int:year>/<slug:slug>/", blog_views.entry),
4
    path("blog/archive/", blog_views.archive),
5
    path("blog/<int:year>/", blog_views.year),
6
    path("blog/tag/<slug:slug>/", blog_views.tag),
7
    path("blog/feed/", blog_views.BlogFeed()),

Tests#

I added a quick suite of tests, mainly to check that is_draft was working correctly but also to ensure the Atom feed works.

Testing the feed was particularly important because it’s at the highest risk of accidentally breaking without me noticing it - errors that affect the HTML of the blog are much more obvious.

1
import pytest
2
from datetime import datetime
3
from django.contrib.auth.models import User
4
from django.utils import timezone
5
from blog.models import Entry, Tag
6
from xml.etree import ElementTree as ET
7

8

9
@pytest.fixture
def client():
    """Provide a new Django test client."""
    from django.test import Client as DjangoClient

    return DjangoClient()
14

15

16
@pytest.fixture
def five_entries():
    """Create five entries dated 1-5 May 2023 (UTC); the first is a draft.

    Every entry is credited to one user ("author") and tagged "all".

    Returns:
        The created entries as a list, oldest first.
    """
    # django.utils.timezone.utc was deprecated in Django 4.1 and removed in
    # 5.0; the standard-library UTC object works on every Django version.
    # Imported locally under an alias because this module already binds the
    # name ``timezone`` to django.utils.timezone.
    from datetime import timezone as dt_timezone

    author = User.objects.create_user(username="author")
    # Not named ``all``: that would shadow the builtin.
    all_tag, _ = Tag.objects.get_or_create(name="All", slug="all")
    entries = []
    for i in range(1, 6):
        entry = Entry.objects.create(
            title=f"Test Entry {i}",
            slug=f"test-entry-{i}",
            created=datetime(2023, 5, i, tzinfo=dt_timezone.utc),
            summary=f"This is test entry {i}",
            body=f"This is the body of test entry {i}.",
            is_draft=(i == 1),
        )
        entry.authors.add(author)
        entry.tags.add(all_tag)
        entries.append(entry)

    return entries
36

37

38
@pytest.mark.django_db
def test_index_page(client, five_entries):
    """The index lists published entries only and links to older ones when needed."""
    html = client.get("/blog/").content.decode("utf-8")

    # Entry 1 is the draft and must be hidden; entries 2-5 must be listed.
    # Four visible entries is under the homepage limit, so no "more" link.
    for number in range(1, 6):
        listed = number != 1
        assert (f"Test Entry {number}" in html) == listed
        assert (f"This is test entry {number}" in html) == listed
    assert "Older entries" not in html

    # Two more published entries push the total past the homepage limit,
    # so the "more" link should now appear.
    for number in (6, 7):
        Entry.objects.create(
            title=f"Test Entry {number}",
            slug=f"test-entry-{number}",
            summary=".",
            body=".",
        )
    html2 = client.get("/blog/").content.decode("utf-8")
    assert "Older entries" in html2
65

66

67
@pytest.mark.django_db
def test_entry_page(client, five_entries):
    """Entry pages render for drafts and published entries; only drafts are flagged."""
    noindex_tag = '<meta name="robots" content="noindex">'
    # The fixture's first entry is the draft; the second is published.
    draft, published = five_entries[:2]

    for post, expect_draft in ((draft, True), (published, False)):
        url = f"/blog/{post.created.year}/{post.slug}/"
        html = client.get(url).content.decode("utf-8")

        # Title and body must appear on the entry's own page.
        assert post.title in html
        assert post.body in html

        # The draft marker and the robots noindex tag appear only on drafts.
        assert ("(draft)" in html) == expect_draft
        assert (noindex_tag in html) == expect_draft
89

90

91
@pytest.mark.django_db
@pytest.mark.parametrize(
    "path",
    ["/blog/", "/blog/archive/", "/blog/2023/", "/blog/tag/all/"],
)
def test_draft_entry_not_visible(client, five_entries, path):
    """The draft entry's title must not appear on any listing page."""
    draft = five_entries[0]
    assert draft.title == "Test Entry 1"
    page = client.get(path).content.decode("utf-8")
    assert draft.title not in page
102

103

104
@pytest.mark.django_db
def test_atom_feed(client, five_entries):
    """The feed is served as XML and lists the published entries, newest first."""
    atom = "{http://www.w3.org/2005/Atom}"

    response = client.get("/blog/feed/")
    assert response.status_code == 200
    assert response["Content-Type"] == "application/xml; charset=utf-8"
    xml = response.content.decode("utf-8")
    root = ET.fromstring(xml)
    assert "<title>Datasette Cloud</title>" in xml

    # Expected content: every non-draft fixture entry, newest first.
    published = sorted(
        (e for e in five_entries if not e.is_draft),
        key=lambda e: e.created,
        reverse=True,
    )
    assert len(published) == 4

    feed_entries = root.findall(f"{atom}entry")
    assert len(feed_entries) == 4
    for node, expected in zip(feed_entries, published):
        assert node.find(f"{atom}title").text == expected.title
        expected_href = (
            f"http://testserver/blog/{expected.created.year}/{expected.slug}/"
        )
        assert node.find(f"{atom}link").attrib["href"] == expected_href
        assert node.find(f"{atom}author/{atom}name").text == "author"

The finished blog#

Check it out at https://www.datasette.cloud/blog/

Consider the code snippets in this TIL licensed under Apache License, Version 2.0.