Generate a tag cloud

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Generate (i.e. calculate and render) a tag cloud.

I've been using and evolving this code over many, many years.
Tag clouds definitely aren't cool anymore today, but in case
you need one, this might be of use to you.

Requires Python_ 2.7 or later (including Python 3.x) as well as
Jinja2_ (tested with 2.7.2).

.. _Python: http://python.org/
.. _Jinja2: http://jinja.pocoo.org/

:Copyright: 2005-2014 `Jochen Kupperschmidt <http://homework.nwsnet.de/>`_
:Date: 05-Apr-2014
:License: MIT
"""

from bisect import bisect_left
from collections import defaultdict, namedtuple
from operator import attrgetter

from jinja2 import Environment


# --- models ---

# An article record: its ``id``, its ``title`` and the names of the
# ``tags`` attached to it.
Article = namedtuple('Article', 'id title tags')

# A tag record: its ``name``, its ``weight`` (how many articles carry it)
# and the ``level`` it is displayed at in the cloud.
Tag = namedtuple('Tag', 'name weight level')


# --- cloud calculations ---

def generate_tag_cloud(articles, level_count=4):
    """Build the list of tags needed to display a cloud.

    The articles are grouped by tag name, one tag is created per name
    (weighted by its number of articles), and each tag is then assigned
    a level derived from ``level_count``.

    Returns the resulting tags as a list ordered by tag name.
    """
    ids_per_tag_name = group_article_ids_by_tag_name(articles)
    unleveled_tags = list(create_tags(ids_per_tag_name))
    levels = calculate_levels_by_tag_name(unleveled_tags, level_count)

    # Materialize the leveled tags, then order them alphabetically.
    cloud = list(update_levels(unleveled_tags, levels))
    cloud.sort(key=attrgetter('name'))
    return cloud

def group_article_ids_by_tag_name(articles):
    """Map each tag name to the set of ids of the articles carrying it.

    Returns a ``defaultdict(set)`` keyed by tag name.
    """
    ids_per_tag_name = defaultdict(set)

    # Flatten the articles into (tag name, article id) pairs.
    pairs = (
        (tag_name, article.id)
        for article in articles
        for tag_name in article.tags
    )
    for tag_name, article_id in pairs:
        ids_per_tag_name[tag_name].add(article_id)

    return ids_per_tag_name

def create_tags(article_ids_by_tag_name):
    """Yield one ``Tag`` per tag name in the given mapping.

    A tag's weight is the number of distinct article ids recorded for
    its name. The level is set to the placeholder ``0``; the real value
    is filled in by a later step.
    """
    for name, ids in article_ids_by_tag_name.items():
        distinct_ids = set(ids)
        yield Tag(name=name, weight=len(distinct_ids), level=0)

def calculate_levels_by_tag_name(tags, level_count):
    """Calculate the relative level for each tag.

    The weights of all tags are turned into ``level_count`` ascending
    thresholds; a tag's level is one more than the number of thresholds
    lying strictly below its weight, capped at ``level_count``.

    :param tags: iterable of objects with ``name`` and ``weight``
        attributes.
    :param level_count: the number of distinct levels to distribute the
        tags over.
    :returns: a dict mapping each tag name to its level, an integer
        between 1 and ``level_count``; empty if there are no tags.
    """
    tags = list(tags)
    if not tags:
        # Nothing to rank; also avoids computing thresholds from an
        # empty list of weights.
        return {}

    weights = [tag.weight for tag in tags]
    thresholds = list(calculate_thresholds(weights, level_count))

    # A weight above every threshold would otherwise produce level
    # ``level_count + 1``, for which the cloud has no style. Keep at
    # least level 1 available for degenerate level counts.
    highest_level = max(level_count, 1)

    def calculate_level(tag):
        level = bisect_left(thresholds, tag.weight) + 1
        return min(level, highest_level)

    return {tag.name: calculate_level(tag) for tag in tags}

def calculate_thresholds(weights, level_count):
    """Calculate thresholds for the given number of levels from the
    given tag weights.

    Yields ``level_count`` ascending values. The ``i``-th one is the
    weight range (``max - min + 1``) raised to ``i / level_count``,
    scaled by ``factor ** i``; the first threshold is therefore always
    ``1.0``.

    :param weights: iterable of numeric tag weights; may be empty, in
        which case no thresholds are yielded.
    :param level_count: the number of thresholds to produce.
    """
    # Materialize once so that one-shot iterators survive both the
    # max() and the min() pass.
    weights = list(weights)
    if not weights:
        # No weights means no range to derive thresholds from.
        return

    weight_range = max(weights) - min(weights) + 1
    factor = 1.27  # Experiment with this value.
    for i in range(level_count):
        # float() keeps the exponent fractional on Python 2 as well.
        yield weight_range ** (i / float(level_count)) * (factor ** i)

def update_levels(tags, levels_by_tag_name):
    """Yield a copy of each tag carrying the level looked up by its name.

    The given tags are left untouched; a name that is missing from
    ``levels_by_tag_name`` raises ``KeyError``.
    """
    for current in tags:
        yield current._replace(level=levels_by_tag_name[current.name])


# --- HTML generation ---

# In case you use SCSS (http://sass-lang.com/) to generate
# stylesheets, you can use this snippet to generate the CSS
# for the levels:
#
#   .tag-cloud a {
#     $level_font_sizes: (80% 120% 150% 200%);
#     @for $i from 1 through 4 {
#       &.level-#{$i} {
#         // The first index is 1.
#         font-size: nth($level_font_sizes, $i);
#       }
#     }
#   }

# Jinja2 template for a standalone HTML page showing the tag cloud.
#
# It expects a ``tags`` sequence whose items provide ``name``,
# ``weight`` and ``level``. Every tag becomes a list item holding a
# link with the CSS class ``level-<level>``; the inline stylesheet only
# defines font sizes for the levels 1 to 4.
HTML_TEMPLATE = """\
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <style>
      .tag-cloud li {
        display: inline;
      }
      .tag-cloud a {
        line-height: 1.4em;
        margin: 0 0.1em;
      }
      .tag-cloud a.level-1 {
        font-size: 80%;
      }
      .tag-cloud a.level-2 {
        font-size: 120%;
      }
      .tag-cloud a.level-3 {
        font-size: 150%;
      }
      .tag-cloud a.level-4 {
        font-size: 200%;
      }
    </style>
  </head>
  <body>

    <h1>Tags <small>({{ tags|length }})</small></h1>

    <ol class="tag-cloud">
      {%- for tag in tags %}
      <li><a href="#{{ tag.name }}" title="{{ tag.weight }} article(s)" class="level-{{ tag.level }}">{{ tag.name }}</a></li>
      {%- endfor %}
    </ol>

  </body>
</html>"""

def render_html(tags):
    """Render the given tags as a complete HTML document.

    ``HTML_TEMPLATE`` is compiled in a Jinja2 environment that has
    autoescaping enabled, and the rendered markup is returned.
    """
    return (
        Environment(autoescape=True)
        .from_string(HTML_TEMPLATE)
        .render(tags=tags)
    )


# --- tests ---

def test(tags):
    """Check the tags generated for the sample articles.

    Compares the first four tags, in order, against the expected name,
    weight and level, and prints a success message if all of them match.
    """
    expectations = [
        # (name, weight, level)
        ('boring',        2, 3),
        ('cool',          3, 4),
        ('great',         1, 1),
        ('revolutionary', 1, 1),
    ]
    for position, (name, weight, level) in enumerate(expectations):
        assert_tag(tags[position], name, weight, level)
    print('All tests were successful.')

def assert_tag(tag, expected_name, expected_weight, expected_level):
    """Assert that the tag has the expected name, weight and level.

    The attributes are checked in that order; the first mismatch raises
    ``AssertionError``.
    """
    expected_by_field = (
        ('name', expected_name),
        ('weight', expected_weight),
        ('level', expected_level),
    )
    for field, expected in expected_by_field:
        assert getattr(tag, field) == expected


# --- entry point ---

if __name__ == '__main__':
    # Sample data: five articles sharing four different tags.
    sample_articles = [
        Article(id=1, title='First',  tags=['cool']),
        Article(id=2, title='Second', tags=['boring']),
        Article(id=3, title='Third',  tags=['cool', 'revolutionary']),
        Article(id=4, title='Fourth', tags=['great', 'cool']),
        Article(id=5, title='Fifth',  tags=['boring']),
    ]

    cloud = generate_tag_cloud(sample_articles)

    # Either verify the generated tags ...
    #test(cloud)
    # ... or render them as an HTML page on standard output:
    print(render_html(cloud))