1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/usr/bin/lua
local string = require "string"
local http = require "socket.http"
-- Fetch `url` over HTTP and return the response body.
-- Raises an error unless the server answered with status 200
-- (on transport failure LuaSocket returns nil plus a message, which
-- also fails the 200 check and is reported in the error text).
local function must_get_page(url)
  local body, code = http.request(url)
  if code == 200 then
    return body
  end
  error("Failed getting '" .. url .. "': Got status " .. code .. "\n")
end
-- Escape the five XML special characters so `text` can be embedded
-- safely in element content and attribute values.
-- NOTE: "&" must be replaced first, otherwise the "&" introduced by
-- the later substitutions would itself get escaped again.
-- (The previous version's replacement strings had been collapsed to
-- the raw characters — a no-op at best, and `"""` was a syntax error.)
local function xmlesc(text)
  text = string.gsub(text, "&", "&amp;")
  text = string.gsub(text, "<", "&lt;")
  text = string.gsub(text, ">", "&gt;")
  text = string.gsub(text, '"', "&quot;")
  text = string.gsub(text, "'", "&apos;")
  return text
end
-- Decode the five predefined XML/HTML character entities in `s`.
-- Unknown entities are left untouched: gsub keeps the original match
-- when the table lookup yields nil. Numeric references (&#nn;) are
-- not handled.
local function unesc_html(s)
  local named = {
    amp = "&",
    lt = "<",
    gt = ">",
    quot = '"',
    apos = "'",
  }
  return string.gsub(s, '&([^;]+);', named)
end
-- English month name (as printed on the archive page) -> 1-based
-- month number, built from the calendar order.
local months = {}
for number, name in ipairs({
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December",
}) do
  months[name] = number
end
-- Get the permanent URL of the current comic (index.html).
-- The front page redirects to the newest comic via a
-- <meta http-equiv="refresh" content="N; URL=..."> tag; extract and
-- entity-decode that target URL.
-- Returns nil when no refresh tag is found — the caller skips the
-- entry on a nil url — instead of crashing inside unesc_html with
-- "bad argument #1 to 'gsub'".
local function resolve_index()
  local indexsrc = must_get_page("https://falseknees.com/index.html")
  local url = string.match(indexsrc, '<meta%s+http%-equiv="refresh"%s+content="%d+;%s*URL=([^"]+)"%s*/?>')
  if not url then
    return nil
  end
  return unesc_html(url)
end
-- Scrape the archive page. Each archive entry looks like:
--   <a href="URL">Month DAYth, YEAR - TITLE</a>
local pagesrc = must_get_page("https://falseknees.com/archive.html")
-- Newest entry date seen so far, as an ISO-8601 string; for this
-- fixed-width format lexicographic order equals chronological order.
local maxdate
local entries = {}
-- Captures: href, month name, day (ordinal suffix dropped), year, title.
local PATTERN = '<a href="([^"]+)">(%w+) (%d+)%w+, (%d+)%s*%-%s*([^<>]-)</a>'
-- NOTE: `goto` below requires Lua 5.2+ (or LuaJIT).
for url, monthname, day, year, title in string.gmatch(pagesrc, PATTERN) do
url = unesc_html(url)
monthname = unesc_html(monthname)
day = tonumber(unesc_html(day), 10)
year = tonumber(unesc_html(year), 10)
title = unesc_html(title)
-- The newest comic is linked as "index.html"; resolve it to its
-- permanent per-comic URL so the feed entry stays stable.
if url == "index.html" then
url = resolve_index()
end
-- resolve_index may fail to find a target; skip the entry then.
if not url then goto continue end
url = "https://falseknees.com/" .. url
local month = assert(months[monthname], "unknown month")
-- We don't know the time, let's assume midnight at UTC+0
local date = string.format("%04d-%02d-%02dT00:00:00Z", year, month, day)
if not maxdate or date > maxdate then
maxdate = date
end
entries[#entries+1] = {
url = url,
title = title,
date = date
}
::continue::
end
-- maxdate doubles as an "at least one entry parsed" flag; an empty
-- feed means the page layout changed and the pattern no longer matches.
if not maxdate then
error("No entry successfully parsed")
end
-- A UUID URN with a UUID generated by `uuid -r`.
local FEED_ID = "urn:uuid:7e86b012-c226-41b3-945e-bab3341c65e9"
-- Emit the Atom feed header. The feed-level <updated> carries the
-- date of the newest entry found above.
local escaped_id = xmlesc(FEED_ID)
local escaped_updated = xmlesc(maxdate)
print(string.format([[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>False Knees</title>
<link href="http://example.org/" />
<id>%s</id>
<author><name>Joshua Barkman</name></author>
<updated>%s</updated>]], escaped_id, escaped_updated))
-- Emit one <entry> per comic. Entry ids are derived from the feed id
-- plus the entry URL so they stay stable across feed regenerations.
for index = 1, #entries do
  local entry = entries[index]
  local rendered = string.format([[
<entry>
<title>%s</title>
<link rel="alternate" type="text/html" href="%s" />
<id>%s</id>
<updated>%s</updated>
</entry>
]], xmlesc(entry.title), xmlesc(entry.url), xmlesc(FEED_ID .. "#" .. entry.url), xmlesc(entry.date))
  print(rendered)
end
print("</feed>")
|