1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/usr/bin/lua
local string = require "string"
local http = require "socket.http"
-- Fetch `url` over HTTP and return the response body.
-- Raises an error unless the server answered with status 200
-- (on transport failure LuaSocket returns nil plus a message, which
-- also fails the 200 check and is reported in the error text).
local function must_get_page(url)
  local body, code = http.request(url)
  if code == 200 then
    return body
  end
  error("Failed getting '" .. url .. "': Got status " .. code .. "\n")
end
-- Escape the five XML special characters so `text` can be embedded
-- safely in element content and attribute values.
-- NOTE: "&" must be replaced first, otherwise the "&" introduced by
-- the later substitutions would itself get escaped again.
-- (The previous version's replacement strings had been collapsed to
-- the raw characters — a no-op at best, and `"""` was a syntax error.)
local function xmlesc(text)
  text = string.gsub(text, "&", "&amp;")
  text = string.gsub(text, "<", "&lt;")
  text = string.gsub(text, ">", "&gt;")
  text = string.gsub(text, '"', "&quot;")
  text = string.gsub(text, "'", "&apos;")
  return text
end
-- Decode the five predefined XML/HTML character entities in `s`.
-- Unknown entities are left untouched: gsub keeps the original match
-- when the table lookup yields nil. Numeric references (&#nn;) are
-- not handled.
local function unesc_html(s)
  local named = {
    amp = "&",
    lt = "<",
    gt = ">",
    quot = '"',
    apos = "'",
  }
  return string.gsub(s, '&([^;]+);', named)
end
-- English month name (as printed on the archive page) -> 1-based
-- month number, built from the calendar order.
local months = {}
for number, name in ipairs({
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December",
}) do
  months[name] = number
end
-- Get the permanent URL of the current comic (index.html).
-- The front page redirects to the newest comic via a
-- <meta http-equiv="refresh" content="N; URL=..."> tag; extract and
-- entity-decode that target URL.
-- Returns nil when no refresh tag is found — the caller skips the
-- entry on a nil url — instead of crashing inside unesc_html with
-- "bad argument #1 to 'gsub'".
local function resolve_index()
  local indexsrc = must_get_page("https://falseknees.com/index.html")
  local url = string.match(indexsrc, '<meta%s+http%-equiv="refresh"%s+content="%d+;%s*URL=([^"]+)"%s*/?>')
  if not url then
    return nil
  end
  return unesc_html(url)
end
-- Scrape the archive page. Each archive entry looks like:
--   <a href="URL">Month DAYth, YEAR - TITLE</a>
local pagesrc = must_get_page("https://falseknees.com/archive.html")
-- Newest entry date seen so far, as an ISO-8601 string; for this
-- fixed-width format lexicographic order equals chronological order.
local maxdate
local entries = {}
-- Captures: href, month name, day (ordinal suffix dropped), year, title.
local PATTERN = '<a href="([^"]+)">(%w+) (%d+)%w+, (%d+)%s*%-%s*([^<>]-)</a>'
-- NOTE: `goto` below requires Lua 5.2+ (or LuaJIT).
for url, monthname, day, year, title in string.gmatch(pagesrc, PATTERN) do
url = unesc_html(url)
monthname = unesc_html(monthname)
day = tonumber(unesc_html(day), 10)
year = tonumber(unesc_html(year), 10)
title = unesc_html(title)
-- The newest comic is linked as "index.html"; resolve it to its
-- permanent per-comic URL so the feed entry stays stable.
if url == "index.html" then
url = resolve_index()
end
-- resolve_index may fail to find a target; skip the entry then.
if not url then goto continue end
url = "https://falseknees.com/" .. url
local month = assert(months[monthname], "unknown month")
-- We don't know the time, let's assume midnight at UTC+0
local date = string.format("%04d-%02d-%02dT00:00:00Z", year, month, day)
if not maxdate or date > maxdate then
maxdate = date
end
entries[#entries+1] = {
url = url,
title = title,
date = date
}
::continue::
end
-- maxdate doubles as an "at least one entry parsed" flag; an empty
-- feed means the page layout changed and the pattern no longer matches.
if not maxdate then
error("No entry successfully parsed")
end
-- A UUID URN with a UUID generated by `uuid -r`.
local FEED_ID = "urn:uuid:7e86b012-c226-41b3-945e-bab3341c65e9"
-- Emit the Atom feed header. The feed-level <updated> carries the
-- date of the newest entry found above.
local escaped_id = xmlesc(FEED_ID)
local escaped_updated = xmlesc(maxdate)
print(string.format([[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>False Knees</title>
<link href="http://example.org/" />
<id>%s</id>
<author><name>Joshua Barkman</name></author>
<updated>%s</updated>]], escaped_id, escaped_updated))
-- Emit one <entry> per comic. Entry ids are derived from the feed id
-- plus the entry URL so they stay stable across feed regenerations.
for index = 1, #entries do
  local entry = entries[index]
  local rendered = string.format([[
<entry>
<title>%s</title>
<link rel="alternate" type="text/html" href="%s" />
<id>%s</id>
<updated>%s</updated>
</entry>
]], xmlesc(entry.title), xmlesc(entry.url), xmlesc(FEED_ID .. "#" .. entry.url), xmlesc(entry.date))
  print(rendered)
end
print("</feed>")
|