diff options
author | Laria Carolin Chabowski <laria@laria.me> | 2020-03-11 22:35:27 +0100 |
---|---|---|
committer | Laria Carolin Chabowski <laria@laria.me> | 2020-03-11 22:35:27 +0100 |
commit | 3e2254d676193ef6c628250617c675b9ff724b4c (patch) | |
tree | fe705722d015c0c813d4ee6f0513f127b53dd4af | |
download | falseknees-atom-master.tar.gz falseknees-atom-master.tar.bz2 falseknees-atom-master.zip |
falseknees-atom.lua is a simple scraper of falseknees.com/archive that
outputs an atom feed for consumption by newsboat or similar newsreaders
that can get a feed from some external command.
-rw-r--r-- | LICENSE | 13 | ||||
-rw-r--r-- | README.md | 35 | ||||
-rwxr-xr-x | falseknees-atom.lua | 120 |
3 files changed, 168 insertions, 0 deletions
@@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + 14 rue de Plaisance, 75014 Paris, France + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f42bc6e --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +falseknees-atom +=============== + +A simple Lua script that generates an Atom Feed of the fantastic +[False Knees][falseknees] webcomic. + +Requirements +------------ + +You need to have [Lua][lua] (5.3 or later) and [LuaSocket][luasocket] installed. + +Usage +----- + +Executing `falseknees-atom.lua` will write an Atom Feed with all the comic +strips to stdout. When you are using a feed reader that can take its input from +an external binary, you can just point it to the script. E.g. here is an entry +for [Newsboat][newsboat]'s `~/.newsboat/urls` that I use: + + exec:~/src/falseknees-atom/falseknees-atom.lua + +`~/src/falseknees-atom/` is where this repo lives on my machine. Adapt the path +to fit wherever you put it. + +Why? +---- + +I want to know when the latest comic arrives, but +<http://falseknees.com/rss.xml> does not seem to get updated any more :(. 
#!/usr/bin/lua

-- falseknees-atom.lua: scrape https://falseknees.com/archive.html and print an
-- Atom feed with one entry per comic strip to stdout.
--
-- Residue of the commit page this script was extracted from (tail of the
-- README.md diff and this file's diff header), kept so that nothing is lost:
--   [falseknees]: https://falseknees.com
--   [lua]: https://lua.org
--   [luasocket]: http://w3.impa.br/~diego/software/luasocket/home.html
--   [newsboat]: https://newsboat.org/
--   diff --git a/falseknees-atom.lua b/falseknees-atom.lua
--   new file mode 100755
--   index 0000000..584617c

local string = require "string"
local http = require "socket.http"

-- Fetch `url` and return the response body.
-- Raises an error unless the second value returned by http.request is 200.
local function must_get_page(url)
    local body, status = http.request(url)
    if status ~= 200 then
        -- tostring() keeps the message buildable whatever http.request handed
        -- back as its second value (a number or something else).
        error("Failed getting '" .. url .. "': Got status " .. tostring(status) .. "\n")
    end
    return body
end

-- Characters that must be escaped in XML text and attribute values.
local XML_ESCAPES = {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ['"'] = "&quot;",
    ["'"] = "&apos;",
}

-- Escape `text` for use inside XML text nodes and quoted attribute values.
-- Returns only the escaped string (gsub's match count is dropped).
local function xmlesc(text)
    -- Single pass, so the "&" of an inserted entity is never escaped again.
    local escaped = string.gsub(text, "[&<>\"']", XML_ESCAPES)
    return escaped
end

-- Named HTML entities that unesc_html knows how to decode.
local HTML_ENTITIES = {
    amp = "&",
    lt = "<",
    gt = ">",
    quot = '"',
    apos = "'",
}

-- Decode the named entities listed in HTML_ENTITIES inside `s`.
-- Anything else of the form "&...;" is left untouched, because gsub keeps the
-- original match when the table lookup yields nil.
-- Returns only the decoded string (gsub's match count is dropped).
local function unesc_html(s)
    local unescaped = string.gsub(s, "&([^;]+);", HTML_ENTITIES)
    return unescaped
end

-- English month names as they are matched from the archive page.
local months = {
    January = 1,
    February = 2,
    March = 3,
    April = 4,
    May = 5,
    June = 6,
    July = 7,
    August = 8,
    September = 9,
    October = 10,
    November = 11,
    December = 12,
}

-- Get the permanent URL of the current comic (index.html).
-- index.html is expected to carry a <meta http-equiv="refresh"> redirect; the
-- redirect target is returned, or nil when no such tag is found so that the
-- caller can skip the entry instead of crashing inside unesc_html.
local function resolve_index()
    local indexsrc = must_get_page("https://falseknees.com/index.html")
    local target = string.match(indexsrc, '<meta%s+http%-equiv="refresh"%s+content="%d+;%s*URL=([^"]+)"%s*/?>')
    if not target then
        return nil
    end
    return unesc_html(target)
end

local pagesrc = must_get_page("https://falseknees.com/archive.html")

-- Newest entry date seen so far; becomes the feed-level <updated> value.
local maxdate
local entries = {}

-- Matches archive links of the form
--   <a href="URL">Month DDsuffix, YYYY - Title</a>
-- where "suffix" is the run of word characters after the day number.
local PATTERN = '<a href="([^"]+)">(%w+) (%d+)%w+, (%d+)%s*%-%s*([^<>]-)</a>'
for url, monthname, day, year, title in string.gmatch(pagesrc, PATTERN) do
    url = unesc_html(url)
    monthname = unesc_html(monthname)
    day = tonumber(unesc_html(day), 10)
    year = tonumber(unesc_html(year), 10)
    title = unesc_html(title)

    -- The newest comic is linked as index.html; swap in its permanent URL.
    if url == "index.html" then
        url = resolve_index()
    end

    -- Entries whose URL could not be resolved are skipped.
    if url then
        url = "https://falseknees.com/" .. url

        local month = assert(months[monthname], "unknown month")

        -- We don't know the time of day, so assume midnight UTC.
        local date = string.format("%04d-%02d-%02dT00:00:00Z", year, month, day)

        -- Fixed-width timestamps order correctly as plain strings.
        if not maxdate or date > maxdate then
            maxdate = date
        end

        entries[#entries + 1] = {
            url = url,
            title = title,
            date = date,
        }
    end
end

if not maxdate then
    error("No entry successfully parsed")
end

-- A UUID URN with a UUID generated by `uuid -r`.
local FEED_ID = "urn:uuid:7e86b012-c226-41b3-945e-bab3341c65e9"

-- Feed header. The feed-level link points at the comic's site rather than the
-- example.org placeholder.
print(string.format([[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>False Knees</title>
  <link href="https://falseknees.com/" />
  <id>%s</id>
  <author><name>Joshua Barkman</name></author>
  <updated>%s</updated>]], xmlesc(FEED_ID), xmlesc(maxdate)))

-- One <entry> per comic; the entry id is the feed id plus the comic's URL.
local ENTRY_TEMPLATE = [[
  <entry>
    <title>%s</title>
    <link rel="alternate" type="text/html" href="%s" />
    <id>%s</id>
    <updated>%s</updated>
  </entry>]]

for _, entry in ipairs(entries) do
    print(string.format(
        ENTRY_TEMPLATE,
        xmlesc(entry.title),
        xmlesc(entry.url),
        xmlesc(FEED_ID .. "#" .. entry.url),
        xmlesc(entry.date)
    ))
end

print("</feed>")