From 3e2254d676193ef6c628250617c675b9ff724b4c Mon Sep 17 00:00:00 2001 From: Laria Carolin Chabowski Date: Wed, 11 Mar 2020 22:35:27 +0100 Subject: Initial commit falseknees-atom.lua is a simple scraper of falseknees.com/archive that outputs an atom feed for consumption by newsboat or similar newsreaders that can get a feed from some external command. --- LICENSE | 13 ++++++ README.md | 35 +++++++++++++++ falseknees-atom.lua | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100755 falseknees-atom.lua diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..90107a0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + 14 rue de Plaisance, 75014 Paris, France + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f42bc6e --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +falseknees-atom +=============== + +A simple Lua script that generates an Atom Feed of the fantastic +[False Knees][falseknees] webcomic. + +Requirements +------------ + +You need to have [Lua][lua] (5.3 or later) and [LuaSocket][luasocket] installed. + +Usage +----- + +Executing `falseknees-atom.lua` will write an Atom Feed with all the comic +strips to stdout. When you are using a feed reader that can take its input from +an external binary, you can just point it to the script. E.g. 
--[==[
Non-code residue of the collapsed patch that shared these physical lines with
the script. It is kept here (line breaks restored) so that no content is lost.
The script's shebang is part of it because a shebang is only legal on the
very first line of a Lua chunk.

here is an entry
+for [Newsboat][newsboat]'s `~/.newsboat/urls` that I use:
+
+    exec:~/src/falseknees-atom/falseknees-atom.lua
+
+`~/src/falseknees-atom/` is where this repo lives on my machine. Adapt the path
+to fit wherever you put it.
+
+Why?
+----
+
+I want to know when the latest comic arrives, but
+ does not seem to get updated any more :(.
+
+
+[falseknees]: https://falseknees.com
+[lua]: https://lua.org
+[luasocket]: http://w3.impa.br/~diego/software/luasocket/home.html
+[newsboat]: https://newsboat.org/
diff --git a/falseknees-atom.lua b/falseknees-atom.lua
new file mode 100755
index 0000000..584617c
--- /dev/null
+++ b/falseknees-atom.lua
@@ -0,0 +1,120 @@
+#!/usr/bin/lua
]==]

-- falseknees-atom: fetches https://falseknees.com/archive.html, extracts one
-- entry per archive link and prints an Atom feed to stdout.
--
-- NOTE(review): in the copy this was restored from, everything that looked
-- like an HTML/XML tag or entity had been stripped out of the string
-- literals. The entity strings, the archive pattern and the feed templates
-- below are reconstructions; each one is marked where it appears.

local string = require "string"
local http = require "socket.http"

--- Download a page and return its body.
-- @param url  absolute URL to fetch
-- @return     the response body
-- Raises an error unless the request finished with HTTP status 200.
local function must_get_page(url)
	local data, status = http.request(url)
	if status ~= 200 then
		-- tostring() keeps the report intact even if no status came back.
		error("Failed getting '" .. url .. "': Got status " .. tostring(status) .. "\n")
	end
	return data
end

-- Characters that must be escaped in XML text and attribute values.
local XML_ESCAPES = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
	['"'] = "&quot;",
	["'"] = "&apos;",
}

--- Escape a string for use in XML text or in a quoted attribute value.
-- All five characters are replaced in one pass, so an ampersand that is
-- produced by an earlier replacement can never be escaped a second time.
-- @param text  raw string
-- @return      escaped string (exactly one return value)
local function xmlesc(text)
	return (string.gsub(text, "[&<>\"']", XML_ESCAPES))
end

-- Named entities understood by unesc_html.
local HTML_ENTITIES = {
	amp = "&",
	lt = "<",
	gt = ">",
	quot = '"',
	apos = "'",
}

-- Translate the inside of one `&...;` reference. Returns nil for anything
-- unknown, which makes string.gsub leave the original text untouched.
local function decode_entity(name)
	local named = HTML_ENTITIES[name]
	if named then
		return named
	end

	-- Numeric character references: &#8217; or &#x2019;
	local code
	local dec = string.match(name, "^#(%d+)$")
	if dec then
		code = tonumber(dec, 10)
	else
		local hex = string.match(name, "^#[xX](%x+)$")
		if hex then
			code = tonumber(hex, 16)
		end
	end

	if code and code >= 0 and code <= 0x10FFFF then
		return utf8.char(code) -- Lua 5.3+
	end
	return nil
end

--- Decode the HTML entities in `s`.
-- Handles the five named entities above plus decimal and hexadecimal
-- numeric references; unknown references are passed through unchanged.
-- @param s  string taken from HTML source
-- @return   decoded string (exactly one return value; the substitution
--           count from string.gsub is deliberately dropped)
local function unesc_html(s)
	return (string.gsub(s, "&([^;]+);", decode_entity))
end

-- English month name -> month number, as used by the archive page dates.
local months = {
	January = 1,
	February = 2,
	March = 3,
	April = 4,
	May = 5,
	June = 6,
	July = 7,
	August = 8,
	September = 9,
	October = 10,
	November = 11,
	December = 12,
}

-- NOTE(review): this literal is empty in the copy the script was restored
-- from; its content was most likely HTML-like text that got stripped. It must
-- capture the relative permanent URL of the comic shown on index.html.
-- Restore it from the upstream repository.
local INDEX_PERMALINK_PATTERN = ''

--- Get the permanent URL of the current comic (index.html).
-- @return  the URL relative to the site root, or nil when it cannot be
--          determined; the caller skips the entry in that case.
local function resolve_index()
	local indexsrc = must_get_page("https://falseknees.com/index.html")
	local url = string.match(indexsrc, INDEX_PERMALINK_PATTERN)
	-- An empty pattern matches the empty string, and an empty permalink is
	-- never usable: treat both "no match" and "" as failure instead of
	-- handing nil to unesc_html (which would raise).
	if not url or url == "" then
		return nil
	end
	return unesc_html(url)
end

local pagesrc = must_get_page("https://falseknees.com/archive.html")

local maxdate
local entries = {}

-- Five captures: link target, month name, day, year, title.
-- NOTE(review): the anchor markup around the date/title part is
-- reconstructed from the five loop variables below; confirm it against the
-- markup of the live archive page.
local PATTERN = '<a href="([^"]+)">(%w+) (%d+)%w+, (%d+)%s*%-%s*([^<>]-)</a>'

for url, monthname, day, year, title in string.gmatch(pagesrc, PATTERN) do
	url = unesc_html(url)

	-- The newest comic is linked as index.html; look up its permanent URL.
	if url == "index.html" then
		url = resolve_index()
	end

	if url then
		monthname = unesc_html(monthname)
		local month = months[monthname]
		if not month then
			error("unknown month '" .. monthname .. "'")
		end

		-- We don't know the time, let's assume midnight at UTC+0.
		local date = string.format("%04d-%02d-%02dT00:00:00Z",
			tonumber(year, 10), month, tonumber(day, 10))

		-- Fixed-width ISO 8601 timestamps sort correctly as plain strings.
		if not maxdate or date > maxdate then
			maxdate = date
		end

		entries[#entries + 1] = {
			url = "https://falseknees.com/" .. url,
			title = unesc_html(title),
			date = date,
		}
	end
end

if not maxdate then
	error("No entry successfully parsed")
end

-- A UUID URN with a UUID generated by `uuid -r`.
local FEED_ID = "urn:uuid:7e86b012-c226-41b3-945e-bab3341c65e9"

-- NOTE(review): the element names in both templates are reconstructed from
-- the surviving text, the number and order of the format arguments, and the
-- Atom format (RFC 4287). Compare with upstream before relying on the exact
-- markup.
print(string.format([[
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
	<title>False Knees</title>
	<link href="https://falseknees.com/"/>
	<id>%s</id>
	<author><name>Joshua Barkman</name></author>
	<updated>%s</updated>]], xmlesc(FEED_ID), xmlesc(maxdate)))

for _, entry in ipairs(entries) do
	print(string.format([[
	<entry>
		<title>%s</title>
		<link href="%s"/>
		<id>%s</id>
		<updated>%s</updated>
	</entry>]], xmlesc(entry.title), xmlesc(entry.url), xmlesc(FEED_ID .. "#" .. entry.url), xmlesc(entry.date)))
end

print("</feed>")
-- cgit v1.2.3-54-g00ecf