initial commit

author: Kevin Chabowski <kevin@kch42.de> 2014-10-12 21:36:53 +0200
committer: Kevin Chabowski <kevin@kch42.de> 2014-10-12 21:36:53 +0200
commit: ba57efcadd5b994e0bd0be54a27e927852cbc940 (patch)
tree: ca9be8f1c4b846849818607e21d4e7da01b5efa1
download: twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.tar.gz
twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.tar.bz2
twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.zip
3 files changed, 210 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d15d509
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,5 @@
+           DO WHATEVER THE FUCK YOU WANT, PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+            0. You just DO WHATEVER THE FUCK YOU WANT.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a3d8c8d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+twitter-to-atom
+===============
+
+Generate an Atom feed from a Twitter account.
+
+Installation / Building
+-----------------------
+
+You need to have the Go programming language installed. After that, installing is as simple as:
+
+	go get github.com/kch42/twitter-to-atom
+
+Usage
+-----
+
+	twitter-to-atom user
+
+This generates an Atom feed on stdout with the recent posts and retweets of the Twitter user `@user`.
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..d41be58
--- /dev/null
+++ b/main.go
@@ -0,0 +1,187 @@
+package main
+
+import (
+	"code.google.com/p/go-html-transform/h5"
+	"code.google.com/p/go-html-transform/html/transform"
+	"code.google.com/p/go.net/html"
+	"code.google.com/p/go.tools/blog/atom"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+	"time"
+)
+
+// getattr searches attrs for an attribute whose Key equals name. It returns
+// the Val of the first match together with found == true. When no attribute
+// matches, it returns the empty string and false.
+func getattr(attrs []html.Attribute, name string) (val string, found bool) {
+	for i := range attrs {
+		if attrs[i].Key != name {
+			continue
+		}
+		return attrs[i].Val, true
+	}
+
+	return "", false
+}
+
+// Textify flattens node into plain text.
+//
+// A text node yields its Data. An element node that carries an "alt"
+// attribute yields that attribute's value and its children are not visited.
+// Any other element node, and a document node, yields the concatenation of
+// the Textify results of its children in document order. All remaining node
+// types yield the empty string.
+func Textify(node *html.Node) string {
+	if node.Type == html.TextNode {
+		return node.Data
+	}
+	if node.Type != html.ElementNode && node.Type != html.DocumentNode {
+		return ""
+	}
+
+	if node.Type == html.ElementNode {
+		// The first "alt" attribute, if any, stands in for the whole element.
+		if alt, ok := getattr(node.Attr, "alt"); ok {
+			return alt
+		}
+	}
+
+	out := ""
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		out += Textify(child)
+	}
+	return out
+}
+
+// Tweet holds the data ScrapeTweets extracts for a single tweet.
+type Tweet struct {
+	// Content is the text of the tweet as produced by Textify.
+	Content string
+	// From is the value of the tweet's "data-screen-name" attribute; it is
+	// used to build the author name and the tweet URL.
+	From    string
+	// ID is the value of the tweet's "data-item-id" attribute; it is used to
+	// build the tweet URL.
+	ID      string
+	// Date is the tweet's "data-time" attribute interpreted as Unix seconds.
+	Date    time.Time
+}
+
+// ScrapeTweets reads an HTML profile page from r and returns the tweets found
+// in it.
+//
+// Every node matching "div.GridTimeline .ProfileTweet" is treated as one
+// tweet. Its "data-screen-name" and "data-item-id" attributes become From and
+// ID (left empty when the attribute is missing). The date is taken from a
+// "data-time" attribute, parsed as decimal Unix seconds, on a node matching
+// "a.ProfileTweet-timestamp span"; a tweet without a parseable date is
+// skipped. The content is the Textify result of the ".ProfileTweet-text" node.
+//
+// An error is returned only when the page cannot be parsed.
+func ScrapeTweets(r io.Reader) ([]Tweet, error) {
+	doc, err := transform.NewFromReader(r)
+	if err != nil {
+		return nil, fmt.Errorf("Could not scrape profile: %s", err)
+	}
+
+	found := []Tweet{}
+
+	doc.Apply(func(item *html.Node) {
+		var tw Tweet
+
+		tw.From, _ = getattr(item.Attr, "data-screen-name")
+		tw.ID, _ = getattr(item.Attr, "data-item-id")
+
+		// Run the inner selectors against this tweet's subtree only.
+		subtree := h5.NewTree(item)
+		sub := transform.New(&subtree)
+
+		dated := false
+		sub.Apply(func(span *html.Node) {
+			raw, ok := getattr(span.Attr, "data-time")
+			if !ok {
+				return
+			}
+			secs, err := strconv.ParseInt(raw, 10, 64)
+			if err != nil {
+				return
+			}
+			tw.Date = time.Unix(secs, 0)
+			dated = true
+		}, "a.ProfileTweet-timestamp span")
+		if !dated {
+			// Without a timestamp the entry cannot be dated; drop it.
+			return
+		}
+
+		sub.Apply(func(text *html.Node) {
+			tw.Content = Textify(text)
+		}, ".ProfileTweet-text")
+
+		found = append(found, tw)
+
+	}, "div.GridTimeline .ProfileTweet")
+
+	return found, nil
+}
+
+// titlelimit is the maximum length of an entry title, counted in runes
+// (Unicode code points). A shortened title, including its trailing " …",
+// is exactly this long.
+const titlelimit = 80
+
+// Atomify converts the tweet into an Atom entry.
+//
+// The title is "@<From>: <Content>", shortened to titlelimit runes with a
+// trailing " …" when it is longer. The tweet URL
+// "https://twitter.com/<From>/status/<ID>" is used both as the entry ID and
+// as its "alternate" link. Summary and Content both carry the tweet text as
+// type "text", and Published and Updated are both set to the tweet's Date.
+func (t Tweet) Atomify() *atom.Entry {
+	entry := new(atom.Entry)
+
+	// Measure and cut in runes. Comparing the byte length against the limit
+	// while slicing runes would slice past the end of the rune slice for
+	// titles that contain multi-byte characters.
+	title := []rune("@" + t.From + ": " + t.Content)
+	if len(title) > titlelimit {
+		entry.Title = string(title[:titlelimit-2]) + " …"
+	} else {
+		entry.Title = string(title)
+	}
+
+	url := "https://twitter.com/" + t.From + "/status/" + t.ID
+	entry.ID = url
+	entry.Link = []atom.Link{{
+		Rel:  "alternate",
+		Href: url,
+	}}
+	entry.Summary = &atom.Text{Type: "text", Body: t.Content}
+	entry.Content = &atom.Text{Type: "text", Body: t.Content}
+	entry.Author = &atom.Person{
+		Name: "@" + t.From,
+		URI:  "https://twitter.com/" + t.From,
+	}
+	entry.Published = atom.Time(t.Date)
+	entry.Updated = atom.Time(t.Date)
+
+	return entry
+}
+
+// main runs Main and exits the process with the status Main returns.
+func main() {
+	status := Main()
+	os.Exit(status)
+}
+
+// Main implements the command. It downloads the Twitter profile page of the
+// user named by the first command line argument, scrapes the tweets from it
+// and writes them to stdout as an Atom feed. Diagnostics go to stderr.
+//
+// The return value is the process exit status: 0 on success, 1 when the
+// argument is missing, the download fails or returns a non-200 status, the
+// page cannot be scraped, or the feed cannot be encoded.
+func Main() int {
+	if len(os.Args) < 2 {
+		fmt.Fprintln(os.Stderr, "Need one argument (twitter user name, without the '@')")
+		return 1
+	}
+
+	user := os.Args[1]
+	profile := "https://twitter.com/" + user
+
+	// Bound the whole request (connect, redirects, body) so that a stalled
+	// connection cannot hang the program forever.
+	client := &http.Client{Timeout: 30 * time.Second}
+
+	resp, err := client.Get(profile)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Couldn't download @%s's stream: %s\n", user, err)
+		return 1
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// resp.Status already contains the numeric code, e.g. "404 Not Found".
+		fmt.Fprintf(os.Stderr, "Couldn't download @%s's stream: HTTP Status %s\n", user, resp.Status)
+		return 1
+	}
+
+	tweets, err := ScrapeTweets(resp.Body)
+	if err != nil {
+		// Do not emit an empty feed with a success status for a page that
+		// could not be parsed.
+		fmt.Fprintln(os.Stderr, err)
+		return 1
+	}
+
+	feed := atom.Feed{
+		Title: "Tweets from @" + user,
+		ID:    profile,
+		Link: []atom.Link{{
+			Rel:  "alternate",
+			Href: profile,
+		}},
+		Author: &atom.Person{
+			Name: "@" + user,
+			URI:  profile,
+		},
+	}
+
+	// The feed's update time is the date of the most recent tweet; it stays
+	// the zero time when no tweets were found.
+	var latest time.Time
+	for _, tweet := range tweets {
+		feed.Entry = append(feed.Entry, tweet.Atomify())
+		if tweet.Date.After(latest) {
+			latest = tweet.Date
+		}
+	}
+
+	feed.Updated = atom.Time(latest)
+
+	enc := xml.NewEncoder(os.Stdout)
+	if err := enc.Encode(feed); err != nil {
+		fmt.Fprintf(os.Stderr, "Could not encode feed: %s\n", err)
+		return 1
+	}
+	return 0
+}
author	Kevin Chabowski <kevin@kch42.de>	2014-10-12 21:36:53 +0200
committer	Kevin Chabowski <kevin@kch42.de>	2014-10-12 21:36:53 +0200
commit	ba57efcadd5b994e0bd0be54a27e927852cbc940 (patch)
tree	ca9be8f1c4b846849818607e21d4e7da01b5efa1
download	twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.tar.gz twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.tar.bz2 twitter-to-atom-ba57efcadd5b994e0bd0be54a27e927852cbc940.zip