From ba57efcadd5b994e0bd0be54a27e927852cbc940 Mon Sep 17 00:00:00 2001 From: Kevin Chabowski Date: Sun, 12 Oct 2014 21:36:53 +0200 Subject: initial commit --- LICENSE | 5 ++ README.md | 18 ++++++ main.go | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 210 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 main.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d15d509 --- /dev/null +++ b/LICENSE @@ -0,0 +1,5 @@ + DO WHATEVER THE FUCK YOU WANT, PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHATEVER THE FUCK YOU WANT. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a3d8c8d --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +twitter-to-atom +=============== + +Generate an Atom-Feed from a Twitter account. + +Installation / Building +----------------------- + +You need to have the Go programming language installed. After that, installing is as simple as + + go get github.com/kch42/twitter-to-atom + +Usage +----- + + twitter-to-atom user + +This generates an Atom feed on stdout with the recent posts and retweets of the Twitter user `@user`. diff --git a/main.go b/main.go new file mode 100644 index 0000000..d41be58 --- /dev/null +++ b/main.go @@ -0,0 +1,187 @@ +package main + +import ( + "code.google.com/p/go-html-transform/h5" + "code.google.com/p/go-html-transform/html/transform" + "code.google.com/p/go.net/html" + "code.google.com/p/go.tools/blog/atom" + "encoding/xml" + "fmt" + "io" + "net/http" + "os" + "strconv" + "time" +) + +func getattr(attrs []html.Attribute, name string) (val string, found bool) { + for _, a := range attrs { + if a.Key == name { + val = a.Val + found = true + return + } + } + + return +} + +func Textify(node *html.Node) string { + switch node.Type { + case html.TextNode: + return node.Data + case html.ElementNode: + for _, att := range node.Attr { + if att.Key == "alt" { + return att.Val + } + } + + fallthrough + case html.DocumentNode: + text := "" + for n := node.FirstChild; n != nil; n = n.NextSibling { + text += Textify(n) + } + return text + default: + return "" + } +} + +type Tweet struct { + Content string + From string + ID string + Date time.Time +} + +func ScrapeTweets(r io.Reader) ([]Tweet, error) { + t, err := transform.NewFromReader(r) + if err != nil { + return nil, fmt.Errorf("Could not scrape profile: %s", err) + } + + tweets := make([]Tweet, 0) + + t.Apply(func(node *html.Node) { + var tweet Tweet + + tweet.From, _ = getattr(node.Attr, "data-screen-name") + tweet.ID, _ = getattr(node.Attr, "data-item-id") + + time_ok := false + tree := h5.NewTree(node) + t2 := transform.New(&tree) + t2.Apply(func(node *html.Node) { + if ts, ok := getattr(node.Attr, "data-time"); ok { + if ts_int, err := strconv.ParseInt(ts, 10, 64); err == nil { + tweet.Date = time.Unix(ts_int, 0) + time_ok = true + } + } + }, "a.ProfileTweet-timestamp span") + if !time_ok { + return + } + + t2.Apply(func(node *html.Node) { + tweet.Content = Textify(node) + }, ".ProfileTweet-text") + + tweets = append(tweets, tweet) + + }, "div.GridTimeline .ProfileTweet") + + return tweets, nil +} + +const titlelimit = 80 + +func (t Tweet) Atomify() *atom.Entry { + entry := new(atom.Entry) + + entry.Title = "@" + t.From + ": " + t.Content + if len(entry.Title) > titlelimit { + entry.Title = string([]rune(entry.Title)[:titlelimit-2]) + " …" + } + + url := "https://twitter.com/" + t.From + "/status/" + t.ID + entry.ID = url + entry.Link = []atom.Link{atom.Link{ + Rel: "alternate", + Href: url, + }} + entry.Summary = &atom.Text{Type: "text", Body: t.Content} + entry.Content = &atom.Text{Type: "text", Body: t.Content} + entry.Author = &atom.Person{ + Name: "@" + t.From, + URI: "https://twitter.com/" + t.From, + } + entry.Published = atom.Time(t.Date) + entry.Updated = atom.Time(t.Date) + + return entry +} + +func main() { + os.Exit(Main()) +} + +func Main() int { + if len(os.Args) < 2 { + fmt.Fprintln(os.Stderr, "Need one argument (twitter user name, without the '@')") + return 1 + } + + user := os.Args[1] + + resp, err := http.Get("https://twitter.com/" + user) + if err != nil { + fmt.Fprintf(os.Stderr, "Couldn't download @%s's stream: %s\n", user, err) + return 1 + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Fprintf(os.Stderr, "Couldn't download @%s's stream: HTTP Status %d %s\n", user, resp.StatusCode, resp.Status) + return 1 + } + + tweets, err := ScrapeTweets(resp.Body) + if err != nil { + fmt.Fprintln(os.Stderr, err) + } + + feed := atom.Feed{ + Title: "Tweets from @" + user, + ID: "https://twitter.com/" + user, + Link: []atom.Link{ + atom.Link{ + Rel: "alternate", + Href: "http://twitter.com/" + user, + }, + }, + Author: &atom.Person{ + Name: "@" + user, + URI: "https://twitter.com/" + user, + }, + } + + var latest time.Time + for _, tweet := range tweets { + feed.Entry = append(feed.Entry, tweet.Atomify()) + if tweet.Date.After(latest) { + latest = tweet.Date + } + } + + feed.Updated = atom.Time(latest) + + enc := xml.NewEncoder(os.Stdout) + if err := enc.Encode(feed); err != nil { + fmt.Fprintf(os.Stderr, "Could not encode feed: %s\n", err) + return 1 + } + return 0 +} -- cgit v1.2.3-54-g00ecf