From ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0 Mon Sep 17 00:00:00 2001
From: Adam Mathes
Date: Thu, 23 Nov 2017 13:00:36 -0700
Subject: add basic feed discovery for new feeds

---
 models/feed/feed.go | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/models/feed/feed.go b/models/feed/feed.go
index bea7ae1..0a043f2 100644
--- a/models/feed/feed.go
+++ b/models/feed/feed.go
@@ -2,7 +2,10 @@ package feed
 
 import (
 	"adammathes.com/neko/models"
+	"github.com/PuerkitoBio/goquery"
 	"log"
+	"mime"
+	"net/http"
 )
 
 type Feed struct {
@@ -13,6 +16,7 @@ type Feed struct {
 }
 
 func NewFeed(url string) error {
+	url = ResolveFeedURL(url)
 	stmt, err := models.DB.Prepare("INSERT INTO feed(url) VALUES(?)")
 	if err != nil {
 		return err
@@ -101,3 +105,76 @@ func (f *Feed) Create() error {
 
 	return nil
 }
+
+// feedContentTypes is the set of media types that mark an HTTP response as
+// being a feed itself rather than a page that links to one.
+var feedContentTypes = map[string]bool{
+	"text/xml":             true,
+	"text/rss+xml":         true,
+	"application/rss+xml":  true,
+	"application/atom+xml": true,
+}
+
+// isFeedContentType reports whether a Content-Type header value names one of
+// the feed media types. Parameters such as "; charset=utf-8" are ignored and
+// the comparison is case-insensitive; an empty or malformed header is not a
+// feed.
+func isFeedContentType(header string) bool {
+	mediaType, _, err := mime.ParseMediaType(header)
+	if err != nil {
+		return false
+	}
+	return feedContentTypes[mediaType]
+}
+
+// ResolveFeedURL returns the best guess at the feed URL for url.
+//
+// It fetches url once. If the response's Content-Type is a feed type, url is
+// returned unchanged. Otherwise the body is parsed as HTML and the href of
+// the first <link> element whose type is application/rss+xml or
+// application/atom+xml is returned, resolved against the URL the response was
+// served from so that relative hrefs become absolute. If the fetch or the
+// parse fails, or no such link exists, url is returned unchanged.
+func ResolveFeedURL(url string) string {
+	resp, err := http.Get(url)
+	if err != nil {
+		// Discovery is best effort: let the caller store the URL as given.
+		return url
+	}
+	defer resp.Body.Close()
+
+	if isFeedContentType(resp.Header.Get("Content-Type")) {
+		return url
+	}
+
+	// Parse the body we already have instead of requesting the page again.
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return url
+	}
+
+	found := ""
+	doc.Find("link").EachWithBreak(func(i int, s *goquery.Selection) bool {
+		href := s.AttrOr("href", "")
+		if href == "" {
+			return true
+		}
+		switch s.AttrOr("type", "") {
+		case "application/rss+xml", "application/atom+xml":
+			found = href
+			return false // first match wins
+		}
+		return true
+	})
+	if found == "" {
+		return url
+	}
+
+	// hrefs are often relative ("/feed.xml"); make them absolute. resp.Request
+	// is the final request, so redirects are taken into account.
+	abs, err := resp.Request.URL.Parse(found)
+	if err != nil {
+		return found
+	}
+	return abs.String()
+}
-- 
cgit v1.2.3