diff options
author | Adam Mathes <adam@trenchant.org> | 2017-11-23 13:00:36 -0700 |
---|---|---|
committer | Adam Mathes <adam@trenchant.org> | 2017-11-23 13:00:36 -0700 |
commit | ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0 (patch) | |
tree | 84a9c4893cc72b6197a117d9f8fb4339f7b8deb6 | |
parent | 7227865519ffb9f860e9a6de4b96ffc4eb291ff8 (diff) | |
download | neko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.tar.gz neko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.tar.bz2 neko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.zip |
add basic feed discovery for new feeds
-rw-r--r-- | models/feed/feed.go | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/models/feed/feed.go b/models/feed/feed.go index bea7ae1..0a043f2 100644 --- a/models/feed/feed.go +++ b/models/feed/feed.go @@ -2,7 +2,9 @@ package feed import ( "adammathes.com/neko/models" + "github.com/PuerkitoBio/goquery" "log" + "net/http" ) type Feed struct { @@ -13,6 +15,7 @@ type Feed struct { } func NewFeed(url string) error { + url = ResolveFeedURL(url) stmt, err := models.DB.Prepare("INSERT INTO feed(url) VALUES(?)") if err != nil { return err @@ -101,3 +104,56 @@ func (f *Feed) Create() error { return nil } + +// Given a string `url`, return to the best guess of the feed +func ResolveFeedURL(url string) string { + resp, err := http.Get(url) + if err != nil { + // handle errors better + return url + } + + // Check content-type header first + // if it's feed-ish, just use it + contentType := resp.Header["Content-Type"][0] + switch contentType { + + case "text/xml": + return url + case "text/rss+xml": + return url + case "application/rss+xml": + return url + case "application/atom+xml": + return url + } + + // goquery is probably overkill here + doc, err := goquery.NewDocument(url) + var f string + + // loop over each link element, return first one that is of type rss or atom + f = "" + doc.Find("link").Each(func(i int, s *goquery.Selection) { + + if f != "" { + // we're done + return + } + + t := s.AttrOr("type", "") + h := s.AttrOr("href", "") + if t == "application/atom+xml" { + f = h + } + if t == "application/rss+xml" { + f = h + } + }) + + // if we have nothing, just return the original url + if f == "" { + f = url + } + return f +} |