aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdam Mathes <adam@trenchant.org>2017-11-23 13:00:36 -0700
committerAdam Mathes <adam@trenchant.org>2017-11-23 13:00:36 -0700
commitee46ed7c3c3da4f3630ffc430527c0a9e54abeb0 (patch)
tree84a9c4893cc72b6197a117d9f8fb4339f7b8deb6
parent7227865519ffb9f860e9a6de4b96ffc4eb291ff8 (diff)
downloadneko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.tar.gz
neko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.tar.bz2
neko-ee46ed7c3c3da4f3630ffc430527c0a9e54abeb0.zip
add basic feed discovery for new feeds
-rw-r--r--models/feed/feed.go56
1 files changed, 56 insertions, 0 deletions
diff --git a/models/feed/feed.go b/models/feed/feed.go
index bea7ae1..0a043f2 100644
--- a/models/feed/feed.go
+++ b/models/feed/feed.go
@@ -2,7 +2,9 @@ package feed
import (
"adammathes.com/neko/models"
+ "github.com/PuerkitoBio/goquery"
"log"
+ "mime"
+ "net/http"
)
type Feed struct {
@@ -13,6 +15,7 @@ type Feed struct {
}
func NewFeed(url string) error {
+ url = ResolveFeedURL(url)
stmt, err := models.DB.Prepare("INSERT INTO feed(url) VALUES(?)")
if err != nil {
return err
@@ -101,3 +104,56 @@ func (f *Feed) Create() error {
return nil
}
+
+// Given a string `url`, return to the best guess of the feed
+func ResolveFeedURL(url string) string {
+ resp, err := http.Get(url)
+ if err != nil {
+ // handle errors better
+ return url
+ }
+
+ // Check content-type header first
+ // if it's feed-ish, just use it
+ contentType := resp.Header["Content-Type"][0]
+ switch contentType {
+
+ case "text/xml":
+ return url
+ case "text/rss+xml":
+ return url
+ case "application/rss+xml":
+ return url
+ case "application/atom+xml":
+ return url
+ }
+
+ // goquery is probably overkill here
+ doc, err := goquery.NewDocument(url)
+ var f string
+
+ // loop over each link element, return first one that is of type rss or atom
+ f = ""
+ doc.Find("link").Each(func(i int, s *goquery.Selection) {
+
+ if f != "" {
+ // we're done
+ return
+ }
+
+ t := s.AttrOr("type", "")
+ h := s.AttrOr("href", "")
+ if t == "application/atom+xml" {
+ f = h
+ }
+ if t == "application/rss+xml" {
+ f = h
+ }
+ })
+
+ // if we have nothing, just return the original url
+ if f == "" {
+ f = url
+ }
+ return f
+}