package feed import ( "log" "net/http" "strings" "time" "github.com/PuerkitoBio/goquery" "adammathes.com/neko/internal/safehttp" "adammathes.com/neko/models" ) type Feed struct { Id int64 `json:"_id" xml:"-"` Url string `json:"url" xml:"xmlUrl,attr"` WebUrl string `json:"web_url" xml:"htmlUrl,attr"` Title string `json:"title" xml:"text,attr"` Category string `json:"category"` // for OPML output purposes XMLName string `json:"-" xml:"outline"` Type string `json:"-" xml:"type,attr"` } type Category struct { Title string `json:"title"` } func NewFeed(url string) error { url = ResolveFeedURL(url) stmt, err := models.DB.Prepare("INSERT INTO feed(url) VALUES(?)") if err != nil { return err } _, err = stmt.Exec(url) if err != nil { return err } return nil } func All() ([]*Feed, error) { return filter(" ORDER BY lower(TITLE) asc") } func filter(where string, args ...interface{}) ([]*Feed, error) { // todo: add back in title rows, err := models.DB.Query(`SELECT id, url, web_url, title, category FROM feed `+where, args...) if err != nil { return nil, err } defer func() { _ = rows.Close() }() feeds := make([]*Feed, 0) for rows.Next() { f := new(Feed) err := rows.Scan(&f.Id, &f.Url, &f.WebUrl, &f.Title, &f.Category) f.Type = "rss" if err != nil { return nil, err } feeds = append(feeds, f) } if err = rows.Err(); err != nil { return nil, err } return feeds, nil } func (f *Feed) Update() { if len(f.Title) == 0 { return } if f.Id == 0 { return } if len(f.Url) == 0 { return } _, _ = models.DB.Exec(`UPDATE feed SET title=?, url=?, web_url=?, category=? WHERE id=?`, f.Title, f.Url, f.WebUrl, f.Category, f.Id) } func (f *Feed) Delete() { _, err := models.DB.Exec(`DELETE FROM feed WHERE id=?`, f.Id) if err != nil { log.Println(err) } } func (f *Feed) ByUrl(url string) error { err := models.DB.QueryRow(`SELECT id, url, title, category FROM feed WHERE url = ?`, url).Scan(&f.Id, &f.Url, &f.Title, &f.Category) if err != nil { return err } return nil } func (f *Feed) Create() error { res, err := models.DB.Exec(`INSERT INTO feed(url, title, category) VALUES(?, ?, ?)`, f.Url, f.Title, f.Category) if err != nil { return err } id, _ := res.LastInsertId() f.Id = id return nil } // Given a string `url`, return to the best guess of the feed func ResolveFeedURL(url string) string { c := safehttp.NewSafeClient(10 * time.Second) req, err := http.NewRequest("GET", url, nil) if err != nil { return url } req.Header.Set("User-Agent", "neko RSS Crawler +https://github.com/adammathes/neko") resp, err := c.Do(req) if err != nil { return url } defer func() { _ = resp.Body.Close() }() contentType := resp.Header.Get("Content-Type") if contentType == "" { return url } switch { case strings.HasPrefix(contentType, "text/xml"): return url case strings.HasPrefix(contentType, "text/rss+xml"): return url case strings.HasPrefix(contentType, "application/rss+xml"): return url case strings.HasPrefix(contentType, "application/atom+xml"): return url } // goquery is probably overkill here resp, err = c.Get(url) if err != nil { return url } defer func() { _ = resp.Body.Close() }() doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return url } var f string // loop over each link element, return first one that is of type rss or atom f = "" doc.Find("link").Each(func(i int, s *goquery.Selection) { if f != "" { // we're done return } t := s.AttrOr("type", "") h := s.AttrOr("href", "") if t == "application/atom+xml" { f = h } if t == "application/rss+xml" { f = h } }) // if we have nothing, just return the original url if f == "" { f = url } // if we don't start with http[s] its probably relative if f[0] != 'h' { f = url + f } return f } func Categories() ([]*Category, error) { rows, err := models.DB.Query(`SELECT DISTINCT category FROM feed WHERE category!="" ORDER BY lower(category) ASC`) if err != nil { return nil, err } defer func() { _ = rows.Close() }() categories := make([]*Category, 0) for rows.Next() { c := new(Category) err := rows.Scan(&c.Title) if err != nil { return nil, err } categories = append(categories, c) } if err = rows.Err(); err != nil { return nil, err } return categories, nil }