package item import ( "adammathes.com/neko/config" "adammathes.com/neko/models" "adammathes.com/neko/vlog" "encoding/base64" "fmt" "github.com/PuerkitoBio/goquery" "github.com/advancedlogic/GoOse" "github.com/microcosm-cc/bluemonday" "github.com/russross/blackfriday" "strings" ) type Item struct { Id int64 `json:"_id,string,omitempty"` Title string `json:"title"` Url string `json:"url"` Description string `json:"description"` PublishDate string `json:"publish_date"` FeedId int64 FeedTitle string `json:"feed_title"` FeedUrl string `json:"feed_url"` FeedCategory string `json:"feed_category"` ReadState bool `json:"read"` Starred bool `json:"starred"` FullContent string `json:"full_content"` HeaderImage string `json:"header_image"` } func (i *Item) Print() { fmt.Printf("id: %d\n", i.Id) fmt.Printf("title: %s\n", i.Title) fmt.Printf("ReadState: %d\n", i.ReadState) } func (i *Item) Create() error { res, err := models.DB.Exec(`INSERT INTO item(title, url, description, publish_date, feed_id) VALUES(?, ?, ?, ?, ?)`, i.Title, i.Url, i.Description, i.PublishDate, i.FeedId) if err != nil { vlog.Printf("Error on item.Create\n%v\n%v\n", i.Url, err) return err } id, _ := res.LastInsertId() i.Id = id return nil } func (i *Item) Save() { _, err := models.DB.Exec(`UPDATE item SET read_state=?, starred=? WHERE id=?`, i.ReadState, i.Starred, i.Id) if err != nil { vlog.Printf("Error on item.Save\n%v\n%v\n", i, err) } } func (i *Item) FullSave() { _, err := models.DB.Exec(`UPDATE item SET title=?, url=?, description=?, feed_id=? WHERE id=?`, i.Title, i.Url, i.Description, i.FeedId, i.Id) if err != nil { vlog.Printf("Error on item.fullSave\n%v\n%v\n", i, err) } } func filterPolicy() *bluemonday.Policy { p := bluemonday.NewPolicy() p.AllowElements("ul", "ol", "li", "blockquote", "a", "img", "p", "h1", "h2", "h3", "h4", "b", "i", "em", "strong", "pre", "code") p.AllowAttrs("href").OnElements("a") p.AllowAttrs("src", "alt").OnElements("img") return p } func ItemById(id int64) *Item { items, _ := Filter(0, 0, "", false, false, id, "") return items[0] } func (i *Item) GetFullContent() { fmt.Printf("fetching from %s\n", i.Url) g := goose.New() article, err := g.ExtractFromURL(i.Url) if err != nil { vlog.Println(err) return } if article.TopNode == nil { return } var md, img string md = "" img = "" md = string(blackfriday.Run([]byte(article.CleanedText))) ht, err := article.TopNode.Html() if err != nil { vlog.Println(err) return } p := filterPolicy() i.FullContent = p.Sanitize(ht) i.HeaderImage = article.TopImage _, err = models.DB.Exec(`UPDATE item SET full_content=?, header_image=? WHERE id=?`, md, img, i.Id) if err != nil { vlog.Println(err) } } func Filter(max_id int64, feed_id int64, category string, unread_only bool, starred_only bool, item_id int64, search_query string) ([]*Item, error) { var args []interface{} tables := " feed,item" if search_query != "" { tables = tables + ",fts_item" } query := `SELECT item.id, item.feed_id, item.title, item.url, item.description, item.read_state, item.starred, item.publish_date, item.full_content, item.header_image, feed.url, feed.title, feed.category FROM ` query = query + tables + ` WHERE item.feed_id=feed.id AND item.id!=0 ` if max_id != 0 { query = query + "AND item.id < ? " args = append(args, max_id) } if feed_id != 0 { query = query + " AND feed.id=? " args = append(args, feed_id) } if category != "" { query = query + " AND feed.category=? " args = append(args, category) } if unread_only { query = query + " AND item.read_state=0 " } if item_id != 0 { query = query + " AND item.id=? " args = append(args, item_id) } if search_query != "" { query = query + " AND fts_item match ? AND fts_item.rowid=item.id " args = append(args, search_query) } // this is kind of dumb, but to keep the logic the same // we kludge it this way for a "by id" select if starred_only { query = query + " AND item.starred=1 " } query = query + "ORDER BY item.id DESC LIMIT 15" // vlog.Println(query) // vlog.Println(args...) rows, err := models.DB.Query(query, args...) if err != nil { vlog.Println(err) return nil, err } defer rows.Close() p := filterPolicy() items := make([]*Item, 0) for rows.Next() { i := new(Item) var feed_id int64 err := rows.Scan(&i.Id, &feed_id, &i.Title, &i.Url, &i.Description, &i.ReadState, &i.Starred, &i.PublishDate, &i.FullContent, &i.HeaderImage, &i.FeedUrl, &i.FeedTitle, &i.FeedCategory) if err != nil { vlog.Println(err) return nil, err } // sanitize all fields from external input // should do this at ingest time, probably, for efficiency // but still may need to adjust rules i.Title = p.Sanitize(i.Title) i.Description = p.Sanitize(i.Description) if config.Config.ProxyImages { i.Description = rewriteImages(i.Description) } i.Url = p.Sanitize(i.Url) i.FeedTitle = p.Sanitize(i.FeedTitle) i.FeedUrl = p.Sanitize(i.FeedUrl) i.FullContent = p.Sanitize(i.FullContent) i.HeaderImage = p.Sanitize(i.HeaderImage) i.CleanHeaderImage() items = append(items, i) } if err = rows.Err(); err != nil { return nil, err } return items, nil } func (i *Item) CleanHeaderImage() { // TODO: blacklist of bad imgs if i.HeaderImage == "https://s0.wp.com/i/blank.jpg" { i.HeaderImage = "" } } // rewrite images to use local proxy func rewriteImages(s string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(s)) if err != nil { vlog.Println(err) return s } doc.Find("img").Each(func(i int, img *goquery.Selection) { if src, ok := img.Attr("src"); ok { img.SetAttr("src", proxyURL(src)) } }) output, _ := doc.Html() return output } func proxyURL(url string) string { return "/image/" + base64.URLEncoding.EncodeToString([]byte(url)) }