From 741cd94da338e00e2eee0e12d572e04c3be46530 Mon Sep 17 00:00:00 2001
From: Adam Mathes 
Date: Sun, 19 Nov 2017 21:23:04 -0700
Subject: enable full_content and header_image
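
Extract a readable full-text version of each item's linked page at
crawl time: after a new item is stored, fetch its URL with GoOse,
render the cleaned text to HTML with blackfriday, and save it plus
the page's top image in new full_content and header_image columns.
Both fields are selected and sanitized in Filter, exposed in the
item JSON, and rendered in the web UI.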
---
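Note: init.sql only seeds fresh installs, so an existing database needs
the new columns added by hand. A minimal migration, assuming column
types that mirror init.sql (TEXT, and VARCHAR(255) to match url), would
be:

    -- assumed types: TEXT for full_content, VARCHAR(255) for header_image
    ALTER TABLE item ADD COLUMN full_content TEXT;
    ALTER TABLE item ADD COLUMN header_image VARCHAR(255);
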
 crawler/crawler.go  | 18 ++++++++++--------
 init.sql            |  2 ++
 models/item/item.go | 56 ++++++++++++++++++++++++++++++++++++++++++--------------
 static/ui.html      |  7 ++++++-
 4 files changed, 60 insertions(+), 23 deletions(-)
diff --git a/crawler/crawler.go b/crawler/crawler.go
index ea9f694..e84e219 100644
--- a/crawler/crawler.go
+++ b/crawler/crawler.go
@@ -1,21 +1,20 @@
 package crawler
 
 import (
-	"log"
 	"adammathes.com/neko/models/feed"
 	"adammathes.com/neko/models/item"
+	"adammathes.com/neko/vlog"
+	"github.com/mmcdole/gofeed"
+	"log"
 	"net/http"
 	"time"
-    "github.com/mmcdole/gofeed"
-	"adammathes.com/neko/vlog"
 )
 
-
 func Crawl() {
 
 	ch := make(chan string)
 
-	feeds,err := feed.All()
+	feeds, err := feed.All()
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -64,7 +63,7 @@ func CrawlFeed(f *feed.Feed, ch chan<- string) {
 
 		// a lot of RSS2.0 generated by wordpress and others
 		// uses <content:encoded>
-		e,ok := i.Extensions["content"]["encoded"]
+		e, ok := i.Extensions["content"]["encoded"]
 		var encoded = ""
 		if ok {
 			encoded = e[0].Value
@@ -73,16 +72,19 @@ func CrawlFeed(f *feed.Feed, ch chan<- string) {
 			item.Description = encoded
 		}
 
-		if(i.PublishedParsed != nil) {
+		if i.PublishedParsed != nil {
 			item.PublishDate = i.PublishedParsed.Format("2006-01-02 15:04:05")
 		} else {
 			item.PublishDate = time.Now().Format("2006-01-02 15:04:05")
 		}
-		
+
 		item.FeedId = f.Id
 		err := item.Create()
 		if err != nil {
 			vlog.Println(err)
+		} else {
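+			// fetch the full article only for newly created items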
+			item.GetFullContent()
 		}
 	}
 	ch <- "successfully crawled " + f.Url + "\n"
diff --git a/init.sql b/init.sql
index d69d5e9..d2c0de1 100644
--- a/init.sql
+++ b/init.sql
@@ -17,6 +17,8 @@ CREATE TABLE item (
   title TEXT,
   url VARCHAR(255) NOT NULL,
   description TEXT,
+  full_content TEXT,
+  header_image VARCHAR(255),
   publish_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
   read_state BOOLEAN DEFAULT FALSE NOT NULL,
   starred BOOLEAN DEFAULT FALSE NOT NULL,
diff --git a/models/item/item.go b/models/item/item.go
index 5a4c274..48d76bf 100644
--- a/models/item/item.go
+++ b/models/item/item.go
@@ -1,26 +1,32 @@
 package item
 
 import (
-	"fmt"
-	"log"
 	"adammathes.com/neko/models"
+	"fmt"
+	"github.com/advancedlogic/GoOse"
 	"github.com/microcosm-cc/bluemonday"
+	"github.com/russross/blackfriday"
+	"log"
 )
 
 type Item struct {
-	Id          int64  `json:"_id,string,omitempty"`
+	Id int64 `json:"_id,string,omitempty"`
+
+	Title string `json:"title"`
+	Url   string `json:"url"`
 
-	Title       string `json:"title"`
-	Url         string `json:"url"`
 	Description string `json:"description"`
 	PublishDate string `json:"publish_date"`
 
-	FeedId      int64
-	FeedTitle   string `json:"feed_title"`
-	FeedUrl     string `json:"feed_url"`
+	FeedId    int64
+	FeedTitle string `json:"feed_title"`
+	FeedUrl   string `json:"feed_url"`
+
+	ReadState bool `json:"read"`
+	Starred   bool `json:"starred"`
 
-	ReadState   bool   `json:"read"`
-	Starred     bool   `json:"starred"`
+	FullContent string `json:"full_content"`
+	HeaderImage string `json:"header_image"`
 }
 
 func (i *Item) Print() {
@@ -61,12 +67,34 @@ func (i *Item) FullSave() {
 	}
 }
 
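+// GetFullContent extracts readable article text and a top image from
+// the item's URL with GoOse, renders the text to HTML, and saves both.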
+func (i *Item) GetFullContent() {
+	g := goose.New()
+	article, err := g.ExtractFromURL(i.Url)
+	var md, img string
+	if err != nil {
+		log.Println(err)
+	} else {
+		md = string(blackfriday.MarkdownCommon([]byte(article.CleanedText)))
+		img = article.TopImage
+	}
+
+	_, err = models.DB.Exec(`UPDATE item
+                              SET full_content=?, header_image=?
+                              WHERE id=?`, md, img, i.Id)
+	if err != nil {
+		log.Println(err)
+	}
+}
+
 func Filter(max_id int64, feed_id int64, unread_only bool, starred_only bool) ([]*Item, error) {
 
 	var args []interface{}
 
 	query := `SELECT item.id, item.title, item.url, item.description, 
                      item.read_state, item.starred, item.publish_date,
+                     COALESCE(item.full_content, ''), COALESCE(item.header_image, ''),
                      feed.url, feed.title
               FROM item,feed 
               WHERE item.feed_id=feed.id  `
@@ -89,7 +117,6 @@ func Filter(max_id int64, feed_id int64, unread_only bool, starred_only bool) ([
 		query = query + " AND item.starred=1 "
 	}
 
-	
 	query = query + "ORDER BY item.id DESC LIMIT 15"
 	// log.Println(query)
 	// log.Println(args...)
@@ -106,16 +133,15 @@ func Filter(max_id int64, feed_id int64, unread_only bool, starred_only bool) ([
 	p.AllowAttrs("href").OnElements("a")
 	p.AllowAttrs("src", "alt").OnElements("img")
 
-	
 	items := make([]*Item, 0)
 	for rows.Next() {
 		i := new(Item)
-		err := rows.Scan(&i.Id, &i.Title, &i.Url, &i.Description, &i.ReadState, &i.Starred, &i.PublishDate, &i.FeedUrl, &i.FeedTitle)
+		err := rows.Scan(&i.Id, &i.Title, &i.Url, &i.Description, &i.ReadState, &i.Starred, &i.PublishDate, &i.FullContent, &i.HeaderImage, &i.FeedUrl, &i.FeedTitle)
 		if err != nil {
 			log.Println(err)
 			return nil, err
 		}
-		
+
 		// sanitize all fields from external input
 		// should do this at ingest time, probably, for efficiency
 		// but still may need to adjust rules
@@ -124,6 +150,8 @@ func Filter(max_id int64, feed_id int64, unread_only bool, starred_only bool) ([
 		i.Url = p.Sanitize(i.Url)
 		i.FeedTitle = p.Sanitize(i.FeedTitle)
 		i.FeedUrl = p.Sanitize(i.FeedUrl)
+		i.FullContent = p.Sanitize(i.FullContent)
 		items = append(items, i)
 	}
 	if err = rows.Err(); err != nil {
diff --git a/static/ui.html b/static/ui.html
index 7b1d08c..b977dad 100644
--- a/static/ui.html
+++ b/static/ui.html
@@ -39,7 +39,12 @@
       <p class="meta">
         ${ item.publish_date } from ${item.feed_title}
       </p>
       <div class="description">
       {{html item.description}}
       </div>
-    </div>
+
+      <a href="#" class="full-content-link">full content</a>
+      <div class="full-content">
+      {{html item.full_content}}
+      </div>
+    </div>