aboutsummaryrefslogtreecommitdiffstats
path: root/crawler
diff options
context:
space:
mode:
author Adam Mathes <adam@trenchant.org> 2017-01-23 20:04:03 -0800
committer Adam Mathes <adam@trenchant.org> 2017-01-23 20:04:03 -0800
commit 93d6d36eb697cd9452eb4aab446151a1a33ed245 (patch)
tree c9cb88718d03a6964f6d3705066f11d356257d37 /crawler
download neko-93d6d36eb697cd9452eb4aab446151a1a33ed245.tar.gz
neko-93d6d36eb697cd9452eb4aab446151a1a33ed245.tar.bz2
neko-93d6d36eb697cd9452eb4aab446151a1a33ed245.zip
neko v2 initial commit
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/crawler.go 63
1 files changed, 63 insertions, 0 deletions
diff --git a/crawler/crawler.go b/crawler/crawler.go
new file mode 100644
index 0000000..e3e4aeb
--- /dev/null
+++ b/crawler/crawler.go
@@ -0,0 +1,63 @@
+package crawler
+
+import (
+ "log"
+ "neko/models/feed"
+ "neko/models/item"
+ "net/http"
+ "time"
+ "github.com/SlyMarbo/rss"
+)
+
+
+// Crawl starts one CrawlFeed goroutine per feed from feed.All and logs the
+// status each sends back; a feed.All error ends the process via log.Fatal.
+func Crawl() {
+	all, err := feed.All()
+	if err != nil {
+		log.Fatal(err)
+	}
+	results := make(chan string)
+	for _, f := range all {
+		log.Printf("crawling %s", f.Url)
+		go CrawlFeed(f, results)
+	}
+	// Receive once per feed so Crawl returns only after every status is in.
+	for range all {
+		log.Println(<-results)
+	}
+}
+
+// CrawlFeed fetches and parses the feed at f.Url, copies the parsed title
+// onto f and calls f.Update, then calls Create on one item.Item per parsed
+// entry. It sends exactly one status message on ch before returning.
+//
+// TODO: sanitize input on crawl
+func CrawlFeed(f *feed.Feed, ch chan<- string) {
+	// give up after 5 seconds
+	client := &http.Client{Timeout: 5 * time.Second}
+	parsed, err := rss.FetchByClient(f.Url, client)
+	if err != nil {
+		log.Print(err)
+		ch <- "failed to fetch and parse for " + f.Url
+		return
+	}
+
+	f.Title = parsed.Title
+	f.Update()
+
+	for _, entry := range parsed.Items {
+		log.Printf("storing item: %s", entry.Title)
+		body := entry.Content
+		if body == "" {
+			body = entry.Summary
+		}
+		var it item.Item
+		it.Title = entry.Title
+		it.Url = entry.Link
+		it.Description = body
+		it.FeedId = f.Id
+		it.Create()
+	}
+	ch <- "successfully crawled " + f.Url
+}