aboutsummaryrefslogtreecommitdiffstats
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r--crawler/crawler.out40
-rw-r--r--crawler/crawler_test.go45
-rw-r--r--crawler/integration_test.go67
3 files changed, 152 insertions, 0 deletions
diff --git a/crawler/crawler.out b/crawler/crawler.out
new file mode 100644
index 0000000..e859782
--- /dev/null
+++ b/crawler/crawler.out
@@ -0,0 +1,40 @@
+mode: set
+adammathes.com/neko/crawler/crawler.go:16.14,21.16 4 1
+adammathes.com/neko/crawler/crawler.go:21.16,23.3 1 0
+adammathes.com/neko/crawler/crawler.go:25.2,25.36 1 1
+adammathes.com/neko/crawler/crawler.go:25.36,28.3 2 1
+adammathes.com/neko/crawler/crawler.go:30.2,30.26 1 1
+adammathes.com/neko/crawler/crawler.go:30.26,33.3 2 1
+adammathes.com/neko/crawler/crawler.go:34.2,36.34 2 1
+adammathes.com/neko/crawler/crawler.go:36.34,38.3 1 1
+adammathes.com/neko/crawler/crawler.go:39.2,39.16 1 1
+adammathes.com/neko/crawler/crawler.go:42.66,44.23 1 1
+adammathes.com/neko/crawler/crawler.go:44.23,48.3 3 1
+adammathes.com/neko/crawler/crawler.go:54.44,66.16 3 1
+adammathes.com/neko/crawler/crawler.go:66.16,68.3 1 0
+adammathes.com/neko/crawler/crawler.go:70.2,74.16 4 1
+adammathes.com/neko/crawler/crawler.go:74.16,76.3 1 1
+adammathes.com/neko/crawler/crawler.go:78.2,78.17 1 1
+adammathes.com/neko/crawler/crawler.go:78.17,79.16 1 1
+adammathes.com/neko/crawler/crawler.go:79.16,81.17 2 1
+adammathes.com/neko/crawler/crawler.go:81.17,83.5 1 0
+adammathes.com/neko/crawler/crawler.go:87.2,87.53 1 1
+adammathes.com/neko/crawler/crawler.go:87.53,89.3 1 1
+adammathes.com/neko/crawler/crawler.go:91.2,92.16 2 1
+adammathes.com/neko/crawler/crawler.go:92.16,94.3 1 0
+adammathes.com/neko/crawler/crawler.go:95.2,95.26 1 1
+adammathes.com/neko/crawler/crawler.go:101.48,112.16 6 1
+adammathes.com/neko/crawler/crawler.go:112.16,116.3 3 1
+adammathes.com/neko/crawler/crawler.go:118.2,122.31 4 1
+adammathes.com/neko/crawler/crawler.go:122.31,129.45 6 1
+adammathes.com/neko/crawler/crawler.go:129.45,131.4 1 1
+adammathes.com/neko/crawler/crawler.go:135.3,137.9 3 1
+adammathes.com/neko/crawler/crawler.go:137.9,139.4 1 0
+adammathes.com/neko/crawler/crawler.go:140.3,140.43 1 1
+adammathes.com/neko/crawler/crawler.go:140.43,142.4 1 0
+adammathes.com/neko/crawler/crawler.go:144.3,144.31 1 1
+adammathes.com/neko/crawler/crawler.go:144.31,146.4 1 1
+adammathes.com/neko/crawler/crawler.go:146.9,148.4 1 1
+adammathes.com/neko/crawler/crawler.go:150.3,152.17 3 1
+adammathes.com/neko/crawler/crawler.go:152.17,154.4 1 0
+adammathes.com/neko/crawler/crawler.go:159.2,159.46 1 1
diff --git a/crawler/crawler_test.go b/crawler/crawler_test.go
index f0cff9a..e0c4c6b 100644
--- a/crawler/crawler_test.go
+++ b/crawler/crawler_test.go
@@ -1,8 +1,9 @@
package crawler
import (
"net/http"
"net/http/httptest"
+ "strings"
"testing"
"adammathes.com/neko/config"
@@ -231,3 +232,46 @@ func TestCrawl(t *testing.T) {
t.Errorf("Expected 1 item after crawl, got %d", count)
}
}
+
+func TestCrawlFeedWithExtensions(t *testing.T) {
+ setupTestDB(t)
+
+ rssContent := `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+ <channel>
+ <title>Extension Feed</title>
+ <item>
+ <title>Extension Article</title>
+ <link>https://example.com/ext</link>
+ <description>Short description</description>
+ <content:encoded><![CDATA[Much longer content that should be used as description]]></content:encoded>
+ </item>
+ </channel>
+</rss>`
+
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(200)
+ w.Write([]byte(rssContent))
+ }))
+ defer ts.Close()
+
+ f := &feed.Feed{Url: ts.URL, Title: "Extension Test"}
+ f.Create()
+
+ ch := make(chan string, 1)
+ CrawlFeed(f, ch)
+ <-ch
+
+ var itemTitle, itemDesc string
+ err := models.DB.QueryRow("SELECT title, description FROM item WHERE feed_id = ?", f.Id).Scan(&itemTitle, &itemDesc)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if itemTitle != "Extension Article" {
+ t.Errorf("Expected title 'Extension Article', got %q", itemTitle)
+ }
+ if !strings.Contains(itemDesc, "Much longer content") {
+ t.Errorf("Expected description to contain encoded content, got %q", itemDesc)
+ }
+}
diff --git a/crawler/integration_test.go b/crawler/integration_test.go
new file mode 100644
index 0000000..633b60f
--- /dev/null
+++ b/crawler/integration_test.go
@@ -0,0 +1,65 @@
+package crawler
+
+import (
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "adammathes.com/neko/models/feed"
+ "adammathes.com/neko/models/item"
+)
+
+func TestCrawlIntegration(t *testing.T) {
+ setupTestDB(t)
+
+ // Mock RSS feed server
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/rss+xml")
+ fmt.Fprint(w, `<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+<channel>
+ <title>Test Feed</title>
+ <link>http://example.com/</link>
+ <description>Test Description</description>
+ <item>
+ <title>Test Item 1</title>
+ <link>http://example.com/item1</link>
+ <description>Item 1 Description</description>
+ <pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate>
+ </item>
+</channel>
+</rss>`)
+ }))
+ defer ts.Close()
+
+ // Add the feed
+ f := &feed.Feed{Url: ts.URL}
+ err := f.Create()
+ if err != nil {
+ t.Fatalf("Failed to create feed: %v", err)
+ }
+
+ // Crawl
+ ch := make(chan string, 1)
+ CrawlFeed(f, ch)
+
+ res := <-ch
+ if res == "" {
+ t.Fatal("CrawlFeed returned empty result")
+ }
+
+ // Verify items were stored
+ items, err := item.Filter(0, f.Id, "", false, false, 0, "")
+ if err != nil {
+ t.Fatalf("Failed to filter items: %v", err)
+ }
+
+ if len(items) != 1 {
+ t.Fatalf("Expected 1 item, got %d", len(items))
+ }
+
+ if items[0].Title != "Test Item 1" {
+ t.Errorf("Expected 'Test Item 1', got %q", items[0].Title)
+ }
+}