diff options
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/crawler.out | 40 | ||||
| -rw-r--r-- | crawler/crawler_test.go | 45 | ||||
| -rw-r--r-- | crawler/integration_test.go | 67 |
3 files changed, 152 insertions, 0 deletions
diff --git a/crawler/crawler.out b/crawler/crawler.out new file mode 100644 index 0000000..e859782 --- /dev/null +++ b/crawler/crawler.out @@ -0,0 +1,40 @@ +mode: set +adammathes.com/neko/crawler/crawler.go:16.14,21.16 4 1 +adammathes.com/neko/crawler/crawler.go:21.16,23.3 1 0 +adammathes.com/neko/crawler/crawler.go:25.2,25.36 1 1 +adammathes.com/neko/crawler/crawler.go:25.36,28.3 2 1 +adammathes.com/neko/crawler/crawler.go:30.2,30.26 1 1 +adammathes.com/neko/crawler/crawler.go:30.26,33.3 2 1 +adammathes.com/neko/crawler/crawler.go:34.2,36.34 2 1 +adammathes.com/neko/crawler/crawler.go:36.34,38.3 1 1 +adammathes.com/neko/crawler/crawler.go:39.2,39.16 1 1 +adammathes.com/neko/crawler/crawler.go:42.66,44.23 1 1 +adammathes.com/neko/crawler/crawler.go:44.23,48.3 3 1 +adammathes.com/neko/crawler/crawler.go:54.44,66.16 3 1 +adammathes.com/neko/crawler/crawler.go:66.16,68.3 1 0 +adammathes.com/neko/crawler/crawler.go:70.2,74.16 4 1 +adammathes.com/neko/crawler/crawler.go:74.16,76.3 1 1 +adammathes.com/neko/crawler/crawler.go:78.2,78.17 1 1 +adammathes.com/neko/crawler/crawler.go:78.17,79.16 1 1 +adammathes.com/neko/crawler/crawler.go:79.16,81.17 2 1 +adammathes.com/neko/crawler/crawler.go:81.17,83.5 1 0 +adammathes.com/neko/crawler/crawler.go:87.2,87.53 1 1 +adammathes.com/neko/crawler/crawler.go:87.53,89.3 1 1 +adammathes.com/neko/crawler/crawler.go:91.2,92.16 2 1 +adammathes.com/neko/crawler/crawler.go:92.16,94.3 1 0 +adammathes.com/neko/crawler/crawler.go:95.2,95.26 1 1 +adammathes.com/neko/crawler/crawler.go:101.48,112.16 6 1 +adammathes.com/neko/crawler/crawler.go:112.16,116.3 3 1 +adammathes.com/neko/crawler/crawler.go:118.2,122.31 4 1 +adammathes.com/neko/crawler/crawler.go:122.31,129.45 6 1 +adammathes.com/neko/crawler/crawler.go:129.45,131.4 1 1 +adammathes.com/neko/crawler/crawler.go:135.3,137.9 3 1 +adammathes.com/neko/crawler/crawler.go:137.9,139.4 1 0 +adammathes.com/neko/crawler/crawler.go:140.3,140.43 1 1 +adammathes.com/neko/crawler/crawler.go:140.43,142.4 1 0 +adammathes.com/neko/crawler/crawler.go:144.3,144.31 1 1 +adammathes.com/neko/crawler/crawler.go:144.31,146.4 1 1 +adammathes.com/neko/crawler/crawler.go:146.9,148.4 1 1 +adammathes.com/neko/crawler/crawler.go:150.3,152.17 3 1 +adammathes.com/neko/crawler/crawler.go:152.17,154.4 1 0 +adammathes.com/neko/crawler/crawler.go:159.2,159.46 1 1 diff --git a/crawler/crawler_test.go b/crawler/crawler_test.go index f0cff9a..e0c4c6b 100644 --- a/crawler/crawler_test.go +++ b/crawler/crawler_test.go @@ -1,8 +1,10 @@ package crawler import ( + "log" "net/http" "net/http/httptest" + "strings" "testing" "adammathes.com/neko/config" @@ -231,3 +233,46 @@ func TestCrawl(t *testing.T) { t.Errorf("Expected 1 item after crawl, got %d", count) } } + +func TestCrawlFeedWithExtensions(t *testing.T) { + setupTestDB(t) + + rssContent := `<?xml version="1.0" encoding="UTF-8"?> +<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/"> + <channel> + <title>Extension Feed</title> + <item> + <title>Extension Article</title> + <link>https://example.com/ext</link> + <description>Short description</description> + <content:encoded><![CDATA[Much longer content that should be used as description]]></content:encoded> + </item> + </channel> +</rss>` + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + w.Write([]byte(rssContent)) + })) + defer ts.Close() + + f := &feed.Feed{Url: ts.URL, Title: "Extension Test"} + f.Create() + + ch := make(chan string, 1) + CrawlFeed(f, ch) + <-ch + + var itemTitle, itemDesc string + err := models.DB.QueryRow("SELECT title, description FROM item WHERE feed_id = ?", f.Id).Scan(&itemTitle, &itemDesc) + if err != nil { + log.Fatal(err) + } + + if itemTitle != "Extension Article" { + t.Errorf("Expected title 'Extension Article', got %q", itemTitle) + } + if !strings.Contains(itemDesc, "Much longer content") { + t.Errorf("Expected description to contain encoded content, got %q", itemDesc) + } +} diff --git a/crawler/integration_test.go b/crawler/integration_test.go new file mode 100644 index 0000000..633b60f --- /dev/null +++ b/crawler/integration_test.go @@ -0,0 +1,67 @@ +package crawler + +import ( + "fmt" + "net/http" + "net/http/httptest" + "os" + "testing" + + "adammathes.com/neko/models/feed" + "adammathes.com/neko/models/item" +) + +func TestCrawlIntegration(t *testing.T) { + setupTestDB(t) + + // Mock RSS feed server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/rss+xml") + os.Stdout.Write([]byte("serving mock rss\n")) + fmt.Fprint(w, `<?xml version="1.0" encoding="UTF-8" ?> +<rss version="2.0"> +<channel> + <title>Test Feed</title> + <link>http://example.com/</link> + <description>Test Description</description> + <item> + <title>Test Item 1</title> + <link>http://example.com/item1</link> + <description>Item 1 Description</description> + <pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate> + </item> +</channel> +</rss>`) + })) + defer ts.Close() + + // Add the feed + f := &feed.Feed{Url: ts.URL} + err := f.Create() + if err != nil { + t.Fatalf("Failed to create feed: %v", err) + } + + // Crawl + ch := make(chan string, 1) + CrawlFeed(f, ch) + + res := <-ch + if res == "" { + t.Fatal("CrawlFeed returned empty result") + } + + // Verify items were stored + items, err := item.Filter(0, f.Id, "", false, false, 0, "") + if err != nil { + t.Fatalf("Failed to filter items: %v", err) + } + + if len(items) != 1 { + t.Fatalf("Expected 1 item, got %d", len(items)) + } + + if items[0].Title != "Test Item 1" { + t.Errorf("Expected 'Test Item 1', got %q", items[0].Title) + } +} |
