diff options
| author | Adam Mathes <adam@adammathes.com> | 2026-02-13 17:03:02 -0800 |
|---|---|---|
| committer | Adam Mathes <adam@adammathes.com> | 2026-02-13 17:03:02 -0800 |
| commit | ddcbfc92b9c7b1c932c4bdcadf393b95aa0adc0c (patch) | |
| tree | 50ecca7761bd81c3716a7a0bafc01613daf1c5a8 /models | |
| parent | 2d1b58d49c99d2cbecc253b655ee583880156f40 (diff) | |
| download | neko-ddcbfc92b9c7b1c932c4bdcadf393b95aa0adc0c.tar.gz neko-ddcbfc92b9c7b1c932c4bdcadf393b95aa0adc0c.tar.bz2 neko-ddcbfc92b9c7b1c932c4bdcadf393b95aa0adc0c.zip | |
fix(crawler): prevent panic on missing Content-Type and use custom UA (NK-uywybr)
Diffstat (limited to 'models')
| -rw-r--r-- | models/feed/feed.go | 36 |
1 files changed, 26 insertions, 10 deletions
```diff
diff --git a/models/feed/feed.go b/models/feed/feed.go
index b0f7c69..95e7104 100644
--- a/models/feed/feed.go
+++ b/models/feed/feed.go
@@ -3,6 +3,8 @@ package feed
 import (
 	"log"
 	"net/http"
+	"strings"
+	"time"
 
 	"adammathes.com/neko/models"
 	"github.com/PuerkitoBio/goquery"
@@ -118,24 +120,38 @@ func (f *Feed) Create() error {
 
 // Given a string `url`, return to the best guess of the feed
 func ResolveFeedURL(url string) string {
-	resp, err := http.Get(url)
+	c := &http.Client{
+		Timeout: 10 * http.DefaultClient.Timeout,
+	}
+	if c.Timeout == 0 {
+		c.Timeout = 10 * time.Second
+	}
+
+	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
-		// handle errors better
 		return url
 	}
+	req.Header.Set("User-Agent", "neko RSS Crawler +https://github.com/adammathes/neko")
 
-	// Check content-type header first
-	// if it's feed-ish, just use it
-	contentType := resp.Header["Content-Type"][0]
-	switch contentType {
+	resp, err := c.Do(req)
+	if err != nil {
+		return url
+	}
+	defer resp.Body.Close()
+
+	contentType := resp.Header.Get("Content-Type")
+	if contentType == "" {
+		return url
+	}
 
-	case "text/xml":
+	switch {
+	case strings.HasPrefix(contentType, "text/xml"):
 		return url
-	case "text/rss+xml":
+	case strings.HasPrefix(contentType, "text/rss+xml"):
 		return url
-	case "application/rss+xml":
+	case strings.HasPrefix(contentType, "application/rss+xml"):
 		return url
-	case "application/atom+xml":
+	case strings.HasPrefix(contentType, "application/atom+xml"):
 		return url
 	}
 
```
