aboutsummaryrefslogtreecommitdiffstats
path: root/importer/importer.go
diff options
context:
space:
mode:
authorgoogle-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>2025-05-22 23:41:54 +0000
committergoogle-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>2025-05-22 23:41:54 +0000
commit7f26740f4c4905cad6b71ea749b83fa5a51c1a0f (patch)
tree89d40fd313f2b64f262b31690925e5f7aa461a5f /importer/importer.go
parentfd747e166a1f1a1f22a27e44810836b2bd2244c7 (diff)
downloadneko-7f26740f4c4905cad6b71ea749b83fa5a51c1a0f.tar.gz
neko-7f26740f4c4905cad6b71ea749b83fa5a51c1a0f.tar.bz2
neko-7f26740f4c4905cad6b71ea749b83fa5a51c1a0f.zip
feat: Add OPML import functionality via CLIopml-import-cli
This change introduces the ability to import feeds from an OPML file using a command-line interface. Key features: - New `--import-opml` (or `-I`) flag in `main.go` to specify the OPML file path. - The `importer.ImportOPML` function in `importer/importer.go` handles the parsing of the OPML file (using `github.com/gilliek/go-opml`) and addition of new feeds to the database. - Recursive processing of OPML outlines allows for importing feeds nested within folders. - Feeds are identified by their XML URL; existing feeds with the same URL are skipped to avoid duplicates. - Title extraction prioritizes the `title` attribute, then the `text` attribute of an outline, and falls back to "Untitled Feed". Comprehensive unit tests have been added in `importer/importer_test.go` to verify: - Correct parsing and importing of various OPML structures. - Proper handling of duplicate feeds (skipping). - Correct title extraction logic. - Database interaction and cleanup.
Diffstat (limited to 'importer/importer.go')
-rw-r--r--importer/importer.go149
1 files changed, 90 insertions, 59 deletions
diff --git a/importer/importer.go b/importer/importer.go
index 4c48bb0..10a0a66 100644
--- a/importer/importer.go
+++ b/importer/importer.go
@@ -1,80 +1,111 @@
package importer
import (
- // "bufio"
- "encoding/json"
- //"fmt"
- "io"
"log"
- "adammathes.com/neko/models/feed"
- "adammathes.com/neko/models/item"
"os"
-)
-
-type IItem struct {
- Title string `json:"title"`
- Url string `json:"url"`
- Description string `json:"description"`
- ReadState bool `json:"read"`
- Starred bool `json:"starred"`
- Date *IDate `json:"date"`
- Feed *IFeed `json:"feed"`
-}
-type IFeed struct {
- Url string `json:"url"`
- Title string `json:"title"`
- WebUrl string `json:"web_url"`
-}
+ "adammathes.com/neko/models/feed"
+ "github.com/gilliek/go-opml/opml"
+)
-type IDate struct {
- Date string `json:"$date"`
-}
+// ImportOPML imports feeds from an OPML file.
+func ImportOPML(filename string) {
+ log.Printf("Importing OPML file: %s", filename)
-func ImportJSON(filename string) {
+ // Step 2: Open the file specified by filename.
+ // Note: opml.NewOPMLFromFile handles file opening internally.
+ // So, we directly use it.
- f, err := os.Open(filename)
+ // Step 3: Parse the OPML data from the opened file.
+ opmlDoc, err := opml.NewOPMLFromFile(filename)
if err != nil {
- log.Fatal(err)
+ log.Println("Error parsing OPML file:", err)
+ return
}
- dec := json.NewDecoder(f)
- for {
- var ii IItem
- if err := dec.Decode(&ii); err == io.EOF {
- break
- } else if err != nil {
- log.Println(err)
- } else {
- InsertIItem(&ii)
- }
+ if opmlDoc.Body == nil {
+ log.Println("OPML body is nil, no outlines to process.")
+ return
}
+
+ // Step 4: Iterate through opmlDoc.Body.Outlines recursively.
+ processOutlines(opmlDoc.Body.Outlines)
}
-func InsertIItem(ii *IItem) {
- var f feed.Feed
+// processOutlines is a helper function to recursively traverse OPML outlines.
+func processOutlines(outlines []opml.Outline) {
+ for _, outline := range outlines {
+ // Step 5a: Check if outline.XMLURL is not empty.
+ if outline.XMLURL == "" {
+ log.Printf("Skipping outline with empty XMLURL (likely a category): %s", getTitle(outline))
+ // Recursively process children if any, even if it's a category
+ if len(outline.Outlines) > 0 {
+ processOutlines(outline.Outlines)
+ }
+ continue
+ }
- if ii.Feed == nil {
- return
- }
- err := f.ByUrl(ii.Feed.Url)
- if err != nil {
- f.Url = ii.Feed.Url
- f.Title = ii.Feed.Title
- f.Create()
- }
+ // Step 5b: Create a feed.Feed object.
+ f := feed.Feed{}
- var i item.Item
- i.FeedId = f.Id
- i.Title = ii.Title
- i.Url = ii.Url
- i.Description = ii.Description
+ // Step 5c: Set f.Url from outline.XMLURL.
+ f.Url = outline.XMLURL
- i.PublishDate = ii.Date.Date
+ // Step 5d: Set f.Title from outline.Title or outline.Text.
+ if outline.Title != "" {
+ f.Title = outline.Title
+ } else if outline.Text != "" {
+ f.Title = outline.Text
+ } else {
+ // Fallback if both Title and Text are empty
+ f.Title = "Untitled Feed"
+ log.Printf("Feed with URL %s has no Title or Text, using default 'Untitled Feed'", f.Url)
+ }
+
+ // Step 5e: Set f.WebUrl from outline.HTMLURL.
+ f.WebUrl = outline.HTMLURL // HTMLURL can be empty, which is fine.
- err = i.Create()
- log.Printf("inserted %s\n", i.Url)
- if err != nil {
- log.Println(err)
+ // Step 5f: Check if a feed with f.Url already exists.
+ existingFeed, err := f.ByUrl(f.Url)
+ if err != nil {
+ // Step 5g: If the feed does not exist (error is not nil), then call f.Create().
+ // Assuming error means not found. A more specific error check might be needed
+ // depending on the actual behavior of f.ByUrl (e.g., if it returns a specific error type for "not found").
+ log.Printf("Feed with URL %s not found, creating new entry: %s", f.Url, f.Title)
+ if createErr := f.Create(); createErr != nil {
+ log.Println("Error creating feed:", createErr)
+ } else {
+ log.Printf("Successfully added feed: %s (%s)", f.Title, f.Url)
+ }
+ } else if existingFeed != nil && existingFeed.Id > 0 { // Check if a valid feed was returned
+ // Step 5h: If the feed already exists, log that it's being skipped.
+ log.Printf("Feed already exists, skipping: %s (%s)", existingFeed.Title, existingFeed.Url)
+ } else {
+ // This case could occur if f.ByUrl returns (nil, nil) or an error that isn't truly "not found"
+ // but also doesn't return an existing feed. Treat as "not found" for robustness.
+ log.Printf("Feed with URL %s not found (or ambiguous check), creating new entry: %s", f.Url, f.Title)
+ if createErr := f.Create(); createErr != nil {
+ log.Println("Error creating feed:", createErr)
+ } else {
+ log.Printf("Successfully added feed: %s (%s)", f.Title, f.Url)
+ }
+ }
+
+ // Recursively process children if any (feeds can be nested within other feeds in OPML)
+ if len(outline.Outlines) > 0 {
+ processOutlines(outline.Outlines)
+ }
+ }
+}
+
+// getTitle is a helper to get a display title for an outline,
+// preferring Title, then Text. Used for logging categories.
+func getTitle(outline opml.Outline) string {
+ if outline.Title != "" {
+ return outline.Title
+ }
+ if outline.Text != "" {
+ return outline.Text
}
+ return "[No Title/Text]"
}