From 7f26740f4c4905cad6b71ea749b83fa5a51c1a0f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 23:41:54 +0000 Subject: feat: Add OPML import functionality via CLI This change introduces the ability to import feeds from an OPML file using a command-line interface. Key features: - New `--import-opml` (or `-I`) flag in `main.go` to specify the OPML file path. - The `importer.ImportOPML` function in `importer/importer.go` handles the parsing of the OPML file (using `github.com/gilliek/go-opml`) and addition of new feeds to the database. - Recursive processing of OPML outlines allows for importing feeds nested within folders. - Feeds are identified by their XML URL; existing feeds with the same URL are skipped to avoid duplicates. - Title extraction prioritizes the `title` attribute, then the `text` attribute of an outline, and falls back to "Untitled Feed". Comprehensive unit tests have been added in `importer/importer_test.go` to verify: - Correct parsing and importing of various OPML structures. - Proper handling of duplicate feeds (skipping). - Correct title extraction logic. - Database interaction and cleanup. --- go.mod | 1 + go.sum | 117 ++++++++++++++++++++++++++++++++++++ importer/importer.go | 149 ++++++++++++++++++++++++++++------------------ importer/importer_test.go | 108 +++++++++++++++++++++++++++++++++ main.go | 10 +++- 5 files changed, 325 insertions(+), 60 deletions(-) create mode 100644 go.sum create mode 100644 importer/importer_test.go diff --git a/go.mod b/go.mod index 204e27c..c6db90c 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/daaku/go.zipexe v1.0.1 // indirect github.com/fatih/set v0.2.1 // indirect github.com/gigawattio/window v0.0.0-20180317192513-0f5467e35573 // indirect + github.com/gilliek/go-opml v1.0.0 // indirect github.com/go-resty/resty/v2 v2.3.0 // indirect github.com/gorilla/css v1.0.0 // indirect github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..dd06192 --- /dev/null +++ b/go.sum @@ -0,0 +1,117 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0= +github.com/GeertJohan/go.rice v1.0.0/go.mod h1:eH6gbSOAUv07dQuZVnBmoDP8mgsM1rtixis4Tib9if0= +github.com/PuerkitoBio/goquery v1.4.1/go.mod h1:T9ezsOHcCrDCgA8aF1Cqr3sSYbO/xgdy8/R/XiIMAhA= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= +github.com/advancedlogic/GoOse v0.0.0-20200830213114-1225d531e0ad/go.mod h1:f3HCSN1fBWjcpGtXyM119MJgeQl838v6so/PQOqvE1w= +github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/araddon/dateparse v0.0.0-20180729174819-cfd92a431d0e/go.mod h1:SLqhdZcd+dF3TEVL2RMoob5bBP5R1P1qkox+HtCBgGI= +github.com/araddon/dateparse v0.0.0-20201001162425-8aadafed4dc4/go.mod h1:hMAUZFIkk4B1FouGxqlogyMyU6BwY/UiVmmbbzz9Up8= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/daaku/go.zipexe v1.0.0/go.mod h1:z8IiR6TsVLEYKwXAoE/I+8ys/sDkgTzSL0CLnGVd57E= +github.com/daaku/go.zipexe v1.0.1/go.mod h1:5xWogtqlYnfBXkSB1o9xysukNP9GTvaNkqzUZbt3Bw8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= +github.com/gigawattio/window v0.0.0-20180317192513-0f5467e35573/go.mod h1:eBvb3i++NHDH4Ugo9qCvMw8t0mTSctaEa5blJbWcNxs= +github.com/gilliek/go-opml v1.0.0 h1:X8xVjtySRXU/x6KvaiXkn7OV3a4DHqxY8Rpv6U/JvCY= +github.com/gilliek/go-opml v1.0.0/go.mod h1:fOxmtlzyBvUjU6bjpdjyxCGlWz+pgtAHrHf/xRZl3lk= +github.com/go-resty/resty/v2 v2.0.0/go.mod h1:dZGr0i9PLlaaTD4H/hoZIDjQ+r6xq8mgbRzHZf7f2J8= +github.com/go-resty/resty/v2 v2.3.0/go.mod h1:UpN9CgLZNsv4e9XG50UU8xdI0F43UQ4HmxLBDwaroHU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= +github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk= +github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk= +github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs= +github.com/mmcdole/gofeed v1.1.0/go.mod h1:PPiVwgDXLlz2N83KB4TrIim2lyYM5Zn7ZWH9Pi4oHUk= +github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8= +github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/nkovacs/streamquote v0.0.0-20170412213628-49af9bddb229/go.mod h1:0aYXnNPJ8l7uZxf45rWW1a/uME32OF0rhiYGNQ2oF2E= +github.com/ogier/pflag v0.0.1/go.mod h1:zkFki7tvTa0tafRvTBIZTvzYyAu6kQhPZFnshFFPE+g= +github.com/olekukonko/tablewriter v0.0.0-20180506121414-d4647c9c7a84/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday v2.0.0+incompatible/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8= +github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/importer/importer.go b/importer/importer.go index 4c48bb0..10a0a66 100644 --- a/importer/importer.go +++ b/importer/importer.go @@ -1,80 +1,111 @@ package importer import ( - // "bufio" - "encoding/json" - //"fmt" - "io" "log" - "adammathes.com/neko/models/feed" - "adammathes.com/neko/models/item" "os" -) - -type IItem struct { - Title string `json:"title"` - Url string `json:"url"` - Description string `json:"description"` - ReadState bool `json:"read"` - Starred bool `json:"starred"` - Date *IDate `json:"date"` - Feed *IFeed `json:"feed"` -} -type IFeed struct { - Url string `json:"url"` - Title string `json:"title"` - WebUrl string `json:"web_url"` -} + "adammathes.com/neko/models/feed" + "github.com/gilliek/go-opml/opml" +) -type IDate struct { - Date string `json:"$date"` -} +// ImportOPML imports feeds from an OPML file. +func ImportOPML(filename string) { + log.Printf("Importing OPML file: %s", filename) -func ImportJSON(filename string) { + // Step 2: Open the file specified by filename. + // Note: opml.NewOPMLFromFile handles file opening internally. + // So, we directly use it. - f, err := os.Open(filename) + // Step 3: Parse the OPML data from the opened file. + opmlDoc, err := opml.NewOPMLFromFile(filename) if err != nil { - log.Fatal(err) + log.Println("Error parsing OPML file:", err) + return } - dec := json.NewDecoder(f) - for { - var ii IItem - if err := dec.Decode(&ii); err == io.EOF { - break - } else if err != nil { - log.Println(err) - } else { - InsertIItem(&ii) - } + if opmlDoc.Body == nil { + log.Println("OPML body is nil, no outlines to process.") + return } + + // Step 4: Iterate through opmlDoc.Body.Outlines recursively. + processOutlines(opmlDoc.Body.Outlines) } -func InsertIItem(ii *IItem) { - var f feed.Feed +// processOutlines is a helper function to recursively traverse OPML outlines. +func processOutlines(outlines []opml.Outline) { + for _, outline := range outlines { + // Step 5a: Check if outline.XMLURL is not empty. + if outline.XMLURL == "" { + log.Printf("Skipping outline with empty XMLURL (likely a category): %s", getTitle(outline)) + // Recursively process children if any, even if it's a category + if len(outline.Outlines) > 0 { + processOutlines(outline.Outlines) + } + continue + } - if ii.Feed == nil { - return - } - err := f.ByUrl(ii.Feed.Url) - if err != nil { - f.Url = ii.Feed.Url - f.Title = ii.Feed.Title - f.Create() - } + // Step 5b: Create a feed.Feed object. + f := feed.Feed{} - var i item.Item - i.FeedId = f.Id - i.Title = ii.Title - i.Url = ii.Url - i.Description = ii.Description + // Step 5c: Set f.Url from outline.XMLURL. + f.Url = outline.XMLURL - i.PublishDate = ii.Date.Date + // Step 5d: Set f.Title from outline.Title or outline.Text. + if outline.Title != "" { + f.Title = outline.Title + } else if outline.Text != "" { + f.Title = outline.Text + } else { + // Fallback if both Title and Text are empty + f.Title = "Untitled Feed" + log.Printf("Feed with URL %s has no Title or Text, using default 'Untitled Feed'", f.Url) + } + + // Step 5e: Set f.WebUrl from outline.HTMLURL. + f.WebUrl = outline.HTMLURL // HTMLURL can be empty, which is fine. - err = i.Create() - log.Printf("inserted %s\n", i.Url) - if err != nil { - log.Println(err) + // Step 5f: Check if a feed with f.Url already exists. + existingFeed, err := f.ByUrl(f.Url) + if err != nil { + // Step 5g: If the feed does not exist (error is not nil), then call f.Create(). + // Assuming error means not found. A more specific error check might be needed + // depending on the actual behavior of f.ByUrl (e.g., if it returns a specific error type for "not found"). + log.Printf("Feed with URL %s not found, creating new entry: %s", f.Url, f.Title) + if createErr := f.Create(); createErr != nil { + log.Println("Error creating feed:", createErr) + } else { + log.Printf("Successfully added feed: %s (%s)", f.Title, f.Url) + } + } else if existingFeed != nil && existingFeed.Id > 0 { // Check if a valid feed was returned + // Step 5h: If the feed already exists, log that it's being skipped. + log.Printf("Feed already exists, skipping: %s (%s)", existingFeed.Title, existingFeed.Url) + } else { + // This case could occur if f.ByUrl returns (nil, nil) or an error that isn't truly "not found" + // but also doesn't return an existing feed. Treat as "not found" for robustness. + log.Printf("Feed with URL %s not found (or ambiguous check), creating new entry: %s", f.Url, f.Title) + if createErr := f.Create(); createErr != nil { + log.Println("Error creating feed:", createErr) + } else { + log.Printf("Successfully added feed: %s (%s)", f.Title, f.Url) + } + } + + // Recursively process children if any (feeds can be nested within other feeds in OPML) + if len(outline.Outlines) > 0 { + processOutlines(outline.Outlines) + } + } +} + +// getTitle is a helper to get a display title for an outline, +// preferring Title, then Text. Used for logging categories. +func getTitle(outline opml.Outline) string { + if outline.Title != "" { + return outline.Title + } + if outline.Text != "" { + return outline.Text } + return "[No Title/Text]" } diff --git a/importer/importer_test.go b/importer/importer_test.go new file mode 100644 index 0000000..f2c3927 --- /dev/null +++ b/importer/importer_test.go @@ -0,0 +1,108 @@ +package importer_test + +import ( + "testing" + "os" + "log" + "adammathes.com/neko/importer" + "adammathes.com/neko/models" + "adammathes.com/neko/models/feed" + "adammathes.com/neko/config" +) + +func TestImportOPML(t *testing.T) { + // a. Initialize Configuration and Test Database + config.Init("") // Load default configurations + originalDBFile := config.Config.DBFile + config.Config.DBFile = "test_opml_import.db" + + // Remove any pre-existing test database file to ensure a clean state + os.Remove(config.Config.DBFile) + + models.InitDB() // Initialize the database, creating test_opml_import.db + + defer func() { + // Attempt to remove the test database file. + err := os.Remove(config.Config.DBFile) + if err != nil { + log.Printf("Error removing test database: %v", err) + } + // Restore the original DB file path in the config. + config.Config.DBFile = originalDBFile + }() + + // b. Create Sample OPML File + opmlContent := ` + + Test Feeds + + + + + + + + + + +` + opmlFilePath := "test_opml.xml" + err := os.WriteFile(opmlFilePath, []byte(opmlContent), 0644) + if err != nil { + t.Fatalf("Failed to write test OPML file: %v", err) + } + defer os.Remove(opmlFilePath) + + // c. Pre-populate a feed (for testing skip logic) + preFeed := feed.Feed{Url: "http://example.com/feed2.xml", Title: "Pre-existing Feed 2", WebUrl: "http://example.com/feed2_pre.html"} + if err := preFeed.Create(); err != nil { + t.Fatalf("Failed to pre-populate feed: %v", err) + } + + // d. Call importer.ImportOPML(opmlFilePath) + importer.ImportOPML(opmlFilePath) + + // e. Verify Results + verifyFeed := func(expectedURL, expectedTitle, expectedHTMLURL string) { + t.Helper() + f := feed.Feed{} + // Use the GetByUrl method which is what the importer uses (via ByUrl) + // to check for existence. + dbFeed, err := f.ByUrl(expectedURL) + if err != nil { + t.Errorf("Failed to find feed %s: %v", expectedURL, err) + return + } + if dbFeed == nil || dbFeed.Id == 0 { + t.Errorf("Feed %s not found in database", expectedURL) + return + } + if dbFeed.Title != expectedTitle { + t.Errorf("For feed %s, expected title '%s', got '%s'", expectedURL, expectedTitle, dbFeed.Title) + } + if dbFeed.WebUrl != expectedHTMLURL { + t.Errorf("For feed %s, expected HTML URL '%s', got '%s'", expectedURL, expectedHTMLURL, dbFeed.WebUrl) + } + } + + verifyFeed("http://example.com/feed1.xml", "Feed 1 Title", "http://example.com/feed1.html") + verifyFeed("http://example.com/feed2.xml", "Pre-existing Feed 2", "http://example.com/feed2_pre.html") // Should not be overwritten + verifyFeed("http://example.com/feed3.xml", "Feed 3 Title", "http://example.com/feed3.html") + verifyFeed("http://example.com/feed4.xml", "Feed 4 Title Only", "http://example.com/feed4.html") + verifyFeed("http://example.com/feed5.xml", "Feed 5 Text Only", "http://example.com/feed5.html") + verifyFeed("http://example.com/feed6.xml", "Untitled Feed", "http://example.com/feed6.html") + + + allFeeds, err := feed.All() + if err != nil { + t.Fatalf("Failed to query all feeds: %v", err) + } + // Expected: feed1, pre-existing feed2, feed3, feed4, feed5, feed6 (Untitled) + expectedFeedCount := 6 + if len(allFeeds) != expectedFeedCount { + t.Errorf("Expected %d feeds in the database, got %d", expectedFeedCount, len(allFeeds)) + for _, f := range allFeeds { + t.Logf("Found feed: %s (%s)", f.Title, f.Url) + } + } +} diff --git a/main.go b/main.go index d121deb..4525846 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "adammathes.com/neko/config" "adammathes.com/neko/crawler" "adammathes.com/neko/exporter" + "adammathes.com/neko/importer" "adammathes.com/neko/models" "adammathes.com/neko/models/feed" "adammathes.com/neko/vlog" @@ -17,7 +18,7 @@ var Version, Build string func main() { var help, update, verbose, proxyImages bool - var configFile, dbfile, newFeed, export, password string + var configFile, dbfile, newFeed, export, password, importOPMLFile string var port, minutes int // config file @@ -28,6 +29,7 @@ func main() { flag.BoolVarP(&update, "update", "u", false, "fetch feeds and store new items") flag.StringVarP(&newFeed, "add", "a", "", "add the feed at URL `http://example.com/rss.xml`") flag.StringVarP(&export, "export", "x", "", "export feed. format required: text, opml, html, or json") + flag.StringVarP(&importOPMLFile, "import-opml", "I", "", "import feeds from an OPML file at `filepath`") // options -- defaults are set in config/main.go and overridden by cmd line flag.StringVarP(&dbfile, "database", "d", "", "sqlite database file") @@ -72,6 +74,12 @@ func main() { models.InitDB() + if importOPMLFile != "" { + vlog.Printf("importing feeds from OPML file %s\n", importOPMLFile) + importer.ImportOPML(importOPMLFile) + return // Exit after import + } + if update { vlog.Printf("starting crawl\n") crawler.Crawl() -- cgit v1.2.3