From c3741cda2f080061f1a100fbd6bf1566a7d5fe9c Mon Sep 17 00:00:00 2001 From: Clark Kampfe Date: Sun, 18 Feb 2024 08:24:52 -0600 Subject: [PATCH] Import OPML files (#32) * WIP: initial opml import * clean up opml import * add opml.rs, oops --- Cargo.lock | 55 ++++++++++++++++++++ Cargo.toml | 1 + src/app.rs | 4 +- src/main.rs | 147 +++++++++++++++++++++++++++++++++++++--------------- src/opml.rs | 72 +++++++++++++++++++++++++ src/rss.rs | 24 ++++++++- 6 files changed, 256 insertions(+), 47 deletions(-) create mode 100644 src/opml.rs diff --git a/Cargo.lock b/Cargo.lock index 6e2eb80..ad197f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,6 +537,31 @@ dependencies = [ "wasi", ] +[[package]] +name = "hard-xml" +version = "1.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fafc2bcb74049535eb6fab49eb20164a427867a9e809516ef95a98e961164432" +dependencies = [ + "hard-xml-derive", + "jetscii", + "lazy_static", + "memchr", + "xmlparser", +] + +[[package]] +name = "hard-xml-derive" +version = "1.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a345b327da51b997c94f841d9db6b2d292c7632713bd8a1b8b191e8b819df7" +dependencies = [ + "bitflags 1.3.2", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -695,6 +720,12 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "jetscii" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e" + [[package]] name = "jni" version = "0.21.1" @@ -732,6 +763,12 @@ version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10257499f089cd156ad82d0a9cd57d9501fa2c989068992a97eb3c27836f206b" +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.153" @@ -932,6 +969,17 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "opml" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2f96426c857a92676dc29a9e2a181eb39321047ac994491c69eae01619ddf2" +dependencies = [ + "hard-xml", + "serde", + "thiserror", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1236,6 +1284,7 @@ dependencies = [ "directories", "html2text", "num_cpus", + "opml", "r2d2", "r2d2_sqlite", "ratatui", @@ -2214,6 +2263,12 @@ dependencies = [ "markup5ever", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index a78a9a2..9d5cc06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ diligent-date-parser = "0.1" directories = "5" html2text = "0.12" num_cpus = "1.16" +opml = "1.1" r2d2 = "0.8" r2d2_sqlite = "0.23" rss = { version = "2.0", default-features = false } diff --git a/src/app.rs b/src/app.rs index 75fdc4b..67a128c 100644 --- a/src/app.rs +++ b/src/app.rs @@ -68,7 +68,7 @@ impl App { ]; pub fn new( - options: crate::Options, + options: crate::ReadOptions, event_s: std::sync::mpsc::Sender>, ) -> Result { Ok(App { @@ -195,7 +195,7 @@ pub struct AppImpl { impl AppImpl { pub fn new( - options: crate::Options, + options: crate::ReadOptions, event_s: std::sync::mpsc::Sender>, ) -> Result { let mut conn = rusqlite::Connection::open(&options.database_path)?; diff --git a/src/main.rs b/src/main.rs index a91b9e4..f885e4f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use crate::modes::{Mode, Selected}; use anyhow::Result; use app::App; -use clap::Parser; +use clap::{Parser, Subcommand}; use crossterm::event; use crossterm::event::{Event as CEvent, KeyCode, KeyModifiers}; use crossterm::execute; @@ -19,6 +19,7 @@ use std::{thread, time}; mod app; mod modes; +mod opml; mod rss; mod ui; mod util; @@ -28,39 +29,83 @@ pub enum Event { Tick, } -// Only used to take input at the boundary. -// Turned into `Options` with `to_options()`. /// A TUI RSS reader with vim-like controls and a local-first, offline-first focus -#[derive(Clone, Debug, Parser)] +#[derive(Debug, Parser)] #[command(author, version, about, name = "russ")] -struct CliOptions { - /// Override where `russ` stores and reads feeds. - /// By default, the feeds database on Linux this will be at `XDG_DATA_HOME/russ/feeds.db` or `$HOME/.local/share/russ/feeds.db`. - /// On MacOS it will be at `$HOME/Library/Application Support/russ/feeds.db`. - /// On Windows it will be at `{FOLDERID_LocalAppData}/russ/data/feeds.db`. - #[arg(short, long)] - database_path: Option, - /// time in ms between two ticks - #[arg(short, long, default_value = "250")] - tick_rate: u64, - /// number of seconds to show the flash message before clearing it - #[arg(short, long, default_value = "4", value_parser = parse_seconds)] - flash_display_duration_seconds: time::Duration, - /// RSS/Atom network request timeout in seconds - #[arg(short, long, default_value = "5", value_parser = parse_seconds)] - network_timeout: time::Duration, +struct Options { + #[command(subcommand)] + subcommand: Command, } -impl CliOptions { - fn to_options(&self) -> std::io::Result { - let database_path = get_database_path(self)?; +/// Only used to take input at the boundary. +/// Turned into `ValidatedOptions` with `validate()`. +#[derive(Debug, Subcommand)] +enum Command { + /// Read your feeds + Read { + /// Override where `russ` stores and reads feeds. + /// By default, the feeds database on Linux this will be at `XDG_DATA_HOME/russ/feeds.db` or `$HOME/.local/share/russ/feeds.db`. + /// On MacOS it will be at `$HOME/Library/Application Support/russ/feeds.db`. + /// On Windows it will be at `{FOLDERID_LocalAppData}/russ/data/feeds.db`. + #[arg(short, long)] + database_path: Option, + /// time in ms between two ticks + #[arg(short, long, default_value = "250")] + tick_rate: u64, + /// number of seconds to show the flash message before clearing it + #[arg(short, long, default_value = "4", value_parser = parse_seconds)] + flash_display_duration_seconds: time::Duration, + /// RSS/Atom network request timeout in seconds + #[arg(short, long, default_value = "5", value_parser = parse_seconds)] + network_timeout: time::Duration, + }, + /// Import feeds from an OPML document + Import { + /// Override where `russ` stores and reads feeds. + /// By default, the feeds database on Linux this will be at `XDG_DATA_HOME/russ/feeds.db` or `$HOME/.local/share/russ/feeds.db`. + /// On MacOS it will be at `$HOME/Library/Application Support/russ/feeds.db`. + /// On Windows it will be at `{FOLDERID_LocalAppData}/russ/data/feeds.db`. + #[arg(short, long)] + database_path: Option, + #[arg(short, long)] + opml_path: PathBuf, + /// RSS/Atom network request timeout in seconds + #[arg(short, long, default_value = "5", value_parser = parse_seconds)] + network_timeout: time::Duration, + }, +} - Ok(Options { - database_path, - tick_rate: self.tick_rate, - flash_display_duration_seconds: self.flash_display_duration_seconds, - network_timeout: self.network_timeout, - }) +impl Command { + fn validate(&self) -> std::io::Result { + match self { + Command::Read { + database_path, + tick_rate, + flash_display_duration_seconds, + network_timeout, + } => { + let database_path = get_database_path(database_path)?; + + Ok(ValidatedOptions::Read(ReadOptions { + database_path, + tick_rate: *tick_rate, + flash_display_duration_seconds: *flash_display_duration_seconds, + network_timeout: *network_timeout, + })) + } + Command::Import { + database_path, + opml_path, + network_timeout, + } => { + let database_path = get_database_path(database_path)?; + Ok(ValidatedOptions::Import(ImportOptions { + database_path, + opml_path: opml_path.to_owned(), + network_timeout: *network_timeout, + })) + } + } } } @@ -69,21 +114,30 @@ fn parse_seconds(s: &str) -> Result { Ok(time::Duration::from_secs(as_u64)) } -/// internal, validated options +/// internal, validated options for the normal reader mode +#[derive(Debug)] +enum ValidatedOptions { + Read(ReadOptions), + Import(ImportOptions), +} + #[derive(Clone, Debug)] -pub struct Options { - /// feed database path +struct ReadOptions { database_path: PathBuf, - /// time in ms between two ticks tick_rate: u64, - /// number of seconds to show the flash message before clearing it flash_display_duration_seconds: time::Duration, - /// RSS/Atom network request timeout in seconds network_timeout: time::Duration, } -fn get_database_path(cli_options: &CliOptions) -> std::io::Result { - let database_path = if let Some(database_path) = cli_options.database_path.as_ref() { +#[derive(Debug)] +struct ImportOptions { + database_path: PathBuf, + opml_path: PathBuf, + network_timeout: time::Duration, +} + +fn get_database_path(database_path: &Option) -> std::io::Result { + let database_path = if let Some(database_path) = database_path { database_path.to_owned() } else { let mut database_path = directories::ProjectDirs::from("", "", "russ") @@ -113,7 +167,7 @@ fn io_loop( app: App, sx: mpsc::Sender, rx: mpsc::Receiver, - options: &Options, + options: &ReadOptions, ) -> Result<()> { use IoCommand::*; @@ -272,11 +326,7 @@ fn clear_flash_after(sx: mpsc::Sender, duration: time::Duration) { }); } -fn main() -> Result<()> { - let cli_options: CliOptions = CliOptions::parse(); - - let options = cli_options.to_options()?; - +fn run_reader(options: ReadOptions) -> Result<()> { enable_raw_mode()?; let mut stdout = stdout(); @@ -408,3 +458,14 @@ fn main() -> Result<()> { Ok(()) } + +fn main() -> Result<()> { + let options = Options::parse(); + + let validated_options = options.subcommand.validate()?; + + match validated_options { + ValidatedOptions::Import(options) => crate::opml::import(options), + ValidatedOptions::Read(options) => run_reader(options), + } +} diff --git a/src/opml.rs b/src/opml.rs new file mode 100644 index 0000000..b75a91a --- /dev/null +++ b/src/opml.rs @@ -0,0 +1,72 @@ +use crate::ImportOptions; +use anyhow::{Context, Result}; + +pub(crate) fn import(options: ImportOptions) -> Result<()> { + let mut conn = rusqlite::Connection::open(options.database_path)?; + + crate::rss::initialize_db(&mut conn)?; + + let opml_file = + std::fs::File::open(options.opml_path).context("must provide a valid OPML file")?; + + let mut opml_reader = std::io::BufReader::new(opml_file); + + let opml_document = + opml::OPML::from_reader(&mut opml_reader).context("unable to parse provided OPML file")?; + + let http_client = ureq::AgentBuilder::new() + .timeout_read(options.network_timeout) + .build(); + + let feed_urls = get_feed_urls(&opml_document); + + let mut successful_imports = 0; + let mut failed_imports = vec![]; + + for feed_url in feed_urls { + eprintln!(">>>>>>>>>>"); + eprintln!("{}: starting import", feed_url); + match crate::rss::subscribe_to_feed(&http_client, &mut conn, &feed_url) { + Ok(_feed_id) => { + eprintln!("{feed_url}: OK"); + successful_imports += 1; + } + Err(e) => { + eprintln!("ERROR: {:?}", e); + failed_imports.push(feed_url); + } + }; + eprintln!("<<<<<<<<<<"); + } + + eprintln!(); + eprintln!("{successful_imports} feeds imported"); + eprintln!("{} feeds failed to import", failed_imports.len()); + + if !failed_imports.is_empty() { + eprintln!(); + + for failed_import_url in failed_imports { + eprintln!("{failed_import_url} failed to import"); + } + } + + Ok(()) +} + +// outlines can be nested within other outlines in a tree structure, +// so we have to traverse them +fn get_feed_urls(opml_document: &opml::OPML) -> Vec { + let mut outlines_stack = opml_document.body.outlines.to_owned(); + let mut feed_urls = vec![]; + + while let Some(this_outline) = outlines_stack.pop() { + outlines_stack.extend_from_slice(&this_outline.outlines); + + if let Some(xml_url) = this_outline.xml_url { + feed_urls.push(xml_url); + } + } + + feed_urls +} diff --git a/src/rss.rs b/src/rss.rs index 4284d01..b304638 100644 --- a/src/rss.rs +++ b/src/rss.rs @@ -248,8 +248,19 @@ pub fn subscribe_to_feed( match feed_and_entries { FeedResponse::CacheMiss(feed_and_entries) => { let feed_id = in_transaction(conn, |tx| { - let feed_id = create_feed(tx, &feed_and_entries.feed)?; - add_entries_to_feed(tx, feed_id, &feed_and_entries.entries)?; + let feed_id = create_feed(tx, &feed_and_entries.feed).with_context(|| { + format!( + "creating feed {:?} failed", + &feed_and_entries.feed.feed_link + ) + })?; + add_entries_to_feed(tx, feed_id, &feed_and_entries.entries).with_context(|| { + format!( + "inserting {} entries for feed {:?} failed", + &feed_and_entries.entries.len(), + &feed_and_entries.feed.feed_link + ) + })?; Ok(feed_id) })?; @@ -425,6 +436,15 @@ pub fn initialize_db(conn: &mut rusqlite::Connection) -> Result<()> { tx.execute("ALTER TABLE feeds ADD COLUMN latest_etag TEXT", [])?; } + if schema_version <= 2 { + tx.pragma_update(None, "user_version", 3)?; + + tx.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS feeds_feed_link ON feeds (feed_link)", + [], + )?; + } + Ok(()) }) }