From b33cd1e7059a271f499b030caf2176b60fd24ccb Mon Sep 17 00:00:00 2001 From: Kienan Stewart Date: Sat, 24 Sep 2022 18:41:00 -0400 Subject: [PATCH] Create rss feeds from updates --- Cargo.lock | 432 +++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 8 +- src/main.rs | 98 +++++++++++- 3 files changed, 528 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a727777..4adbf17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,37 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi-to-html" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ee82de0545b181a17cbdef44fce80ecaf394e001da7ea279008bf2e0944bee" +dependencies = [ + "regex", + "thiserror", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "async-trait" version = "0.1.53" @@ -22,6 +53,30 @@ dependencies = [ "syn", ] +[[package]] +name = "atom_syndication" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21fb6a0b39c6517edafe46f8137e53c51742425a4dae1c73ee12264a37ad7541" +dependencies = [ + "chrono", + "derive_builder", + "diligent-date-parser", + "never", + "quick-xml", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -40,6 +95,29 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "blake2b_simd" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.9.1" @@ -84,6 +162,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim 0.8.0", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "convert_case" version = "0.4.0" @@ -106,6 +205,16 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "crossbeam-utils" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" +dependencies = [ + "cfg-if", + "lazy_static", +] + [[package]] name = "cssparser" version = "0.27.2" @@ -133,6 +242,94 @@ dependencies = [ "syn", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f2c43f534ea4b0b049015d00269734195e6d3f0f6635cb692251aca6f9f8b3c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e91455b86830a1c21799d94524df0845183fa55bafd9aa137b01c7d1065fa36" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29b5acf0dea37a7f66f7b25d2c5e93fd46f8f6968b1a5d7a3e02e97768afc95a" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "derive_builder" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13202debe11181040ae9063d739fa32cfcaaebe2275fe387703460ae2365b30" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66e616858f6187ed828df7c64a6d71720d83767a7f19740b2d1b6fe6327b36e5" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58a94ace95092c5acb1e97a7e846b310cfbd499652f72297da7493f618a98d73" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -146,6 +343,26 @@ dependencies = [ "syn", ] +[[package]] +name = "diligent-date-parser" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d0fd95c7c02e2d6c588c6c5628466fff9bdde4b8c6196465e087b08e792720" +dependencies = [ + "chrono", +] + +[[package]] +name = "dirs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "displaydoc" version = "0.1.7" @@ -178,6 +395,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.31" @@ -395,11 +618,24 @@ checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" name = "haunter" version = "0.1.0" dependencies = [ + "ansi-to-html", + "chrono", + "prettydiff", + "rss", "scraper", "tempfile", "thirtyfour_sync", ] +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -494,6 +730,12 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.2.3" @@ -650,6 +892,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "never" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" + [[package]] name = "new_debug_unreachable" version = "1.0.4" @@ -760,7 +1008,7 @@ checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.13", "smallvec", "windows-sys", ] @@ -893,6 +1141,55 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "prettydiff" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6176190f1637d46034820b82fbe758727ccb40da9c9fc2255d695eb05ea29c" +dependencies = [ + "ansi_term", + "prettytable-rs", + "structopt", +] + +[[package]] +name = "prettytable-rs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e" +dependencies = [ + "atty", + "csv", + "encode_unicode", + "lazy_static", + "term", + "unicode-width", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -908,6 +1205,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quote" version = "1.0.18" @@ -998,6 +1305,12 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + [[package]] name = "redox_syscall" version = "0.2.13" @@ -1007,6 +1320,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +dependencies = [ + "getrandom 0.1.16", + "redox_syscall 0.1.57", + "rust-argon2", +] + [[package]] name = "regex" version = "1.5.5" @@ -1018,6 +1342,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.25" @@ -1069,6 +1399,30 @@ dependencies = [ "winreg", ] +[[package]] +name = "rss" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acaf1331b7fc4edc3c2920819fee1766c27e8d40da593155832db3d6dea64e92" +dependencies = [ + "atom_syndication", + "derive_builder", + "never", + "quick-xml", +] + +[[package]] +name = "rust-argon2" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" +dependencies = [ + "base64", + "blake2b_simd", + "constant_time_eq", + "crossbeam-utils", +] + [[package]] name = "rustc_version" version = "0.4.0" @@ -1299,6 +1653,42 @@ dependencies = [ "regex", ] +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "syn" version = "1.0.95" @@ -1319,7 +1709,7 @@ dependencies = [ "cfg-if", "fastrand", "libc", - "redox_syscall", + "redox_syscall 0.2.13", "remove_dir_all", "winapi", ] @@ -1335,6 +1725,26 @@ dependencies = [ "utf-8", ] +[[package]] +name = "term" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" +dependencies = [ + "byteorder", + "dirs", + "winapi", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thin-slice" version = "0.1.1" @@ -1542,6 +1952,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + [[package]] name = "unicode-width" version = "0.1.9" @@ -1578,6 +1994,18 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "want" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 594ebda..5d5c547 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -thirtyfour_sync = "0.27" +ansi-to-html = "0.1" +chrono = "0.4" +prettydiff = "0.6" +rss = "2" scraper = "0.13" -tempfile = "3" \ No newline at end of file +tempfile = "3" +thirtyfour_sync = "0.27" diff --git a/src/main.rs b/src/main.rs index 555f395..5f8676f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,14 @@ use std::cmp::Ordering; use std::env; -use std::path::Path; +use std::path::{Path,PathBuf}; use std::str::FromStr; use std::thread; use std::time::Duration; use std::time::Instant; +use ansi_to_html; +use chrono; +use rss; use thirtyfour_sync::WebDriverCommands; mod conf; @@ -16,18 +19,92 @@ struct Job { selector: String, every: Duration, last_run: Option, + output_file: Option, + channel: Option, } impl Job { fn new(url: &str, selector: &str, conf: &Conf) -> Job { - let job = Job { + let mut job = Job { url: String::from_str(url).unwrap(), selector: String::from_str(selector).unwrap(), every: conf.check_interval, last_run: None, + output_file: None, + channel: None, + }; + let mut output_file = conf.output_dir.clone(); + let mut file_name = job.url.clone().replace("/", "-"); + file_name.push_str(".rss"); + output_file = output_file.join(Path::new(&file_name)); + job.output_file = Some(output_file); + + match std::fs::File::open(job.output_file.as_ref().unwrap()) { + Err(why) => { + println!("Failed to open '{}': {}", job.output_file.as_ref().unwrap().display(), why); + println!("Creating empty RSS channel for job '{}'", job.url); + job.channel = Some( + rss::ChannelBuilder::default() + .title(url) + .link(url) + .description("haunting") + .build() + ); + job.channel.as_mut().unwrap().set_generator("Haunter".to_string()); + }, + Ok(file) => { + job.channel = Some( + rss::Channel::read_from(std::io::BufReader::new(file)).unwrap() + ); + }, }; return job; } + + fn update(&mut self, value: &str, diff: &str) { + if self.channel.is_none() { + println!("Skipping update of channel: no channel set"); + return; + } + let channel = self.channel.as_mut().unwrap(); + let update_time = chrono::Utc::now(); + let item = rss::ItemBuilder::default() + .title(format!("Update to '{}'", self.url)) + .link(self.url.clone()) + .pub_date(update_time.to_rfc2822()) + .content(format!(r#" +New content at {}:
+
+{}
+
+

+Diff:
+
+{}
+
+"#, + update_time.format("%d/%m/%Y %H:%M"), + ansi_to_html::convert_escaped(value).unwrap().as_str(), + ansi_to_html::convert_escaped(diff).unwrap().as_str() + ) + ) + .build(); + + channel.items.push(item); + + if self.output_file.is_some() { + match std::fs::File::create(self.output_file.as_ref().unwrap()) { + Err(why) => { + println!("Failed to open '{}' for writing: {}", self.output_file.as_ref().unwrap().display(), why); + }, + Ok(file) => { + channel.write_to( + std::io::BufWriter::new(file) + ).expect("Failed to write updated channel"); + }, + }; + } + } } struct ThreadJob { @@ -103,7 +180,7 @@ fn main() { let some_job = Job::new("https://www.rust-lang.org", "a.download-link", &conf); let other_job = Job::new( "https://arstechnica.com/", - "li.split-feature:nth-child(1) > header:nth-child(4) > h2:nth-child(1) > a:nth-child(1)", + "section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)", &conf ); jobs.push(ThreadJob { @@ -139,13 +216,18 @@ fn main() { } println!("Job for '{}' took about {}s", tj.job.url, duration.as_secs()); if tj.last_result.is_none() { - println!("New result: '{}'\n", result); + println!("New result for job: '{}'\n", tj.job.url); + let diff = prettydiff::diff_lines( + "", result.as_str()); + tj.job.update(result.as_str(), diff.to_string().as_str()); tj.last_result = Some(result); } else { if tj.last_result.as_ref().unwrap().ne(&result) { - println!("Change detected\nOld value: '{}'\nNew value: '{}'", - tj.last_result.as_ref().unwrap(), result); + println!("Change detected for job '{}'", tj.job.url); + let diff = prettydiff::diff_lines( + tj.last_result.as_ref().unwrap(), result.as_str()); + tj.job.update(result.as_str(), diff.to_string().as_str()); tj.last_result = Some(result); } } @@ -176,6 +258,10 @@ fn main() { break; } } + else { + let time_since_last_run = Instant::now().duration_since(tj.job.last_run.unwrap()); + println!("Job '{}' - {}s since last run", tj.job.url, time_since_last_run.as_secs()); + } } break; }