Create rss feeds from updates

This commit is contained in:
Kienan Stewart 2022-09-24 18:41:00 -04:00
parent 20870b6444
commit b33cd1e705
3 changed files with 528 additions and 10 deletions

432
Cargo.lock generated
View File

@ -11,6 +11,37 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi-to-html"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ee82de0545b181a17cbdef44fce80ecaf394e001da7ea279008bf2e0944bee"
dependencies = [
"regex",
"thiserror",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "arrayref"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "async-trait"
version = "0.1.53"
@ -22,6 +53,30 @@ dependencies = [
"syn",
]
[[package]]
name = "atom_syndication"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21fb6a0b39c6517edafe46f8137e53c51742425a4dae1c73ee12264a37ad7541"
dependencies = [
"chrono",
"derive_builder",
"diligent-date-parser",
"never",
"quick-xml",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -40,6 +95,29 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "blake2b_simd"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
dependencies = [
"arrayref",
"arrayvec",
"constant_time_eq",
]
[[package]]
name = "bstr"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
dependencies = [
"lazy_static",
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.9.1"
@ -84,6 +162,27 @@ dependencies = [
"winapi",
]
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim 0.8.0",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "convert_case"
version = "0.4.0"
@ -106,6 +205,16 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "crossbeam-utils"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "cssparser"
version = "0.27.2"
@ -133,6 +242,94 @@ dependencies = [
"syn",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa 0.4.8",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "darling"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f2c43f534ea4b0b049015d00269734195e6d3f0f6635cb692251aca6f9f8b3c"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e91455b86830a1c21799d94524df0845183fa55bafd9aa137b01c7d1065fa36"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.10.0",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29b5acf0dea37a7f66f7b25d2c5e93fd46f8f6968b1a5d7a3e02e97768afc95a"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "derive_builder"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d13202debe11181040ae9063d739fa32cfcaaebe2275fe387703460ae2365b30"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66e616858f6187ed828df7c64a6d71720d83767a7f19740b2d1b6fe6327b36e5"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58a94ace95092c5acb1e97a7e846b310cfbd499652f72297da7493f618a98d73"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -146,6 +343,26 @@ dependencies = [
"syn",
]
[[package]]
name = "diligent-date-parser"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2d0fd95c7c02e2d6c588c6c5628466fff9bdde4b8c6196465e087b08e792720"
dependencies = [
"chrono",
]
[[package]]
name = "dirs"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.1.7"
@ -178,6 +395,12 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.31"
@ -395,11 +618,24 @@ checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
name = "haunter"
version = "0.1.0"
dependencies = [
"ansi-to-html",
"chrono",
"prettydiff",
"rss",
"scraper",
"tempfile",
"thirtyfour_sync",
]
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
@ -494,6 +730,12 @@ dependencies = [
"tokio-native-tls",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.2.3"
@ -650,6 +892,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "never"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91"
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
@ -760,7 +1008,7 @@ checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"redox_syscall 0.2.13",
"smallvec",
"windows-sys",
]
@ -893,6 +1141,55 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettydiff"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6176190f1637d46034820b82fbe758727ccb40da9c9fc2255d695eb05ea29c"
dependencies = [
"ansi_term",
"prettytable-rs",
"structopt",
]
[[package]]
name = "prettytable-rs"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e"
dependencies = [
"atty",
"csv",
"encode_unicode",
"lazy_static",
"term",
"unicode-width",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
@ -908,6 +1205,16 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]]
name = "quote"
version = "1.0.18"
@ -998,6 +1305,12 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "redox_syscall"
version = "0.2.13"
@ -1007,6 +1320,17 @@ dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d"
dependencies = [
"getrandom 0.1.16",
"redox_syscall 0.1.57",
"rust-argon2",
]
[[package]]
name = "regex"
version = "1.5.5"
@ -1018,6 +1342,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "regex-syntax"
version = "0.6.25"
@ -1069,6 +1399,30 @@ dependencies = [
"winreg",
]
[[package]]
name = "rss"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acaf1331b7fc4edc3c2920819fee1766c27e8d40da593155832db3d6dea64e92"
dependencies = [
"atom_syndication",
"derive_builder",
"never",
"quick-xml",
]
[[package]]
name = "rust-argon2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb"
dependencies = [
"base64",
"blake2b_simd",
"constant_time_eq",
"crossbeam-utils",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
@ -1299,6 +1653,42 @@ dependencies = [
"regex",
]
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "structopt"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10"
dependencies = [
"clap",
"lazy_static",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "1.0.95"
@ -1319,7 +1709,7 @@ dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"redox_syscall 0.2.13",
"remove_dir_all",
"winapi",
]
@ -1335,6 +1725,26 @@ dependencies = [
"utf-8",
]
[[package]]
name = "term"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42"
dependencies = [
"byteorder",
"dirs",
"winapi",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thin-slice"
version = "0.1.1"
@ -1542,6 +1952,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
[[package]]
name = "unicode-width"
version = "0.1.9"
@ -1578,6 +1994,18 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "want"
version = "0.3.0"

View File

@ -6,6 +6,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
thirtyfour_sync = "0.27"
ansi-to-html = "0.1"
chrono = "0.4"
prettydiff = "0.6"
rss = "2"
scraper = "0.13"
tempfile = "3"
thirtyfour_sync = "0.27"

View File

@ -1,11 +1,14 @@
use std::cmp::Ordering;
use std::env;
use std::path::Path;
use std::path::{Path,PathBuf};
use std::str::FromStr;
use std::thread;
use std::time::Duration;
use std::time::Instant;
use ansi_to_html;
use chrono;
use rss;
use thirtyfour_sync::WebDriverCommands;
mod conf;
@ -16,18 +19,92 @@ struct Job {
selector: String,
every: Duration,
last_run: Option<Instant>,
output_file: Option<PathBuf>,
channel: Option<rss::Channel>,
}
impl Job {
fn new(url: &str, selector: &str, conf: &Conf) -> Job {
let job = Job {
let mut job = Job {
url: String::from_str(url).unwrap(),
selector: String::from_str(selector).unwrap(),
every: conf.check_interval,
last_run: None,
output_file: None,
channel: None,
};
let mut output_file = conf.output_dir.clone();
let mut file_name = job.url.clone().replace("/", "-");
file_name.push_str(".rss");
output_file = output_file.join(Path::new(&file_name));
job.output_file = Some(output_file);
match std::fs::File::open(job.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}': {}", job.output_file.as_ref().unwrap().display(), why);
println!("Creating empty RSS channel for job '{}'", job.url);
job.channel = Some(
rss::ChannelBuilder::default()
.title(url)
.link(url)
.description("haunting")
.build()
);
job.channel.as_mut().unwrap().set_generator("Haunter".to_string());
},
Ok(file) => {
job.channel = Some(
rss::Channel::read_from(std::io::BufReader::new(file)).unwrap()
);
},
};
return job;
}
fn update(&mut self, value: &str, diff: &str) {
if self.channel.is_none() {
println!("Skipping update of channel: no channel set");
return;
}
let channel = self.channel.as_mut().unwrap();
let update_time = chrono::Utc::now();
let item = rss::ItemBuilder::default()
.title(format!("Update to '{}'", self.url))
.link(self.url.clone())
.pub_date(update_time.to_rfc2822())
.content(format!(r#"
New content at {}: <br>
<pre class="new-value">
{}
</pre>
<br><br>
Diff: <br>
<pre class="diff-value">
{}
</pre>
"#,
update_time.format("%d/%m/%Y %H:%M"),
ansi_to_html::convert_escaped(value).unwrap().as_str(),
ansi_to_html::convert_escaped(diff).unwrap().as_str()
)
)
.build();
channel.items.push(item);
if self.output_file.is_some() {
match std::fs::File::create(self.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}' for writing: {}", self.output_file.as_ref().unwrap().display(), why);
},
Ok(file) => {
channel.write_to(
std::io::BufWriter::new(file)
).expect("Failed to write updated channel");
},
};
}
}
}
struct ThreadJob {
@ -103,7 +180,7 @@ fn main() {
let some_job = Job::new("https://www.rust-lang.org", "a.download-link", &conf);
let other_job = Job::new(
"https://arstechnica.com/",
"li.split-feature:nth-child(1) > header:nth-child(4) > h2:nth-child(1) > a:nth-child(1)",
"section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)",
&conf
);
jobs.push(ThreadJob {
@ -139,13 +216,18 @@ fn main() {
}
println!("Job for '{}' took about {}s", tj.job.url, duration.as_secs());
if tj.last_result.is_none() {
println!("New result: '{}'\n", result);
println!("New result for job: '{}'\n", tj.job.url);
let diff = prettydiff::diff_lines(
"", result.as_str());
tj.job.update(result.as_str(), diff.to_string().as_str());
tj.last_result = Some(result);
}
else {
if tj.last_result.as_ref().unwrap().ne(&result) {
println!("Change detected\nOld value: '{}'\nNew value: '{}'",
tj.last_result.as_ref().unwrap(), result);
println!("Change detected for job '{}'", tj.job.url);
let diff = prettydiff::diff_lines(
tj.last_result.as_ref().unwrap(), result.as_str());
tj.job.update(result.as_str(), diff.to_string().as_str());
tj.last_result = Some(result);
}
}
@ -176,6 +258,10 @@ fn main() {
break;
}
}
else {
let time_since_last_run = Instant::now().duration_since(tj.job.last_run.unwrap());
println!("Job '{}' - {}s since last run", tj.job.url, time_since_last_run.as_secs());
}
}
break;
}