Use text-diff to compare run results

This commit is contained in:
Kienan Stewart 2022-10-08 20:39:55 -04:00
parent 3c21f234d8
commit a69fd14ea4
3 changed files with 70 additions and 19 deletions

69
Cargo.lock generated
View File

@ -27,7 +27,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [ dependencies = [
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -74,7 +74,7 @@ checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [ dependencies = [
"hermit-abi", "hermit-abi",
"libc", "libc",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -159,7 +159,7 @@ dependencies = [
"num-traits", "num-traits",
"serde", "serde",
"time", "time",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -360,7 +360,7 @@ checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
dependencies = [ dependencies = [
"libc", "libc",
"redox_users", "redox_users",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -625,6 +625,7 @@ dependencies = [
"rss", "rss",
"scraper", "scraper",
"tempfile", "tempfile",
"text-diff",
"thirtyfour_sync", "thirtyfour_sync",
] ]
@ -816,6 +817,16 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
@ -1185,7 +1196,7 @@ dependencies = [
"csv", "csv",
"encode_unicode", "encode_unicode",
"lazy_static", "lazy_static",
"term", "term 0.5.2",
"unicode-width", "unicode-width",
] ]
@ -1383,7 +1394,7 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [ dependencies = [
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -1468,7 +1479,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75"
dependencies = [ dependencies = [
"lazy_static", "lazy_static",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -1632,7 +1643,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0"
dependencies = [ dependencies = [
"libc", "libc",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -1734,7 +1745,7 @@ dependencies = [
"libc", "libc",
"redox_syscall 0.2.13", "redox_syscall 0.2.13",
"remove_dir_all", "remove_dir_all",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -1748,6 +1759,16 @@ dependencies = [
"utf-8", "utf-8",
] ]
[[package]]
name = "term"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2077e54d38055cf1ca0fd7933a2e00cd3ec8f6fed352b2a377f06dcdaaf3281"
dependencies = [
"kernel32-sys",
"winapi 0.2.8",
]
[[package]] [[package]]
name = "term" name = "term"
version = "0.5.2" version = "0.5.2"
@ -1756,7 +1777,17 @@ checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"dirs", "dirs",
"winapi", "winapi 0.3.9",
]
[[package]]
name = "text-diff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "309238dd66f8bf11a20d015b727b926f294a13fcb8d56770bb984e7a22c43897"
dependencies = [
"getopts",
"term 0.2.14",
] ]
[[package]] [[package]]
@ -1839,7 +1870,7 @@ checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [ dependencies = [
"libc", "libc",
"wasi 0.10.0+wasi-snapshot-preview1", "wasi 0.10.0+wasi-snapshot-preview1",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -1872,7 +1903,7 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
"socket2", "socket2",
"tokio-macros", "tokio-macros",
"winapi", "winapi 0.3.9",
] ]
[[package]] [[package]]
@ -2133,6 +2164,12 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"
@ -2143,6 +2180,12 @@ dependencies = [
"winapi-x86_64-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu",
] ]
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
[[package]] [[package]]
name = "winapi-i686-pc-windows-gnu" name = "winapi-i686-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"
@ -2204,5 +2247,5 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
dependencies = [ dependencies = [
"winapi", "winapi 0.3.9",
] ]

View File

@ -11,6 +11,7 @@ chrono = "0.4"
inotify = "0.10" inotify = "0.10"
prettydiff = "0.6" prettydiff = "0.6"
rss = "2" rss = "2"
text-diff = "0.4"
scraper = "0.13" scraper = "0.13"
tempfile = "3" tempfile = "3"
thirtyfour_sync = "0.27" thirtyfour_sync = "0.27"

View File

@ -14,6 +14,8 @@ use conf::Conf;
mod job; mod job;
use job::Job; use job::Job;
use text_diff;
struct ThreadJob { struct ThreadJob {
job: Job, job: Job,
handle: Option< handle: Option<
@ -152,18 +154,23 @@ fn main() {
println!("Job for '{}' took about {}s", tj.job.url, duration.as_secs()); println!("Job for '{}' took about {}s", tj.job.url, duration.as_secs());
if tj.last_result.is_none() { if tj.last_result.is_none() {
println!("New result for job: '{}'\n", tj.job.url); println!("New result for job: '{}'\n", tj.job.url);
let diff = prettydiff::diff_lines( // Use scraper + fragments to attempt to "normalize"
"", result.as_str()); let fragment = scraper::Html::parse_fragment(result.as_str());
tj.job.update(result.as_str(), diff.to_string().as_str()); tj.last_result = Some(fragment.root_element().inner_html());
tj.last_result = Some(result); tj.job.update(tj.last_result.as_ref().unwrap().as_str(), "");
} }
else { else {
if tj.last_result.as_ref().unwrap().ne(&result) { let fragment = scraper::Html::parse_fragment(result.as_str());
let normalized = fragment.root_element().inner_html();
let (dist, _changeset) = text_diff::diff(
tj.last_result.as_ref().unwrap(), &normalized.as_str(), ""
);
if dist != 0 {
println!("Change detected for job '{}'", tj.job.url); println!("Change detected for job '{}'", tj.job.url);
let diff = prettydiff::diff_lines( let diff = prettydiff::diff_lines(
tj.last_result.as_ref().unwrap(), result.as_str()); tj.last_result.as_ref().unwrap(), result.as_str());
tj.job.update(result.as_str(), diff.to_string().as_str()); tj.job.update(result.as_str(), diff.to_string().as_str());
tj.last_result = Some(result); tj.last_result = Some(normalized);
} }
} }
tj.handle = None; tj.handle = None;