Compare commits

...

8 Commits

Author SHA1 Message Date
Kienan Stewart 4b36b75d9f Add first pass at using inotify to watch the job directory for changes
There are number of cases which currently aren't handled:

 * moving a file out of or into the directory
 * a file being touched: should that be used to reset that last_run
   value?
 * when jobs are removed, it is regardless of thread state so there
   may be children that never get joined
2022-09-25 18:36:40 -04:00
Kienan Stewart 518f0b284f Add method to check job extension on path 2022-09-25 18:12:20 -04:00
Kienan Stewart da4e79463f Load jobs from job files in job directory 2022-09-25 17:50:01 -04:00
Kienan Stewart 82bd7e2d5c Create channel when creating job from file 2022-09-25 17:49:26 -04:00
Kienan Stewart d2866c3f37 Set source_file on jobs when created from a job file 2022-09-25 17:26:27 -04:00
Kienan Stewart fef475c8ad Set default output_file for jobs when loaded from file 2022-09-25 17:16:50 -04:00
Kienan Stewart 38b5d5bb32 Split Job struct into it's own module 2022-09-25 17:11:48 -04:00
Kienan Stewart 514209dae2 Refactor reading configuration file into a separate function
This allows for reusing it elsewhere (eg. for job configuration files)
2022-09-25 16:39:25 -04:00
5 changed files with 417 additions and 146 deletions

23
Cargo.lock generated
View File

@ -620,6 +620,7 @@ version = "0.1.0"
dependencies = [
"ansi-to-html",
"chrono",
"inotify",
"prettydiff",
"rss",
"scraper",
@ -757,6 +758,28 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "inotify"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abf888f9575c290197b2c948dc9e9ff10bd1a39ad1ea8585f734585fa6b9d3f9"
dependencies = [
"bitflags",
"futures-core",
"inotify-sys",
"libc",
"tokio",
]
[[package]]
name = "inotify-sys"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb"
dependencies = [
"libc",
]
[[package]]
name = "instant"
version = "0.1.12"

View File

@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
ansi-to-html = "0.1"
chrono = "0.4"
inotify = "0.10"
prettydiff = "0.6"
rss = "2"
scraper = "0.13"

View File

@ -22,36 +22,13 @@ impl Conf {
}
pub fn update_from_file(&mut self, path: &Path) {
let mut file = match std::fs::File::open(path) {
Err(why) => {
println!("Could not open file '{}': {}", path.display(), why); return;
},
Ok(file) => file,
let items = match read_conf_file(path) {
Err(_) => return,
Ok(items) => items,
};
let mut content = String::new();
match file.read_to_string(&mut content) {
Err(why) => println!("Could not read from file '{}': {}", path.display(), why),
Ok(_) => (),
}
let lines = content.lines();
for line in lines {
if line.starts_with('#') {
continue;
}
let result = line.split_once('=');
if result.is_none() {
println!("Skipping configuration line '{}', no key-value delimiter (=) found", line);
continue;
}
let key = result.unwrap().0.trim();
let value = result.unwrap().1.trim();
if key.eq("") || value.eq("") {
println!("Skipping configuration line '{}', no key or value side is empty", line);
continue;
}
for item in items.iter() {
let key = item.0.as_str();
let value = item.1.as_str();
match key {
"job_dir" => {
println!("{} changed from '{}' to '{}' by line in '{}'",
@ -83,6 +60,46 @@ impl Conf {
}
}
pub fn read_conf_file(path: &Path) -> Result<std::vec::Vec<(String, String)>, &str> {
let mut file = match std::fs::File::open(path) {
Err(why) => {
println!("Could not open file '{}': {}", path.display(), why);
return Err("Could not open file");
},
Ok(file) => file,
};
let mut content = String::new();
match file.read_to_string(&mut content) {
Err(why) => {
println!("Could not read from file '{}': {}", path.display(), why);
return Err("Could not read file");
},
Ok(_) => (),
}
let mut results = std::vec::Vec::<(String, String)>::new();
let lines = content.lines();
for line in lines {
if line.starts_with('#') {
continue;
}
let result = line.split_once('=');
if result.is_none() {
println!("Skipping configuration line '{}', no key-value delimiter (=) found", line);
continue;
}
let key = result.unwrap().0.trim();
let value = result.unwrap().1.trim();
if key.eq("") || value.eq("") {
println!("Skipping configuration line '{}', no key or value side is empty", line);
continue;
}
results.push((key.to_string(), value.to_string()));
}
return Ok(results);
}
#[cfg(test)]
mod tests {

240
src/job.rs Normal file
View File

@ -0,0 +1,240 @@
use std::path::{Path,PathBuf};
use std::str::FromStr;
use std::time::Duration;
use std::time::Instant;
use ansi_to_html;
use chrono;
use rss;
use crate::conf::Conf;
pub struct Job {
pub url: String,
pub selector: String,
pub every: Duration,
pub last_run: Option<Instant>,
pub output_file: Option<PathBuf>,
pub channel: Option<rss::Channel>,
pub source_file: Option<PathBuf>,
}
impl Job {
fn default(conf: &Conf) -> Job {
return Job {
url: "".to_string(),
selector: "".to_string(),
every: conf.check_interval,
last_run: None,
output_file: None,
channel: None,
source_file: None,
};
}
fn set_default_output_file(&mut self, conf: &Conf) {
let mut output_file = conf.output_dir.clone();
let mut file_name = self.url.clone().replace("/", "-");
file_name.push_str(".rss");
output_file = output_file.join(Path::new(&file_name));
self.output_file = Some(output_file);
}
fn set_default_channel(&mut self) {
match std::fs::File::open(self.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}': {}", self.output_file.as_ref().unwrap().display(), why);
println!("Creating empty RSS channel for job '{}'", self.url);
self.channel = Some(
rss::ChannelBuilder::default()
.title(self.url.as_str())
.link(self.url.as_str())
.description("haunting")
.build()
);
self.channel.as_mut().unwrap().set_generator("Haunter".to_string());
},
Ok(file) => {
self.channel = Some(
rss::Channel::read_from(std::io::BufReader::new(file)).unwrap()
);
},
};
}
pub fn has_job_file_extension(path: &Path) -> bool {
match path.extension() {
Some(x) => {
if ! "job".eq(x) {
return false;
}
},
None => {
return false;
}
};
return true;
}
pub fn new(url: &str, selector: &str, conf: &Conf) -> Job {
let mut job = Job::default(conf);
job.url = url.to_string();
job.set_default_output_file(conf);
job.selector = selector.to_string();
job.set_default_channel();
return job;
}
pub fn from_file<'a>(path: &'a Path, conf: &'a Conf) -> Result<Job, &'a str> {
let mut job = Job::default(conf);
let items = match crate::conf::read_conf_file(path) {
Err(_) => return Err("Failed to read from configuration file"),
Ok(items) => items,
};
for item in items.iter() {
let key = item.0.as_str();
match key {
"url" => {
job.url = item.1.clone();
},
"selector" => {
job.selector = item.1.clone();
},
"every" => {
let converted_value = match item.1.parse::<u64>() {
Err(why) => {
println!("Failed to convert '{}' to u64: {}", item.1, why);
return Err("Failed to parse value of 'every'");
},
Ok(v) => v,
};
job.every = Duration::new(converted_value, 0);
},
"output_file" => {
job.output_file = Some(
conf.output_dir.join(PathBuf::from_str(item.1.as_str()).unwrap())
);
}
_ => {
println!("Unknown key '{}' in job file '{}'", key, path.display());
return Err("Unknown key");
}
}
}
if job.output_file.is_none() {
job.set_default_output_file(conf);
}
job.set_default_channel();
job.source_file = Some(PathBuf::from(path));
return Ok(job);
}
pub fn update(&mut self, value: &str, diff: &str) {
if self.channel.is_none() {
println!("Skipping update of channel: no channel set");
return;
}
let channel = self.channel.as_mut().unwrap();
let update_time = chrono::Utc::now();
let item = rss::ItemBuilder::default()
.title(format!("Update to '{}'", self.url))
.link(self.url.clone())
.pub_date(update_time.to_rfc2822())
.content(format!(r#"
New content at {}: <br>
<pre class="new-value">
{}
</pre>
<br><br>
Diff: <br>
<pre class="diff-value">
{}
</pre>
"#,
update_time.format("%d/%m/%Y %H:%M"),
ansi_to_html::convert_escaped(value).unwrap().as_str(),
ansi_to_html::convert_escaped(diff).unwrap().as_str()
)
)
.build();
channel.items.push(item);
if self.output_file.is_some() {
match std::fs::File::create(self.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}' for writing: {}", self.output_file.as_ref().unwrap().display(), why);
},
Ok(file) => {
channel.write_to(
std::io::BufWriter::new(file)
).expect("Failed to write updated channel");
},
};
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
#[test]
fn create() {
let conf = Conf::get_default_conf();
let job = Job::new(&"my/url", &"myselector", &conf);
assert_eq!(job.url, "my/url");
assert_eq!(job.output_file.unwrap().to_str().unwrap(), "results.d/my-url.rss");
assert_eq!(job.selector, "myselector");
assert!(job.source_file.is_none());
assert!(job.channel.is_some());
}
#[test]
fn create_from_file() {
let conf = Conf::get_default_conf();
let mut tf = NamedTempFile::new().unwrap();
let job_conf = r#"
url = http://example.com/test
output_file = example_output.atom
every=7200
selector = section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)
"#;
tf.write_all(job_conf.as_bytes()).expect("Failed to write configuration to file");
let job = Job::from_file(tf.path(), &conf).expect("Failed to read configuration file");
assert_eq!(job.url, "http://example.com/test");
assert_eq!(job.output_file.unwrap().to_str().unwrap(), "results.d/example_output.atom");
assert_eq!(job.every.as_secs(), 7200);
assert_eq!(job.selector, "section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)");
assert_eq!(job.source_file.unwrap().to_str(), tf.path().to_str());
assert!(job.channel.is_some());
}
#[test]
fn create_from_file_default_output_file() {
let conf = Conf::get_default_conf();
let mut tf = NamedTempFile::new().unwrap();
let job_conf = r#"
url = http://example.com/test
every=7200
selector = section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)
"#;
tf.write_all(job_conf.as_bytes()).expect("Failed to write configuration to file");
let job = Job::from_file(tf.path(), &conf).expect("Failed to read configuration file");
assert_eq!(job.url, "http://example.com/test");
assert_eq!(job.output_file.unwrap().to_str().unwrap(), "results.d/http:--example.com-test.rss");
assert_eq!(job.every.as_secs(), 7200);
assert_eq!(job.selector, "section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)");
assert_eq!(job.source_file.unwrap().to_str(), tf.path().to_str());
assert!(job.channel.is_some());
}
}

View File

@ -1,111 +1,18 @@
use std::cmp::Ordering;
use std::env;
use std::path::{Path,PathBuf};
use std::str::FromStr;
use std::thread;
use std::time::Duration;
use std::time::Instant;
use ansi_to_html;
use chrono;
use rss;
use inotify;
use thirtyfour_sync::WebDriverCommands;
mod conf;
use conf::Conf;
struct Job {
url: String,
selector: String,
every: Duration,
last_run: Option<Instant>,
output_file: Option<PathBuf>,
channel: Option<rss::Channel>,
}
impl Job {
fn new(url: &str, selector: &str, conf: &Conf) -> Job {
let mut job = Job {
url: String::from_str(url).unwrap(),
selector: String::from_str(selector).unwrap(),
every: conf.check_interval,
last_run: None,
output_file: None,
channel: None,
};
let mut output_file = conf.output_dir.clone();
let mut file_name = job.url.clone().replace("/", "-");
file_name.push_str(".rss");
output_file = output_file.join(Path::new(&file_name));
job.output_file = Some(output_file);
match std::fs::File::open(job.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}': {}", job.output_file.as_ref().unwrap().display(), why);
println!("Creating empty RSS channel for job '{}'", job.url);
job.channel = Some(
rss::ChannelBuilder::default()
.title(url)
.link(url)
.description("haunting")
.build()
);
job.channel.as_mut().unwrap().set_generator("Haunter".to_string());
},
Ok(file) => {
job.channel = Some(
rss::Channel::read_from(std::io::BufReader::new(file)).unwrap()
);
},
};
return job;
}
fn update(&mut self, value: &str, diff: &str) {
if self.channel.is_none() {
println!("Skipping update of channel: no channel set");
return;
}
let channel = self.channel.as_mut().unwrap();
let update_time = chrono::Utc::now();
let item = rss::ItemBuilder::default()
.title(format!("Update to '{}'", self.url))
.link(self.url.clone())
.pub_date(update_time.to_rfc2822())
.content(format!(r#"
New content at {}: <br>
<pre class="new-value">
{}
</pre>
<br><br>
Diff: <br>
<pre class="diff-value">
{}
</pre>
"#,
update_time.format("%d/%m/%Y %H:%M"),
ansi_to_html::convert_escaped(value).unwrap().as_str(),
ansi_to_html::convert_escaped(diff).unwrap().as_str()
)
)
.build();
channel.items.push(item);
if self.output_file.is_some() {
match std::fs::File::create(self.output_file.as_ref().unwrap()) {
Err(why) => {
println!("Failed to open '{}' for writing: {}", self.output_file.as_ref().unwrap().display(), why);
},
Ok(file) => {
channel.write_to(
std::io::BufWriter::new(file)
).expect("Failed to write updated channel");
},
};
}
}
}
mod job;
use job::Job;
struct ThreadJob {
job: Job,
@ -176,23 +83,50 @@ fn main() {
std::process::exit(1);
}
let mut jobs = Vec::new();
let some_job = Job::new("https://www.rust-lang.org", "a.download-link", &conf);
let other_job = Job::new(
"https://arstechnica.com/",
"section.listing:nth-child(2) > ul:nth-child(1) > li:nth-child(3) > header:nth-child(3) > h2:nth-child(1) > a:nth-child(1)",
&conf
);
jobs.push(ThreadJob {
job: some_job,
handle: None,
last_result: None,
});
jobs.push(ThreadJob {
job: other_job,
handle: None,
last_result: None,
});
let mut jobs: Vec<ThreadJob> = Vec::new();
// Load all jobs from job directory
let job_dir = conf.job_dir.clone();
for entry in std::fs::read_dir(job_dir).expect("Failed to iterate over job directory") {
let _entry = match entry {
Err(why) => {
println!("Skipping '{}': Error reading file in job directory", why);
continue;
},
Ok(value) => value,
};
let md = _entry.metadata().expect("Failed to read file metadata");
if !md.is_file() {
println!("Skipping '{}': not a file", _entry.path().display());
continue;
}
if !Job::has_job_file_extension(&_entry.path()) {
println!("Skipping '{}': does not have '.job' extension", _entry.path().display());
continue;
}
let job = match Job::from_file(&_entry.path(), &conf) {
Err(why) => {
println!("Failed to load job from '{}': {}", _entry.path().display(), why);
continue;
},
Ok(value) => value,
};
jobs.push(ThreadJob {
job: job,
handle: None,
last_result: None,
});
}
let mut inotify = inotify::Inotify::init()
.expect("Failed to initialize inotify");
inotify.add_watch(
conf.job_dir.to_str().unwrap().clone(),
inotify::WatchMask::CREATE | inotify::WatchMask::DELETE
).expect("Failed to start watching job dir for changes");
let mut notify_buffer = [0; 1024];
let max_running_tasks = 5;
loop {
@ -258,13 +192,69 @@ fn main() {
break;
}
}
else {
let time_since_last_run = Instant::now().duration_since(tj.job.last_run.unwrap());
println!("Job '{}' - {}s since last run", tj.job.url, time_since_last_run.as_secs());
}
}
break;
}
let events = inotify.read_events(&mut notify_buffer);
if events.is_ok() {
for event in events.unwrap() {
if event.name.is_none() {
continue;
}
let mut path = PathBuf::new();
path.push(&conf.job_dir.to_str().unwrap().clone());
path.push(event.name.unwrap());
match event.mask {
inotify::EventMask::CREATE => {
let md = match path.metadata() {
Err(why) => {
println!("Failed to read metadata from event file '{}': {}",
path.display(), why);
continue;
},
Ok(value) => value,
};
if !md.is_file() {
continue;
}
if !Job::has_job_file_extension(&path) {
continue;
}
let job = match Job::from_file(&path, &conf) {
Err(why) => {
println!("Failed to load job from '{}': {}", path.display(), why);
continue;
},
Ok(value) => value,
};
jobs.push(ThreadJob {
job: job,
handle: None,
last_result: None,
});
println!("Added job from '{}' being created", path.display());
},
inotify::EventMask::DELETE => {
let x = jobs.len();
// @TODO If a thread is currently running, we never join it again.
jobs.retain(
|tj|
!(tj.job.source_file.is_some() && path.to_str().eq(&tj.job.source_file.as_ref().unwrap().to_str()))
);
println!(
"{} jobs removed due to deletion of '{}'",
x - jobs.len(), path.display()
);
},
mask => {
println!("Unhandled event mask: {:?}", mask);
continue;
}
};
}
}
std::thread::sleep(Duration::new(1, 0));
}
}