Compare commits
No commits in common. "6fd8bf43040da77cd2325eba095ed4b556e95d10" and "24baa3a6a66fec67bf71fd18b8035b2a491b126b" have entirely different histories.
6fd8bf4304
...
24baa3a6a6
|
@ -3,11 +3,7 @@ version: '3'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
driver:
|
driver:
|
||||||
image: docker.io/selenium/standalone-firefox
|
image: docker.io/selenium/standalone-chrome
|
||||||
environment:
|
|
||||||
- "SE_START_XVFB=false"
|
|
||||||
- "SE_NODE_OVERRIDE_MAX_SESSIONS=true"
|
|
||||||
- "SE_NODE_MAX_SESSIONS=${HAUNTER_MAX_SESSIONS:-5}"
|
|
||||||
ports:
|
ports:
|
||||||
- '4444:4444'
|
- '4444:4444'
|
||||||
# watcher:
|
# watcher:
|
||||||
|
|
70
src/main.rs
70
src/main.rs
|
@ -1,10 +1,8 @@
|
||||||
use std::cmp::Ordering;
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use thirtyfour_sync::WebDriverCommands;
|
use thirtyfour_sync::WebDriverCommands;
|
||||||
|
|
||||||
mod conf;
|
mod conf;
|
||||||
|
@ -27,23 +25,6 @@ struct ThreadJob<'a> {
|
||||||
last_result: Option<String>,
|
last_result: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ThreadJob<'_> {
|
|
||||||
fn lru_not_running(&self, b: &ThreadJob) -> Option<Ordering> {
|
|
||||||
// the "greater" value is one that is running, but we
|
|
||||||
// don't recheck the thread state every time. If there's
|
|
||||||
// a handle that isn't none, assume it's running.
|
|
||||||
if self.handle.is_some() != b.handle.is_some() {
|
|
||||||
if self.handle.is_some() {
|
|
||||||
return Some(Ordering::Greater);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return Some(Ordering::Less);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return self.job.last_run.partial_cmp(&b.job.last_run);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut conf = Conf {
|
let mut conf = Conf {
|
||||||
job_dir: String::from_str("jobs.d").unwrap(),
|
job_dir: String::from_str("jobs.d").unwrap(),
|
||||||
|
@ -88,11 +69,24 @@ fn main() {
|
||||||
last_result: None,
|
last_result: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
let max_running_tasks = 5;
|
// @BUG: It seems the selenium chrome driver can't handle concurrent sessions from
|
||||||
|
// multiple threads. When the threads attempt to run concurrently, there are crashes,
|
||||||
|
// eg.
|
||||||
|
//
|
||||||
|
// thread '<unnamed>' panicked at 'failed to get url: UnknownError(WebDriverErrorInfo { status: 500, error: "", value: WebDriverErrorValue { message: "unknown error: session deleted because of page crash\nfrom tab crashed\n (Session info: chrome=105.0.5195.52)
|
||||||
|
//
|
||||||
|
// This should just run single jobs consecutively as a result.
|
||||||
loop {
|
loop {
|
||||||
let mut running_tasks = 0;
|
for tj in jobs.iter_mut() {
|
||||||
for tj in jobs.iter_mut().filter(|job| job.handle.is_some()) {
|
let should_run_by_time = tj.job.last_run.is_some() && Instant::now().duration_since(tj.job.last_run.unwrap()).ge(&tj.job.every);
|
||||||
// Check if the task is done
|
if tj.handle.is_none() && (should_run_by_time || tj.job.last_run.is_none()) {
|
||||||
|
tj.handle = Some(thread::spawn(|| {
|
||||||
|
return get_source(driver, tj.job.url);
|
||||||
|
}));
|
||||||
|
println!("Started thread for '{}'", tj.job.url);
|
||||||
|
tj.job.last_run = Some(Instant::now());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if tj.handle.is_some() && tj.handle.as_ref().unwrap().is_finished() {
|
if tj.handle.is_some() && tj.handle.as_ref().unwrap().is_finished() {
|
||||||
let duration = Instant::now().duration_since(tj.job.last_run.unwrap());
|
let duration = Instant::now().duration_since(tj.job.last_run.unwrap());
|
||||||
tj.job.last_run = Some(Instant::now());
|
tj.job.last_run = Some(Instant::now());
|
||||||
|
@ -122,42 +116,18 @@ fn main() {
|
||||||
}
|
}
|
||||||
tj.handle = None;
|
tj.handle = None;
|
||||||
}
|
}
|
||||||
else if tj.handle.is_some() {
|
|
||||||
running_tasks += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while running_tasks < max_running_tasks {
|
|
||||||
// Sort by least recently run
|
|
||||||
// According to the docs, unstable_by is preferred for speed +
|
|
||||||
// reduced memory allocations, but doesn't guarantee order of
|
|
||||||
// equal elements.
|
|
||||||
jobs.sort_unstable_by(|a, b| a.lru_not_running(b).unwrap());
|
|
||||||
for tj in jobs.iter_mut() {
|
|
||||||
let should_run_by_time = tj.job.last_run.is_some() && Instant::now().duration_since(tj.job.last_run.unwrap()).ge(&tj.job.every);
|
|
||||||
if tj.handle.is_none() && (should_run_by_time || tj.job.last_run.is_none()) {
|
|
||||||
tj.handle = Some(thread::spawn(|| {
|
|
||||||
return get_source(driver, tj.job.url);
|
|
||||||
}));
|
|
||||||
println!("Started thread for '{}'", tj.job.url);
|
|
||||||
tj.job.last_run = Some(Instant::now());
|
|
||||||
running_tasks += 1;
|
|
||||||
if running_tasks >= max_running_tasks {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
std::thread::sleep(Duration::new(1, 0));
|
std::thread::sleep(Duration::new(1, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_source(driver: &str, url: &str) -> Result<String, &'static str> {
|
fn get_source(driver: &str, url: &str) -> Result<String, &'static str> {
|
||||||
// The firefox driver seems to crash less often than the chrome driver.
|
let caps = thirtyfour_sync::DesiredCapabilities::chrome();
|
||||||
let caps = thirtyfour_sync::DesiredCapabilities::firefox();
|
|
||||||
let driver = thirtyfour_sync::WebDriver::new(driver, &caps).expect("failed to get driver");
|
let driver = thirtyfour_sync::WebDriver::new(driver, &caps).expect("failed to get driver");
|
||||||
driver.get(url).expect("failed to get url");
|
driver.get(url).expect("failed to get url");
|
||||||
let source = driver.page_source().expect("failed to get page source");
|
let source = driver.page_source().expect("failed to get page source");
|
||||||
driver.quit().expect("failed to close session");
|
driver.quit().expect("failed to close session");
|
||||||
return Ok(source);
|
return Ok(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue