From 6fd8bf43040da77cd2325eba095ed4b556e95d10 Mon Sep 17 00:00:00 2001 From: Kienan Stewart Date: Sat, 24 Sep 2022 10:13:51 -0400 Subject: [PATCH] Switch from chrome driver to firefox driver and increase concurrency --- container-compose.yml | 6 +++++- src/main.rs | 12 +++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/container-compose.yml b/container-compose.yml index ad35cc7..d0199e0 100644 --- a/container-compose.yml +++ b/container-compose.yml @@ -3,7 +3,11 @@ version: '3' services: driver: - image: docker.io/selenium/standalone-chrome + image: docker.io/selenium/standalone-firefox + environment: + - "SE_START_XVFB=false" + - "SE_NODE_OVERRIDE_MAX_SESSIONS=true" + - "SE_NODE_MAX_SESSIONS=${HAUNTER_MAX_SESSIONS:-5}" ports: - '4444:4444' # watcher: diff --git a/src/main.rs b/src/main.rs index e6c8b2b..31bbcfa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -88,14 +88,7 @@ fn main() { last_result: None, }); - // @BUG: It seems the selenium chrome driver can't handle concurrent sessions from - // multiple threads. When the threads attempt to run concurrently, there are crashes, - // eg. - // - // thread '<unnamed>' panicked at 'failed to get url: UnknownError(WebDriverErrorInfo { status: 500, error: "", value: WebDriverErrorValue { message: "unknown error: session deleted because of page crash\nfrom tab crashed\n (Session info: chrome=105.0.5195.52) - // - // This runs single jobs consecutively as a result. - let max_running_tasks = 1; + let max_running_tasks = 5; loop { let mut running_tasks = 0; for tj in jobs.iter_mut().filter(|job| job.handle.is_some()) { @@ -160,7 +153,8 @@ fn main() { } fn get_source(driver: &str, url: &str) -> Result { - let caps = thirtyfour_sync::DesiredCapabilities::chrome(); + // The firefox driver seems to crash less often than the chrome driver.
+ let caps = thirtyfour_sync::DesiredCapabilities::firefox(); let driver = thirtyfour_sync::WebDriver::new(driver, &caps).expect("failed to get driver"); driver.get(url).expect("failed to get url"); let source = driver.page_source().expect("failed to get page source");