// lfs_scraper/src/main.rs
// Last modified: 2023-01-03 20:39:03 +01:00 — 157 lines, 4.2 KiB, Rust.

use clokwerk::{Scheduler, TimeUnits};
use rss::{ChannelBuilder, ItemBuilder};
use std::fs::File;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;
use time::OffsetDateTime;
/// A single open slot ("Restplatz") scraped from one `<tr>` of the LFS
/// course table.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RemainingPlace {
    /// Course identifier (first table cell).
    id: String,
    /// Human-readable course description (second cell).
    description: String,
    /// Course date, kept as the raw string from the page (third cell).
    date: String,
    /// Number of free places still available (fourth cell).
    free: usize,
}
/// Parsed command-line arguments for the scraper.
#[derive(Debug)]
struct Args {
    /// URL of the page to scrape (`--url`).
    url: String,
    /// Path of the RSS file to (over)write (`--rss-file`).
    rss_file: String,
}
/// Reads `--url` and `--rss-file` from the process arguments.
///
/// # Errors
/// Returns a `pico_args::Error` when either flag is missing or its value
/// cannot be parsed.
fn parse_args() -> Result<Args, pico_args::Error> {
    let mut arguments = pico_args::Arguments::from_env();
    let url = arguments.value_from_str("--url")?;
    let rss_file = arguments.value_from_str("--rss-file")?;
    Ok(Args { url, rss_file })
}
/// Entry point: parses CLI flags, then polls the scrape target every
/// 30 minutes and rewrites the RSS file whenever the set of places changes.
fn main() {
    // Exit with a non-zero status when --url/--rss-file are missing.
    let args = match parse_args() {
        Ok(v) => v,
        Err(err) => {
            eprintln!("Error: {err}");
            std::process::exit(1);
        }
    };
    // Cache of the previous scrape result; locked inside
    // should_feed_be_updated. NOTE(review): only the single scheduler
    // closure ever owns this Arc, so a plain Mutex would suffice.
    let last_places: Arc<Mutex<Vec<RemainingPlace>>> = Arc::new(Mutex::new(Vec::new()));
    let mut scheduler = Scheduler::new();
    scheduler
        .every(30.minutes())
        .run(move || match get_current_places(&args.url) {
            Ok(places) => {
                // Rewrite the feed only when the scraped set differs from
                // the previous run, so readers are not spammed with items.
                if should_feed_be_updated(&places, &last_places) {
                    update_rss_file(places, &args.url, &args.rss_file);
                    println!("Updated feed.");
                } else {
                    println!("No update.");
                }
            }
            Err(error) => {
                println!("Error: {}", error);
            }
        });
    // Drive the scheduler forever; the 10 ms sleep keeps scheduling latency
    // low at the cost of frequent wakeups.
    loop {
        scheduler.run_pending();
        thread::sleep(Duration::from_millis(10));
    }
}
/// Downloads `url` and scrapes the first `<tbody>` of the page into a list
/// of `RemainingPlace` entries.
///
/// # Errors
/// Returns a `reqwest::Error` if the HTTP request or body download fails.
///
/// # Panics
/// Panics (via `unwrap`) when the page contains no `<tbody>`/`</tbody>`,
/// when a `<tr>` line is not followed by four cell lines, or when the
/// fourth cell is not a number — i.e. whenever the page layout changes.
/// NOTE(review): in this long-running daemon a panic here aborts the
/// scheduler thread; consider returning a richer error type.
fn get_current_places(url: &str) -> Result<Vec<RemainingPlace>, reqwest::Error> {
    let body = reqwest::blocking::get(url)?.text()?;
    // Cut the document down to the first table body. Assumes "</tbody>"
    // appears after "<tbody" — TODO confirm against the live page.
    let start = body.find("<tbody").unwrap();
    let end = body.find("</tbody>").unwrap();
    // "</tbody>" is 8 bytes, so `end + 7` is its last byte and the
    // inclusive range keeps the whole closing tag in the slice.
    let table = &body[start..=(end + 7)];
    let mut places: Vec<RemainingPlace> = Vec::new();
    let mut iter = table.lines();
    // The markup is expected to hold one tag per line: a "<tr>" line
    // followed by four cell lines (id, description, date, free count).
    while let Some(line) = iter.next() {
        if line.contains("<tr>") {
            let id = parse_node(iter.next().unwrap());
            let description = parse_node(iter.next().unwrap());
            let date = parse_node(iter.next().unwrap());
            let free = parse_node(iter.next().unwrap()).parse().unwrap();
            let place = RemainingPlace {
                id,
                description,
                date,
                free,
            };
            places.push(place);
        }
    }
    Ok(places)
}
/// Builds a single-item RSS channel listing every place in `places` and
/// writes it to `rss_file`, pretty-printed with two-space indentation.
///
/// # Panics
/// Panics if the RSS file cannot be created or written.
fn update_rss_file(places: Vec<RemainingPlace>, url: &str, rss_file: &str) {
    let mut channel = ChannelBuilder::default()
        .title(String::from("LFS Restplatzbörse"))
        .link(url.to_string())
        .description(String::from(
            "Ein RSS Feed der Restplatzbörse der Landesfeuerwehrschule Sachsen. Nicht offiziell.",
        ))
        .language(Some("de-DE".to_string()))
        .build();
    // `now_local` fails when the local UTC offset cannot be determined
    // (e.g. in multi-threaded programs on some platforms); fall back to UTC
    // instead of letting the unwrap kill the whole daemon.
    let now = OffsetDateTime::now_local().unwrap_or_else(|_| OffsetDateTime::now_utc());
    let title = format!(
        "Restplatzbörse Update - {}",
        now.format(time::macros::format_description!(
            "[year]-[month]-[day] [hour]:[minute]"
        ))
        .expect("statically checked format description cannot fail")
    );
    // One feed item whose content lists every place on its own line.
    let content = places
        .iter()
        .map(|place| {
            format!(
                "{} - {} - {} - {} Plätze</br>",
                place.id, place.description, place.date, place.free
            )
        })
        .collect::<Vec<String>>()
        .join("\n");
    let item = ItemBuilder::default()
        .title(Some(title))
        .content(Some(content))
        .build();
    channel.set_items(vec![item]);
    let output = File::create(rss_file).expect("failed to create RSS output file");
    // `b' '` instead of `' ' as u8`: same byte, no lossy cast (clippy).
    channel.pretty_write_to(output, b' ', 2).expect("failed to write RSS feed");
}
/// Compares the freshly scraped `new_places` with the cached result of the
/// previous run and, if they differ, replaces the cache with the new list.
///
/// Returns `true` when the feed should be rewritten (the lists differ).
///
/// # Panics
/// Panics if the mutex is poisoned (a previous lock holder panicked).
fn should_feed_be_updated(
    // `&[T]` instead of `&Vec<T>`: callers passing `&Vec` still work via
    // deref coercion, and the function accepts any slice.
    new_places: &[RemainingPlace],
    last_places: &Mutex<Vec<RemainingPlace>>,
) -> bool {
    let mut last_places = last_places.lock().unwrap();
    // Slice equality already checks length plus element-wise equality,
    // replacing the manual len()/zip()/all() dance.
    let changed = last_places.as_slice() != new_places;
    if changed {
        // Reuse the cache's allocation instead of clear() + append(clone()).
        last_places.clear();
        last_places.extend_from_slice(new_places);
    }
    changed
}
/// Extracts the text content of a one-line HTML node,
/// e.g. `"<td>abc</td>"` → `"abc"`.
///
/// # Panics
/// Panics when the line contains no `>` or no `</`, or when `</` occurs
/// before the first `>` (malformed markup).
fn parse_node(input: &str) -> String {
    // Char pattern instead of a one-char &str pattern: same result,
    // cheaper search (clippy `single_char_pattern`).
    let start = input.find('>').expect("node line has no opening '>'");
    let end = input.find("</").expect("node line has no closing tag");
    input[(start + 1)..end].to_string()
}