feat: WIP revive scraper with new endpoint

This commit is contained in:
Max Hohlfeld 2025-08-05 11:13:53 +02:00
parent 623b0c814e
commit 599942e732
4 changed files with 940 additions and 366 deletions

1104
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -6,8 +6,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
reqwest = { version = "0.11.24", features = ["blocking"] }
reqwest = { version = "0.11.24", features = ["blocking", "cookies", "json"] }
rss = "2"
time = { version = "0.3", features = ["local-offset", "formatting", "macros"]}
clokwerk = "0.4"
pico-args = "0.5"
serde = { version = "1.0.208", features = ["derive", "serde_derive"] }
serde_json = "1.0.125"
chrono = { version = "0.4.38", features = ["serde"] }

View File

@ -24,7 +24,7 @@ fn parse_args() -> Result<Args, pico_args::Error> {
let args = Args {
url: match url {
Some(val) => val,
None => String::from("https://www.lfs.sachsen.de/restplatzboerse-5152.html"),
None => String::from("https://sachsen.leveso.de/Spa/#/offered-remains-list-appointments"),
},
rss_file: pargs.value_from_str("--rss-file")?,
};

View File

@ -1,4 +1,6 @@
use crate::html_parser::{parse_inner_node, replace_html_codes};
use chrono::{Local, Months, NaiveDate, NaiveDateTime};
use reqwest::header::{ACCEPT, CONTENT_TYPE};
use serde::{Deserialize, Serialize};
#[derive(Clone, PartialEq, Eq)]
pub struct RemainingPlace {
@ -20,74 +22,145 @@ impl PartialOrd for RemainingPlace {
}
}
/// Top-level JSON document that `get_current_places` decodes from the
/// remains-list API response.
///
/// No serde rename attributes are used, so the field name must match the
/// JSON key exactly.
#[derive(Deserialize)]
struct RemainsListResponse {
// The individual entries; each one is mapped to a `RemainingPlace`.
data: Vec<RemainsEntry>,
}
/// One element of `RemainsListResponse::data`.
///
/// The field names are not snake_case because they double as the JSON keys
/// (no serde rename attributes are used).
///
/// NOTE(review): every non-`Option` field is required by serde, so a response
/// that lacks any of them fails to decode as a whole, although only `number`,
/// `courseName`, `publicVisibility`, `startDateIso` and `numOfAvailablePlaces`
/// are read by the code visible in this file.
#[derive(Deserialize)]
struct RemainsEntry {
id: i32,
// Read by `create_description`, which drops everything up to the first space.
courseName: String,
coursePublicName: String,
courseShortName: String,
// Read by `create_id`, which keeps only the part before the first space.
number: String,
locationName: String,
locationShortName: String,
// Formatted as `%d.%m.%Y` to fill `RemainingPlace::date`.
startDateIso: NaiveDate,
endDateIso: NaiveDate,
// Date-time in a custom format (author's note: YYYY-MM-DDTHH:MM:SS+mm:mm,
// presumably with a UTC offset suffix); kept as raw strings for now
// instead of being parsed into a chrono type.
start: String,
end: String,
startTimeInfo: String,
endTimeInfo: String,
// Becomes `RemainingPlace::free` (cast to `usize`).
numOfAvailablePlaces: u32,
courseYear_id: u32,
// `create_description` treats the value 0 as "place not public".
publicVisibility: u32,
// `Option`: the key may be missing or `null` in the response.
publicCommentForSendingOrganisation: Option<String>,
status: u32,
courseYear_status: u32,
numOfParticipants: u32,
numOfPlaces: u32,
language: String,
faculty_id: u32,
// `Option`: the key may be missing or `null` in the response.
facultyName: Option<String>,
}
/// JSON request body that `get_current_places` serialises with
/// `serde_json::to_string` and POSTs to the remains-list API.
///
/// Field names are emitted verbatim as JSON keys (no serde rename attributes).
#[derive(Serialize)]
struct RemainsListRequest {
courseYear_id: u32,
useDateInterval: bool,
// Serialised through chrono's `serde` feature (enabled in Cargo.toml).
selectedIntervalStart: NaiveDateTime,
selectedIntervalEnd: NaiveDateTime,
refreshParams: RefreshParams,
// NOTE(review): the only value ever assigned is "id", so this key looks
// like a typo for `idField` - confirm against the API before renaming,
// because the name is sent as-is.
ifField: String,
}
/// Paging, sorting, filtering and aggregation options nested inside
/// `RemainsListRequest::refreshParams`.
///
/// `get_current_places` sends empty sort and filter lists.
#[derive(Serialize, Deserialize)]
struct RefreshParams {
paginationInfo: PaginationInfo,
// Element shape is not visible here; only empty vectors are ever sent.
sortItems: Vec<String>,
filterItems: Vec<String>,
aggregateParam: AggregateParam,
}
/// Pagination block of `RefreshParams`.
///
/// `get_current_places` fills it with fixed values (page index 0, page size 50).
/// NOTE(review): which of these fields the server actually honours is not
/// visible from this file.
#[derive(Serialize, Deserialize)]
struct PaginationInfo {
currentPageIndex: u32,
currentPageNumOfItems: u32,
maxPageIndex: u32,
totalNumOfItems: u32,
pageSize: u32,
}
/// Aggregation block of `RefreshParams`.
///
/// `get_current_places` sends it with empty lists and `aggregatesOnly: false`.
#[derive(Serialize, Deserialize)]
struct AggregateParam {
items: Vec<String>,
selectedIds: Vec<String>,
aggregatesOnly: bool,
}
/// Fetches the currently offered remaining course places and returns them as
/// `RemainingPlace` values.
///
/// NOTE(review): this span comes from a rendered diff without +/- markers.
/// Statements of the earlier HTML-table scraper (GET `url`, cut out `<tbody>`,
/// walk the `<td>` cells) and of the newer JSON client (POST a
/// `RemainsListRequest`, decode a `RemainsListResponse`) are interleaved, so
/// the text does not compile as shown.
///
/// Returns `Err(reqwest::Error)` for failures propagated with `?`; the
/// `unwrap()` calls below panic instead of returning an error.
pub fn get_current_places(url: &str) -> Result<Vec<RemainingPlace>, reqwest::Error> {
// HTML-scraper variant: download the page behind `url` as text.
// NOTE(review): this is the only statement that reads `url`; the JSON variant
// posts to a hard-coded address, which leaves the parameter unused there.
let body = reqwest::blocking::get(url)?.text()?;
// JSON variant: a fresh blocking client is created on every call.
let client = reqwest::blocking::Client::new();
let req_body = RemainsListRequest {
// NOTE(review): magic number - its meaning is not visible in this file and it
// will presumably need updating when the course year changes; confirm.
courseYear_id: 13,
useDateInterval: true,
// Requested interval: now .. now + 12 months.
// `Local::now().naive_utc()` yields the UTC wall-clock time without a zone.
selectedIntervalStart: Local::now().naive_utc(),
selectedIntervalEnd: Local::now()
.naive_utc()
.checked_add_months(Months::new(12))
// `checked_add_months` is `None` only when the result is out of chrono's range.
.unwrap(),
refreshParams: RefreshParams {
// Only page 0 with a page size of 50 is requested, and no later page is fetched.
// NOTE(review): `totalNumOfItems: 9` looks like a value copied from a captured
// request; whether the server reads it is not visible here.
paginationInfo: PaginationInfo {
currentPageIndex: 0,
currentPageNumOfItems: 50,
maxPageIndex: 0,
totalNumOfItems: 9,
pageSize: 50,
},
sortItems: Vec::default(),
filterItems: Vec::default(),
aggregateParam: AggregateParam {
items: Vec::default(),
selectedIds: Vec::default(),
aggregatesOnly: false,
},
},
ifField: "id".to_string(),
};
// HTML-scraper variant: locate the table body; panics when either tag is missing.
let start = body.find("<tbody").unwrap();
let end = body.find("</tbody>").unwrap();
// JSON variant: build the POST request by hand.
// NOTE(review): with reqwest's `json` feature enabled in Cargo.toml,
// `.json(&req_body)` would serialise the body and set Content-Type in one step.
let req = client
.post("https://sachsen.leveso.de/Api/Public/GetRemainsListCourseAppointmentList")
.body(serde_json::to_string(&req_body).unwrap())
.header(ACCEPT, "application/json")
.header(CONTENT_TYPE, "application/json")
.build()?;
// HTML-scraper variant: `end + 7` is the index of the last byte of "</tbody>".
let table = &body[start..=(end + 7)];
// JSON variant: `body` is rebound to the HTTP response.
// NOTE(review): the HTTP status is not checked (no `error_for_status`).
let body = client.execute(req)?;
// NOTE(review): `json()` returns `Result<_, reqwest::Error>`, so `?` would fit
// the function's return type; `unwrap()` turns every decode failure into a panic.
let json_body: RemainsListResponse = body.json().unwrap();
// HTML-scraper variant from here to `places.sort()`: collect the text of every
// `<td>` cell, then interpret the cells in groups of five.
let mut places: Vec<RemainingPlace> = Vec::new();
let mut lines: Vec<String> = Vec::new();
let mut line = table.replace("\n", "").replace("\r", "");
while let Some(begin) = line.find("<td") {
match line.find("</td>") {
Some(end) => {
// Skips five bytes after the start of "<td" to reach the cell content.
let inner_node = &line[(begin + 5)..end];
let content = parse_inner_node(inner_node);
let escaped_content = replace_html_codes(&content);
lines.push(escaped_content);
// Removes the processed cell so the next `find` sees the following one.
line.replace_range(begin..=end + 5, "");
}
None => break,
}
}
lines.chunks(5).for_each(|chunk| {
// Cell 3 carries the free-slot count; rows without a parsable count are skipped.
let free = try_parse_free_slot(&chunk[3]);
// Cell 0 is "<id> <description>"; rows without a space are skipped.
let (id, description) = if let Some((id, description)) = chunk[0].split_once(' ') {
(id, description)
} else {
return;
};
if free.is_none() {
return;
}
let new_remaining_place = RemainingPlace {
id: id.to_string(),
description: description.to_string(),
date: chunk[2].clone(),
free: free.unwrap()
};
places.push(new_remaining_place);
});
places.sort();
// JSON variant: map every API entry to a `RemainingPlace`.
// NOTE(review): this chain does not sort; whether `places.sort()` above survives
// the change cannot be told from this view - confirm callers do not rely on order.
let places = json_body
.data
.iter()
.map(|r| RemainingPlace {
// `r` is already a reference, so `&r` passes `&&RemainsEntry` and relies on auto-deref.
id: create_id(&r),
description: create_description(&r),
// Rendered as DD.MM.YYYY.
date: r.startDateIso.format("%d.%m.%Y").to_string(),
free: r.numOfAvailablePlaces as usize,
})
.collect();
Ok(places)
}
// NOTE(review): rendered diff without +/- markers - the lines of the earlier
// `try_parse_free_slot` helper and of the newer `create_id` are interleaved
// below, so the text does not compile as shown.
//
// `try_parse_free_slot`: parses `content` as `usize`; if that fails, splits at
// the first space and tries the left part, then the right part; otherwise `None`.
fn try_parse_free_slot(content: &str) -> Option<usize> {
if let Ok(slots) = content.parse::<usize>() {
return Some(slots)
// `create_id`: returns the part of `entry.number` before the first space, or
// the whole `number` when it contains no space.
fn create_id(entry: &RemainsEntry) -> String {
if let Some((only_id, _)) = entry.number.split_once(' ') {
return only_id.to_string();
}
// (`try_parse_free_slot`) fallback for contents made of two space-separated parts.
if let Some((left, right)) = content.split_once(' ') {
if let Ok(slots) = left.parse::<usize>() {
return Some(slots)
}
if let Ok(slots) = right.parse::<usize>() {
return Some(slots)
}
}
None
// (`create_id`) no space found: the number is used unchanged.
entry.number.to_string()
}
/// Builds the description text shown for a remains-list entry.
///
/// Everything up to and including the first space of `courseName` is dropped;
/// a name without any space is used unchanged. Entries whose
/// `publicVisibility` is `0` get the suffix ` (PLATZ NICHT ÖFFENTLICH!)`
/// appended.
fn create_description(entry: &RemainsEntry) -> String {
    let full_name = entry.courseName.as_str();
    // Keep only what follows the first space, falling back to the full name.
    let title = full_name
        .split_once(' ')
        .map_or(full_name, |(_, rest)| rest);

    match entry.publicVisibility {
        0 => format!("{} (PLATZ NICHT ÖFFENTLICH!)", title),
        _ => title.to_owned(),
    }
}