From d4520dfbd02b5e59e3170f7aebe52dcac9a0aa58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Barfu=C3=9F?= Date: Fri, 22 Dec 2023 23:53:03 +0100 Subject: [PATCH] +Campus Scraper, +Cache per Site --- nodemon.json | 4 +--- src/cache.ts | 38 +++++++++++++++++++++++++++++++++----- src/index.ts | 16 +++++++++++----- src/stripper.ts | 48 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/nodemon.json b/nodemon.json index ede7826..51c5fa7 100644 --- a/nodemon.json +++ b/nodemon.json @@ -2,7 +2,5 @@ "verbose": true, "ignore": [], "watch": ["src/**/*.ts"], - "execMap": { - "ts": "node --inspect=0.0.0.0:9229 --nolazy -r ts-node/register" - } + "exec": "ts-node ./src/index.ts" } diff --git a/src/cache.ts b/src/cache.ts index 3da1ef0..bef0d8e 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -1,14 +1,14 @@ -export default class Cache { +class SiteCache { cachedData: any; lastUsed: Date; lifeTime: number; - mensa: string; + key: string; constructor() { this.cachedData = null; this.lastUsed = new Date(); this.lifeTime = 1000 * 30; - this.mensa = ""; + this.key = ""; } get() { @@ -23,10 +23,38 @@ export default class Cache { return this.cachedData; } - set(mensa: string, data: any) { + set(key: string, data: any) { this.cachedData = data; this.lastUsed = new Date(); - this.mensa = mensa; + this.key = key; } } +export default class Cache { + cache: SiteCache[]; + + constructor() { + this.cache = []; + } + + get(key: string) { + for (let i = 0; i < this.cache.length; i++) { + if (this.cache[i].key === key) { + return this.cache[i].get(); + } + } + return null; + } + + set(key: string, data: any) { + for (let i = 0; i < this.cache.length; i++) { + if (this.cache[i].key === key) { + this.cache[i].set(key, data); + return; + } + } + let siteCache = new SiteCache(); + siteCache.set(key, data); + this.cache.push(siteCache); + } +} diff --git a/src/index.ts b/src/index.ts index 3e86875..9eebe08 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,11 +7,11 @@ import morgan from "morgan"; import fetch from './fetch.js'; import Cache from './cache.js'; -import stripHtml from "./stripper.js"; +import {stripMensa, stripCampus} from "./stripper.js"; dotenv.config(); -const app: Express = express().use(cors({ origin: '*' })); +const app: Express = express().use(cors({ origin: '*' })).use(bodyParser.json()); app.use(morgan('combined')) const port = process.env.PORT || 3000; @@ -24,7 +24,7 @@ app.get("/", (req: Request, res: Response) => { res.send("Mensa API"); }); -app.get("/api/:Ort", (req: Request, res: Response) => { +app.get("/api/:Ort/:Mensa?", (req: Request, res: Response) => { if (req.params.Ort === null) { return res.send("Invalid request"); } @@ -34,7 +34,13 @@ app.get("/api/:Ort", (req: Request, res: Response) => { }else { let url = baseUrl + req.params.Ort.toLowerCase(); fetch(url).then((data) => { - let stripedData = stripHtml(data); + let stripedData = null; + if (req.params.Mensa !== null) { + let stripedData = stripMensa(data); + }else { + let stripedData = stripCampus(data); + } + cache.set(req.params.Ort, stripedData); res.send(stripedData); }); @@ -43,7 +49,7 @@ app.get("/api/:Ort", (req: Request, res: Response) => { }); app.get("/api", (req: Request, res: Response) => { - res.send("/api/:Ort/:Mensa"); + res.send("/api/:Ort/:Mensa?"); }); app.listen(port, () => { diff --git a/src/stripper.ts b/src/stripper.ts index 638a900..a9149e6 100644 --- a/src/stripper.ts +++ b/src/stripper.ts @@ -1,14 +1,50 @@ import * as cheerio from 'cheerio'; -export default function stripHtml(html: string): string { +class Campus { + name: string; + mensen: Mensa[]; + + constructor(name: string) { + this.name = name; + this.mensen = []; + } + addMensa(mensa: Mensa) { + this.mensen.push(mensa); + } +} + +class Mensa { + name: string; + url: string; + + constructor(name: string, url: string) { + this.name = name; + this.url = url; + } +} + +export function stripCampus(html: string): JSON { const $ = cheerio.load(html); - const $mensen = $('.group').find('.element'); - console.log($mensen.text() + " length: " + $mensen.length); + let ort :Campus[] = []; + const $campus = $('.tagged').find('.group'); - const elements = $('.elements:eq(0)'); - console.log(elements.text()); - return $.text(); + $campus.each((i, elem) => { + let campus = new Campus($(elem).find('h2').text()); + //console.log($(elem).find('h2').text() + "\n"); + $(elem).find('a').each((i, elem) => { + const link = $(elem).attr('href')?.replace("/index.html",""); + campus.addMensa(new Mensa($(elem).text(), link ?? "")); + //console.log(" -> " + $(elem).text() + " | " + $(elem).attr('href')?.replace("/index.html","") + "\n"); + }); + ort.push(campus); + }); + return JSON.parse(JSON.stringify(ort)); +} + +export function stripMensa(html: string): string { + const $ = cheerio.load(html); + return "Mensa"; }