+Campus Scraper, +Cache per Site

This commit is contained in:
Jan Barfuß 2023-12-22 23:53:03 +01:00
parent 873237d3d2
commit d4520dfbd0
4 changed files with 87 additions and 19 deletions

View File

@ -2,7 +2,5 @@
"verbose": true, "verbose": true,
"ignore": [], "ignore": [],
"watch": ["src/**/*.ts"], "watch": ["src/**/*.ts"],
"execMap": { "exec": "ts-node ./src/index.ts"
"ts": "node --inspect=0.0.0.0:9229 --nolazy -r ts-node/register"
}
} }

View File

@ -1,14 +1,14 @@
export default class Cache { class SiteCache {
cachedData: any; cachedData: any;
lastUsed: Date; lastUsed: Date;
lifeTime: number; lifeTime: number;
mensa: string; key: string;
constructor() { constructor() {
this.cachedData = null; this.cachedData = null;
this.lastUsed = new Date(); this.lastUsed = new Date();
this.lifeTime = 1000 * 30; this.lifeTime = 1000 * 30;
this.mensa = ""; this.key = "";
} }
get() { get() {
@ -23,10 +23,38 @@ export default class Cache {
return this.cachedData; return this.cachedData;
} }
set(mensa: string, data: any) { set(key: string, data: any) {
this.cachedData = data; this.cachedData = data;
this.lastUsed = new Date(); this.lastUsed = new Date();
this.mensa = mensa; this.key = key;
} }
} }
/**
 * Collection of `SiteCache` entries addressed by their `key`.
 *
 * Entries are kept in a plain array; a lookup returns the first entry whose
 * `key` equals the requested one.
 */
export default class Cache {
  cache: SiteCache[];

  constructor() {
    this.cache = [];
  }

  /**
   * Reads the entry stored under `key`.
   *
   * @param key - Key of the entry to read.
   * @returns The result of the matching entry's `get()`, or `null` when no
   *          entry with that key exists.
   */
  get(key: string) {
    const entry = this.cache.find((candidate) => candidate.key === key);
    if (entry === undefined) {
      return null;
    }
    return entry.get();
  }

  /**
   * Stores `data` under `key`, reusing the existing entry for that key when
   * there is one and appending a new `SiteCache` otherwise.
   *
   * @param key - Key to store the data under.
   * @param data - Value handed to the entry's `set()`.
   */
  set(key: string, data: any) {
    const existing = this.cache.find((candidate) => candidate.key === key);
    if (existing !== undefined) {
      existing.set(key, data);
      return;
    }
    const created = new SiteCache();
    created.set(key, data);
    this.cache.push(created);
  }
}

View File

@ -7,11 +7,11 @@ import morgan from "morgan";
import fetch from './fetch.js'; import fetch from './fetch.js';
import Cache from './cache.js'; import Cache from './cache.js';
import stripHtml from "./stripper.js"; import {stripMensa, stripCampus} from "./stripper.js";
dotenv.config(); dotenv.config();
const app: Express = express().use(cors({ origin: '*' })); const app: Express = express().use(cors({ origin: '*' })).use(bodyParser.json());
app.use(morgan('combined')) app.use(morgan('combined'))
const port = process.env.PORT || 3000; const port = process.env.PORT || 3000;
@ -24,7 +24,7 @@ app.get("/", (req: Request, res: Response) => {
res.send("Mensa API"); res.send("Mensa API");
}); });
app.get("/api/:Ort", (req: Request, res: Response) => { app.get("/api/:Ort/:Mensa?", (req: Request, res: Response) => {
if (req.params.Ort === null) { if (req.params.Ort === null) {
return res.send("Invalid request"); return res.send("Invalid request");
} }
@ -34,7 +34,13 @@ app.get("/api/:Ort", (req: Request, res: Response) => {
}else { }else {
let url = baseUrl + req.params.Ort.toLowerCase(); let url = baseUrl + req.params.Ort.toLowerCase();
fetch(url).then((data) => { fetch(url).then((data) => {
let stripedData = stripHtml(data); let stripedData = null;
if (req.params.Mensa !== null) {
let stripedData = stripMensa(data);
}else {
let stripedData = stripCampus(data);
}
cache.set(req.params.Ort, stripedData); cache.set(req.params.Ort, stripedData);
res.send(stripedData); res.send(stripedData);
}); });
@ -43,7 +49,7 @@ app.get("/api/:Ort", (req: Request, res: Response) => {
}); });
app.get("/api", (req: Request, res: Response) => { app.get("/api", (req: Request, res: Response) => {
res.send("/api/:Ort/:Mensa"); res.send("/api/:Ort/:Mensa?");
}); });
app.listen(port, () => { app.listen(port, () => {

View File

@ -1,14 +1,50 @@
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
export default function stripHtml(html: string): string { class Campus {
name: string;
mensen: Mensa[];
constructor(name: string) {
this.name = name;
this.mensen = [];
}
addMensa(mensa: Mensa) {
this.mensen.push(mensa);
}
}
/**
 * Plain data holder pairing a `name` with a `url`.
 */
class Mensa {
  /**
   * @param name - Value stored in the `name` property.
   * @param url - Value stored in the `url` property.
   */
  constructor(
    public name: string,
    public url: string,
  ) {}
}
/**
 * Extracts the campus overview from an HTML page.
 *
 * Every `.group` element found below a `.tagged` element yields one `Campus`,
 * named after the combined text of the `h2` element(s) inside that group.
 * Every `a` element inside the group yields one `Mensa` whose `name` is the
 * anchor text and whose `url` is the anchor's `href` with the first
 * occurrence of "/index.html" removed ("" when the anchor has no `href`).
 *
 * @param html - HTML markup handed to `cheerio.load`.
 * @returns One `Campus` per matched group, in the order cheerio yields them.
 *          The array serializes to the same JSON as before; the previous
 *          `JSON` return annotation described the global `JSON` object, not
 *          this data.
 */
export function stripCampus(html: string): Campus[] {
  const $ = cheerio.load(html);
  const campuses: Campus[] = [];

  $('.tagged').find('.group').each((_groupIndex, group) => {
    const campus = new Campus($(group).find('h2').text());

    // Distinct parameter names: the inner callback must not shadow the group.
    $(group).find('a').each((_linkIndex, anchor) => {
      const link = $(anchor).attr('href')?.replace("/index.html", "") ?? "";
      campus.addMensa(new Mensa($(anchor).text(), link));
    });

    campuses.push(campus);
  });

  return campuses;
}
export function stripMensa(html: string): string {
const $ = cheerio.load(html);
return "Mensa";
} }