+Campus Scraper, +Cache per Site

This commit is contained in:
Jan Barfuß 2023-12-22 23:53:03 +01:00
parent 873237d3d2
commit d4520dfbd0
4 changed files with 87 additions and 19 deletions

View File

@ -2,7 +2,5 @@
"verbose": true,
"ignore": [],
"watch": ["src/**/*.ts"],
"execMap": {
"ts": "node --inspect=0.0.0.0:9229 --nolazy -r ts-node/register"
}
"exec": "ts-node ./src/index.ts"
}

View File

@ -1,14 +1,14 @@
export default class Cache {
class SiteCache {
cachedData: any;
lastUsed: Date;
lifeTime: number;
mensa: string;
key: string;
constructor() {
this.cachedData = null;
this.lastUsed = new Date();
this.lifeTime = 1000 * 30;
this.mensa = "";
this.key = "";
}
get() {
@ -23,10 +23,38 @@ export default class Cache {
return this.cachedData;
}
set(mensa: string, data: any) {
set(key: string, data: any) {
this.cachedData = data;
this.lastUsed = new Date();
this.mensa = mensa;
this.key = key;
}
}
/**
 * Keyed cache front-end: holds one `SiteCache` entry per distinct key and
 * delegates reading and writing of the payload to that entry.
 */
export default class Cache {
  /** Every entry created so far, in the order the keys were first stored. */
  cache: SiteCache[];

  constructor() {
    this.cache = [];
  }

  /**
   * Looks up the entry stored under `key`.
   *
   * @param key identifier the data was stored under
   * @returns whatever the matching entry's `get()` yields, or `null` when no
   *          entry with this key exists
   */
  get(key: string) {
    const entry = this.cache.find((candidate) => candidate.key === key);
    return entry === undefined ? null : entry.get();
  }

  /**
   * Stores `data` under `key`, reusing the existing entry for that key when
   * there is one and appending a new entry otherwise.
   *
   * @param key identifier to store the data under
   * @param data payload handed to the entry's `set()`
   */
  set(key: string, data: any) {
    const existing = this.cache.find((candidate) => candidate.key === key);
    if (existing !== undefined) {
      existing.set(key, data);
      return;
    }

    const created = new SiteCache();
    created.set(key, data);
    this.cache.push(created);
  }
}

View File

@ -7,11 +7,11 @@ import morgan from "morgan";
import fetch from './fetch.js';
import Cache from './cache.js';
import stripHtml from "./stripper.js";
import {stripMensa, stripCampus} from "./stripper.js";
dotenv.config();
const app: Express = express().use(cors({ origin: '*' }));
const app: Express = express().use(cors({ origin: '*' })).use(bodyParser.json());
app.use(morgan('combined'))
const port = process.env.PORT || 3000;
@ -24,7 +24,7 @@ app.get("/", (req: Request, res: Response) => {
res.send("Mensa API");
});
app.get("/api/:Ort", (req: Request, res: Response) => {
app.get("/api/:Ort/:Mensa?", (req: Request, res: Response) => {
if (req.params.Ort === null) {
return res.send("Invalid request");
}
@ -34,7 +34,13 @@ app.get("/api/:Ort", (req: Request, res: Response) => {
}else {
let url = baseUrl + req.params.Ort.toLowerCase();
// Fetch the upstream page, reduce it to the API payload, cache it and reply.
// NOTE(review): both the URL and the cache key are built from `Ort` only, so a
// campus listing and a single-Mensa result for the same `Ort` would share one
// cache slot — confirm against the cache lookup earlier in this handler.
fetch(url)
  .then((data) => {
    // `Mensa` is an optional route segment; when it is missing Express leaves
    // it `undefined`, not `null`, hence the loose `!= null` check. The result
    // is bound once here — re-declaring it inside each branch would shadow the
    // outer variable and leave `null` to be cached and sent.
    const stripedData =
      req.params.Mensa != null ? stripMensa(data) : stripCampus(data);
    cache.set(req.params.Ort, stripedData);
    res.send(stripedData);
  })
  .catch((err: unknown) => {
    // Without a rejection handler a failed fetch would leave the client
    // waiting forever on an unanswered request.
    console.error("Failed to fetch " + url, err);
    if (!res.headersSent) {
      res.status(502).send("Upstream request failed");
    }
  });
@ -43,7 +49,7 @@ app.get("/api/:Ort", (req: Request, res: Response) => {
});
// Usage hint for the bare /api endpoint: replies with the route pattern, where
// the trailing `?` marks the `Mensa` segment as optional.
// Exactly one response is sent; a second `res.send` on the same request would
// fail because the headers have already been written.
app.get("/api", (req: Request, res: Response) => {
  res.send("/api/:Ort/:Mensa?");
});
app.listen(port, () => {

View File

@ -1,14 +1,50 @@
import * as cheerio from 'cheerio';
export default function stripHtml(html: string): string {
/**
 * One campus together with the canteens ("Mensen") listed under it.
 */
class Campus {
  /** Display name of the campus. */
  name: string;
  /** Canteens added so far, in insertion order. */
  mensen: Mensa[];

  /**
   * @param name display name of the campus; the canteen list starts empty
   */
  constructor(name: string) {
    // Assignment order is kept (name, then mensen) so the property order of a
    // serialized instance stays the same.
    this.name = name;
    this.mensen = [];
  }

  /**
   * Appends a canteen to this campus.
   *
   * @param mensa canteen to append
   */
  addMensa(mensa: Mensa): void {
    this.mensen.push(mensa);
  }
}
/**
 * A single canteen entry: its display name and the link it was listed with.
 */
class Mensa {
  /**
   * @param name display name of the canteen
   * @param url link associated with the canteen (may be an empty string)
   */
  constructor(
    public name: string,
    public url: string,
  ) {}
}
/**
 * Extracts the campus → canteen listing from an overview page.
 *
 * Every `.group` element below a `.tagged` element becomes one campus: the
 * text of its `h2` is the campus name, and each `a` inside it becomes a
 * canteen whose name is the link text and whose url is the `href` with the
 * first "/index.html" removed (empty string when the link has no `href`).
 *
 * @param html markup of the overview page
 * @returns the campuses as plain data (array of `{ name, mensen }` objects).
 *          The declared type `JSON` is kept for compatibility with existing
 *          callers; the runtime value is the parsed array.
 */
export function stripCampus(html: string): JSON {
  const $ = cheerio.load(html);
  const ort: Campus[] = [];

  $('.tagged').find('.group').each((_groupIndex, groupElem) => {
    const campus = new Campus($(groupElem).find('h2').text());

    $(groupElem).find('a').each((_linkIndex, linkElem) => {
      const link = $(linkElem).attr('href')?.replace("/index.html", "");
      campus.addMensa(new Mensa($(linkElem).text(), link ?? ""));
    });

    ort.push(campus);
  });

  // Round-trip through JSON so callers receive plain objects rather than
  // Campus/Mensa class instances.
  return JSON.parse(JSON.stringify(ort));
}
/**
 * Placeholder for the single-canteen scraper.
 *
 * The markup is not inspected yet, so it is no longer parsed into an unused
 * document on every call.
 * TODO: parse `html` with cheerio once the menu extraction is implemented.
 *
 * @param html markup of the canteen page (currently ignored)
 * @returns the constant string "Mensa"
 */
export function stripMensa(html: string): string {
  return "Mensa";
}