Skip to content
Snippets Groups Projects
Commit 4eef9954 authored by Raven Z. :cat2:
Browse files

parser should be working now

parent a726814a
Branches
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ const MOMENT = require('moment') ...@@ -5,6 +5,7 @@ const MOMENT = require('moment')
const MENU_URL = 'https://www.studierendenwerk-aachen.de/speiseplaene/ahornstrasse-w-en.html' const MENU_URL = 'https://www.studierendenwerk-aachen.de/speiseplaene/ahornstrasse-w-en.html'
const fetchTime = MOMENT()
AXIOS.get(MENU_URL) AXIOS.get(MENU_URL)
.then(processResponse) .then(processResponse)
.catch(err => console.error('ERR: ', err)); .catch(err => console.error('ERR: ', err));
...@@ -13,9 +14,21 @@ function processResponse(response) { ...@@ -13,9 +14,21 @@ function processResponse(response) {
const dom = new JSDOM(response.data) const dom = new JSDOM(response.data)
const doc = dom.window.document const doc = dom.window.document
const daysDom = doc.querySelectorAll('.accordion > div') const daysDom = doc.querySelectorAll('.accordion > div')
const additivesDom = doc.querySelector('#additives')
const daysParsed = [...daysDom].map(parseDay) const daysParsed = [...daysDom].map(parseDay)
const daysMap = {}
daysParsed.forEach(d => { daysMap[d.date] = d })
const additives = parseAdditives(additivesDom)
console.log(JSON.stringify(daysParsed))
const result = {
days: daysMap,
additives,
url: MENU_URL,
fetchedAt: fetchTime.format()
}
console.log(JSON.stringify(result))
} }
function parseDay(dayDom) { function parseDay(dayDom) {
...@@ -27,12 +40,14 @@ function parseDay(dayDom) { ...@@ -27,12 +40,14 @@ function parseDay(dayDom) {
const extrasDom = dayDom.querySelectorAll('table.extras tr') const extrasDom = dayDom.querySelectorAll('table.extras tr')
const menues = [...menuesDom].map(parseMenuEntry) const menues = [...menuesDom].map(parseMenuEntry)
const extras = [...extrasDom].map(parseExtrasEntry)
return { return {
date: date.format('YYYY-MM-DD'), date: date.format('YYYY-MM-DD'),
label: titleText, label: titleText,
today: isActive, today: isActive,
menu: [...menues], menu: [...menues],
extras,
} }
} }
...@@ -50,8 +65,8 @@ function parseMenuEntry(row) { ...@@ -50,8 +65,8 @@ function parseMenuEntry(row) {
case 'bg-color': return null case 'bg-color': return null
default: return '?:'+cls default: return '?:'+cls
}}).filter(e => !!e) }}).filter(e => !!e)
const categoryDom = row.querySelector('menue-category'); const categoryDom = row.querySelector('.menue-category');
const priceDom = row.querySelector('menue-price'); const priceDom = row.querySelector('.menue-price');
const descDom = row.querySelector('.expand-nutr') const descDom = row.querySelector('.expand-nutr')
const nutrDom = row.querySelector('.nutr-info') const nutrDom = row.querySelector('.nutr-info')
...@@ -69,6 +84,30 @@ function parseMenuEntry(row) { ...@@ -69,6 +84,30 @@ function parseMenuEntry(row) {
} }
} }
/**
 * Parse one row of a day's "extras" table into a category and its options.
 *
 * Descriptions (read from the `.menue-desc` cell via `parseDescription`) and
 * nutrition blocks (one per `.nutr-info` element, via `parseNutrition`) are
 * paired by position. If one list is shorter than the other, the missing
 * side of the pair is `null`.
 *
 * @param {Element} row - Table row to read; only `querySelector` and
 *   `querySelectorAll` are called on it.
 * @returns {{category: (string|undefined), options: Array<{description: *, nutrition: *}>}}
 *   `category` is the raw text of the `.menue-category` cell, or `undefined`
 *   when the row has no such cell.
 */
function parseExtrasEntry(row) {
  const categoryDom = row.querySelector('.menue-category');
  const descDom = row.querySelector('.menue-desc');
  const nutrDoms = row.querySelectorAll('.nutr-info');

  const category = categoryDom ? categoryDom.textContent : undefined;
  // querySelector yields null for a row without a description cell;
  // parseDescription dereferences its argument, so guard here the same way
  // the category cell is guarded.
  const description = descDom ? parseDescription(descDom) : [];
  const nutrition = [...nutrDoms].map(parseNutrition);

  const count = Math.max(description.length, nutrition.length);
  const options = Array.from({ length: count }, (_, i) => ({
    description: description[i] || null,
    nutrition: nutrition[i] || null,
  }));

  return {
    category,
    options,
  };
}
function parseDescription(descDom) { function parseDescription(descDom) {
const parts = [] const parts = []
descDom.childNodes.forEach(node => { descDom.childNodes.forEach(node => {
...@@ -89,8 +128,10 @@ function parseDescription(descDom) { ...@@ -89,8 +128,10 @@ function parseDescription(descDom) {
} }
function parseNutrition(nutrDom) { function parseNutrition(nutrDom) {
if (nutrDom.textContent === '-') return null
const values = {} const values = {}
nutrDom.querySelector('div').childNodes.forEach(node => { const container = nutrDom.querySelector('div') || nutrDom
container.childNodes.forEach(node => {
if (node.nodeName !== '#text') return; if (node.nodeName !== '#text') return;
const match = node.data.match(/^\s*([^=]*?)\s*=\s*(.*?)\s*$/) const match = node.data.match(/^\s*([^=]*?)\s*=\s*(.*?)\s*$/)
if (!match) { // should not happen ... if (!match) { // should not happen ...
...@@ -111,3 +152,25 @@ function parseNutrition(nutrDom) { ...@@ -111,3 +152,25 @@ function parseNutrition(nutrDom) {
}) })
return values return values
} }
/**
 * Parse the additives/allergens legend into a symbol -> label lookup.
 *
 * The element's text is split on commas. Each piece is trimmed, a leading
 * "with" or "contains" is dropped, and the remainder is expected to look
 * like "(SYMBOL) label". Pieces that do not fit that shape are collected,
 * untrimmed, in an array under the key `'_?'` so they remain visible in the
 * output instead of being lost.
 *
 * @param {?{textContent: string}} additivesDom - Element holding the legend
 *   text; may be `null` when the page has no such element.
 * @returns {Object<string, (string|string[])>} Map from symbol to label, plus
 *   an optional `'_?'` array of unparseable pieces. Empty object when there is
 *   no element or no text.
 */
function parseAdditives(additivesDom) {
  const values = {};
  // The caller passes the result of querySelector, which is null when the
  // legend element is absent; report "no additives" rather than throwing.
  if (!additivesDom) return values;

  const rawText = additivesDom.textContent || '';
  for (const segment of rawText.split(',')) {
    const cleaned = segment.trim();
    // Blank pieces (empty text, trailing or doubled commas) carry no
    // information and should not show up as unparseable entries.
    if (cleaned === '') continue;

    const matches = cleaned
      .replace(/^\s*(with|contains)\s*/, '')
      .match(/^\((.*?)\)\s*(.*?)$/);
    if (!matches) {
      if (!values['_?']) values['_?'] = [];
      values['_?'].push(segment);
      continue;
    }

    const [, symbol, text] = matches;
    values[symbol] = text;
  }
  return values;
}
...@@ -440,6 +440,11 @@ ...@@ -440,6 +440,11 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
}, },
"mustache": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/mustache/-/mustache-4.0.0.tgz",
"integrity": "sha512-FJgjyX/IVkbXBXYUwH+OYwQKqWpFPLaLVESd70yHjSDunwzV2hZOoTBvPf4KLoxesUzzyfTH6F784Uqd7Wm5yA=="
},
"nwsapi": { "nwsapi": {
"version": "2.2.0", "version": "2.2.0",
"resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.0.tgz", "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.0.tgz",
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
"dependencies": { "dependencies": {
"axios": "^0.19.2", "axios": "^0.19.2",
"jsdom": "^16.2.0", "jsdom": "^16.2.0",
"moment": "^2.24.0" "moment": "^2.24.0",
"mustache": "^4.0.0"
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment