diff --git a/chart-gen.js b/chart-gen.js index 0fc4cda..6c2cd1d 100644 --- a/chart-gen.js +++ b/chart-gen.js @@ -1,9 +1,3 @@ -// clear output -function clearStatsOutput() { - const stats = document.getElementById("stats-div"); - stats.innerHTML = ``; -} - // find variable frequencies function extractFrequency(rooms, category) { const frequency = rooms.reduce((acc, curr) => { @@ -15,7 +9,7 @@ function extractFrequency(rooms, category) { } // find sqft bin frequencies -function Frequency(rooms, category) { +function sqftFrequency(rooms, category) { const frequency = rooms.reduce((acc, curr) => { acc[curr[category]] = (acc[curr[category]] || 0) + 1; return acc; @@ -27,16 +21,16 @@ function Frequency(rooms, category) { // generate charts function stats(rooms) { document.getElementById("stats-div").innerHTML = - `

Colleges

+ `

Colleges



-

Buildings

+

Buildings



-

Room Types

+

Room Types



-

Sq. Ft.

`; +

Sq. Ft.

`; const collegeFrequency = extractFrequency(rooms, "college"); const buildingFrequency = extractFrequency(rooms, "building"); diff --git a/file-parse.js b/file-parse.js new file mode 100644 index 0000000..f4cf42c --- /dev/null +++ b/file-parse.js @@ -0,0 +1,113 @@ +// upload file handler +export function parseFile(event) { + return new Promise((resolve, reject) => { + // set the worker + pdfjsLib.GlobalWorkerOptions.workerSrc = + "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.5.136/pdf.worker.min.mjs"; + + // get file + const file = event.target.files[0]; + + // verify that file is pdf + if (file.type !== "application/pdf") { + alert("Please upload a PDF file."); + reject("File is not a PDF."); + return; + } + + // clear output + clearTableOutput(); + clearStatsOutput(); + + // new file reader + const reader = new FileReader(); + + // when file successfully read + reader.onload = function (event) { + // store raw binary data of file + const typedarray = new Uint8Array(event.target.result); + + // get pdf from raw binary data + pdfjsLib.getDocument(typedarray).promise.then(function (pdf) { + // initializes array to hold promises for each page + const pagesPromises = []; + + // extract room + room infos + const rooms = []; + let roomsInfoList = []; + + // don't remove if + // not whitespace and not any of following: + // 'College' + // 'Building' + // 'Room' + // 'Type' + // 'Sq. Ft.' + // 'Updated' + function noRemove(item) { + return ( + !/^\s*$/.test(item.str) && + item.str !== "College" && + item.str !== "Building" && + item.str !== "Room" && + item.str !== "Type" && + item.str !== "Sq. Ft." && + !item.str.includes("Updated") && + item.str !== "Dorm" && + item.str !== "Sq Foot" && + !item.str.includes("you") + ); + } + + // iterate through each page + for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) { + // keep track of promises for each page (when each page gets retrieved) + pagesPromises.push( + pdf.getPage(pageNum).then(function (page) { + // get text content of each page + return page.getTextContent().then(function (textContent) { + // extract relevant room info + const roomsInPage = textContent.items + .filter(noRemove) + .map((item) => item.str) + .filter((item) => item !== ""); + + // organize room info + let room = {}; + for (let i = 0; i < roomsInPage.length; i++) { + if (i % 5 === 0) { + room = {}; + rooms.push(room); + room["college"] = roomsInPage[i]; + } else if (i % 5 === 1) { + room["building"] = roomsInPage[i]; + } else if (i % 5 === 2) { + room["room"] = roomsInPage[i]; + } else if (i % 5 === 3) { + room["type"] = roomsInPage[i]; + } else { + room["sqft"] = roomsInPage[i]; + } + } + roomsInfoList = roomsInfoList.concat(roomsInPage); + }); + }), + ); + } + + // when all pages processed + Promise.all(pagesPromises) + .then(function () { + console.log("All pages processed"); + resolve(rooms); + }) + .catch((error) => { + reject(error); + }); + }); + }; + + // read file to trigger load even when read complete + reader.readAsArrayBuffer(file); + }); +} diff --git a/index.html b/index.html index b079ce1..4ccdba8 100644 --- a/index.html +++ b/index.html @@ -14,6 +14,20 @@ + +

+ Upload the latest Available Room List pdf. +
+ Then click on the left table column titles to sort. +
+ Search on the right to filter. +

+
+
+ +
+
+