From e482b8a6e039fe8593953d55831c32e97206ed69 Mon Sep 17 00:00:00 2001
From: theng
Date: Sun, 14 Nov 2021 20:46:58 +0800
Subject: [PATCH 1/4] Draft

---
 app.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app.py b/app.py
index e293225..c7f7dbd 100644
--- a/app.py
+++ b/app.py
@@ -1 +1,3 @@
 # Your DB Code Here
+
+WIP

From e93753b641e4dbacba38fdbf0711ad4ff75d1a36 Mon Sep 17 00:00:00 2001
From: theng
Date: Fri, 31 Dec 2021 18:08:28 +0800
Subject: [PATCH 2/4] Add .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f4f55d5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+db.py
+__pycache__
+*.json
\ No newline at end of file

From fbdcaf85edf6791b975311f9377534473021195e Mon Sep 17 00:00:00 2001
From: theng
Date: Sat, 1 Jan 2022 02:32:54 +0800
Subject: [PATCH 3/4] Retrieve Q1 data

---
 app.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/app.py b/app.py
index c7f7dbd..394bbe4 100644
--- a/app.py
+++ b/app.py
@@ -1,3 +1,26 @@
 # Your DB Code Here
 
-WIP
+from db import db
+from db import DB_PASSWORD, DB_USERNAME, URL_LINK
+import pymongo
+from bson.objectid import ObjectId
+import datetime
+
+URL = URL_LINK.format(DB_USERNAME, DB_PASSWORD)
+client = pymongo.MongoClient(URL)
+db = client["RHDEVS-BE-Mongo"]
+
+# 1. Number of entries of selling data grouped by isbn
+
+stage_1 = {
+    "$group":
+    {
+        "_id": "$isbn",
+        "order_count": {"$sum": 1},
+    }
+}
+
+# 2. For books with more than 15000 total copies sold, return all the titles in a single array
+# 3. Find the number of books by John Doe, sorted by isbn (but isbn is not returned)
+# 4. Return the price per book for each order
+# 5. Return the average price per book for each order supplied from titan (we want to sum up all the total copies sold and total price of all copies sold where titan is a supplier, but average.like weighted average for the number of bookd)
\ No newline at end of file

From 4f546a38eb1e46c3fa158387b8cf10eced10e13a Mon Sep 17 00:00:00 2001
From: theng
Date: Mon, 3 Jan 2022 16:54:31 +0800
Subject: [PATCH 4/4] Update Q1 and Q2 to Q5 requirements

---
 app.py | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 1 deletion(-)

diff --git a/app.py b/app.py
index 394bbe4..ad0ed51 100644
--- a/app.py
+++ b/app.py
@@ -20,7 +20,143 @@
     }
 }
 
+aggregation_1 = db.books_selling_data.aggregate([stage_1])
+
+
+# print(list(aggregation_1))
+
 # 2. For books with more than 15000 total copies sold, return all the titles in a single array
+
+stage_2_1 = {
+    "$group":
+    {
+        "_id": "$isbn",
+        # Field name must match the one filtered on in stage_2_2.
+        "total_copies_sold": {"$sum": "$copies_sold"}
+    }
+}
+
+stage_2_2 = {
+    "$match":
+    {
+        "total_copies_sold": { "$gt": 15000 }
+    }
+
+}
+
+stage_2_3 = {
+    "$lookup": {
+        "from": "books",
+        "localField": "_id",
+        "foreignField": "isbn",
+        "as": "book_info"
+    }
+}
+
+stage_2_4 = {
+    "$group": {
+        "_id": None,
+        # book_info is the $lookup result array; collect its first title.
+        "titles": {"$push": {"$arrayElemAt": ["$book_info.title", 0]}}
+    }
+}
+
+# Yields one document whose "titles" field holds every matching title.
+aggregation_2 = db.books_selling_data.aggregate([stage_2_1, stage_2_2, stage_2_3, stage_2_4])
+
+# print(list(aggregation_2))
+
 # 3. Find the number of books by John Doe, sorted by isbn (but isbn is not returned)
+
+stage_3_1 = {
+    "$match": {
+        "author": "John Doe"
+    }
+}
+
+# Sort keys are plain field names; the "$" prefix is only for value paths.
+stage_3_2 = {
+    "$sort": {
+        "isbn": 1
+    }
+}
+
+stage_3_3 = {
+    "$project": {
+        # Inclusion-only projection: isbn is omitted by not listing it.
+        "title": 1,
+        "author": 1
+    }
+}
+
+
+# The answer to Q3 is the number of documents this pipeline returns.
+aggregation_3 = db.books.aggregate([stage_3_1, stage_3_2, stage_3_3])
+
+# print(len(list(aggregation_3)))
+
 # 4. Return the price per book for each order
-# 5. Return the average price per book for each order supplied from titan (we want to sum up all the total copies sold and total price of all copies sold where titan is a supplier, but average.like weighted average for the number of bookd)
\ No newline at end of file
+
+
+stage_4_1 = {
+    "$addFields": {
+        "price_per_book": {
+            "$round": [{"$divide": ["$total_price", "$copies_sold"]}, 2]
+        }
+    }
+}
+
+stage_4_2 = {
+    "$project": {
+        # Inclusion-only: MongoDB rejects mixing 0 and 1 (except for _id),
+        # and price_per_book must be listed or it would be dropped.
+        "_id": 0,
+        "order_id": 1,
+        "price_per_book": 1
+    }
+}
+
+
+aggregation_4 = db.books_selling_data.aggregate([stage_4_1, stage_4_2])
+
+# print(list(aggregation_4))
+
+# 5. Return the average price per book for each order supplied from titan (we want to sum up all the total copies sold and total price of all copies sold where titan is a supplier, but average, like a weighted average for the number of books)
+
+stage_5_1 = {
+    "$unwind": "$supplier"
+}
+
+stage_5_2 = {
+    "$group": {
+        "_id": "$supplier",
+        "overall_sale": {"$sum": "$copies_sold"},
+        "overall_price": {"$sum": "$total_price"}
+    }
+}
+
+stage_5_3 = {
+    "$match": {
+        # After $group the supplier name is stored in _id.
+        "_id": "titan"
+    }
+}
+
+# Weighted average: total price of all copies / total copies sold.
+# Operators and field paths need the "$" prefix to be evaluated.
+stage_5_4 = {
+    "$addFields": {
+        "average_price_per_book": {"$round": [{"$divide": ["$overall_price", "$overall_sale"]}, 2]}
+    }
+}
+
+stage_5_5 = {
+    "$project": {
+        "_id": 1,
+        "average_price_per_book": 1
+    }
+}
+
+aggregation_5 = db.books_selling_data.aggregate([stage_5_1, stage_5_2, stage_5_3, stage_5_4, stage_5_5])
+
+# print(list(aggregation_5))
\ No newline at end of file