diff --git a/02_activities/assignments/Microcredential_Cohort/Assignment2.md b/02_activities/assignments/Microcredential_Cohort/Assignment2.md
index d91d3c9d3..70f7b291d 100644
--- a/02_activities/assignments/Microcredential_Cohort/Assignment2.md
+++ b/02_activities/assignments/Microcredential_Cohort/Assignment2.md
@@ -58,6 +58,30 @@ The store wants to keep customer addresses. Propose two architectures for the CU
 ```
 Your answer...
 ```
+Architecture 1 — Overwrite the Address (SCD Type 1)
+
+CUSTOMER_ADDRESS (Type 1) --> This design keeps only the current address. If a customer moves, the old address is lost.
+customer_id (PK, FK → CUSTOMER)
+street
+city
+province
+postal_code
+country
+
+
+Architecture 2 — Retain Address History (SCD Type 2):
+CUSTOMER_ADDRESS (Type 2) --> This design keeps every version of a customer’s address over time
+customer_address_id (PK)
+customer_id (FK → CUSTOMER)
+street
+city
+province
+postal_code
+country
+start_date
+end_date
+is_current
+
 ***
diff --git a/02_activities/assignments/Microcredential_Cohort/assignment2.sql b/02_activities/assignments/Microcredential_Cohort/assignment2.sql
index 4079c18ae..18d3e7ceb 100644
--- a/02_activities/assignments/Microcredential_Cohort/assignment2.sql
+++ b/02_activities/assignments/Microcredential_Cohort/assignment2.sql
@@ -23,8 +23,9 @@ Edit the appropriate columns -- you're making two edits -- and the NULL rows wil
 All the other rows will remain the same. */
 --QUERY 1
-
-
+SELECT p.product_name || ', ' || COALESCE(p.product_size, '')  -- NULL size becomes a blank
+    || ' (' || COALESCE(p.product_qty_type, 'unit') || ')' AS list_of_products  -- NULL qty type becomes 'unit'
+FROM product AS p;
 --END QUERY
@@ -41,8 +42,32 @@ HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK().
 Filter the visits to dates before April 29, 2022. 
*/
 --QUERY 2
-
-
+--all purchase rows: DENSE_RANK gives every row of the same market date the same visit number, so the counter only changes on a new date
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (
+        PARTITION BY customer_id
+        ORDER BY market_date
+    ) AS visit_number
+FROM customer_purchases
+WHERE market_date < '2022-04-29'
+ORDER BY customer_id, market_date;
+
+--unique market dates per customer: one row per visit, so ROW_NUMBER numbers the visits 1, 2, 3, ...
+SELECT
+    customer_id,
+    market_date,
+    ROW_NUMBER() OVER (
+        PARTITION BY customer_id
+        ORDER BY market_date
+    ) AS visit_number
+FROM (
+    SELECT DISTINCT customer_id, market_date
+    FROM customer_purchases
+    WHERE market_date < '2022-04-29'
+) AS visit_dates
+ORDER BY customer_id, market_date;
 --END QUERY
@@ -53,8 +78,18 @@ only the customer’s most recent visit.
 HINT: Do not use the previous visit dates filter. */
 --QUERY 3
-
-
+SELECT *  -- keeps only the row numbered 1, i.e. one row per customer
+FROM (
+    SELECT
+        customer_id,
+        market_date,
+        ROW_NUMBER() OVER (
+            PARTITION BY customer_id
+            ORDER BY market_date DESC  -- reversed numbering: the latest market date is labeled 1
+        ) AS visit_number_desc
+    FROM customer_purchases
+) AS visits_newest_first
+WHERE visit_number_desc = 1;
 --END QUERY
@@ -66,8 +101,29 @@ You can make this a running count by including an ORDER BY within the PARTITION
 Filter the visits to dates before April 29, 2022. */
 --QUERY 4
-
-
+SELECT
+    customer_id,
+    product_id,
+    market_date,
+    COUNT(*) OVER (  -- no ORDER BY: every row carries the full count for its customer/product pair
+        PARTITION BY customer_id, product_id
+    ) AS total_purchases_of_product
+FROM customer_purchases
+WHERE market_date < '2022-04-29'
+ORDER BY customer_id, product_id, market_date;
+
+--running count: with ORDER BY the default frame ends at the current row's peers, so same-date purchases of a product share one count
+SELECT
+    customer_id,
+    product_id,
+    market_date,
+    COUNT(*) OVER (
+        PARTITION BY customer_id, product_id
+        ORDER BY market_date
+    ) AS running_purchase_count
+FROM customer_purchases
+WHERE market_date < '2022-04-29'
+ORDER BY customer_id, product_id, market_date;
 --END QUERY
@@ -85,21 +141,28 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
 Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. 
*/
 --QUERY 5
-
-
+SELECT
+    p.product_name,
+    -- text after the first hyphen, trimmed; NULL when the name has no hyphen
+    CASE
+        WHEN INSTR(p.product_name, '-') > 0
+            THEN TRIM(
+                SUBSTR(p.product_name, INSTR(p.product_name, '-') + 1)
+            )
+        ELSE NULL
+    END
+        AS description
+FROM product AS p;
 --END QUERY
-
 /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
 --QUERY 6
-
-
+SELECT p.* FROM product AS p WHERE p.product_size REGEXP '[0-9]';  -- sizes containing at least one digit
 --END QUERY
-
 -- UNION
 /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales.
@@ -111,8 +174,30 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
 with a UNION binding them. */
 --QUERY 7
-
-
+WITH sales_by_date AS (  -- one row per market date with that day's total sales
+    SELECT
+        cp.market_date,
+        SUM(cp.quantity * cp.cost_to_customer_per_qty) AS total_sales
+    FROM customer_purchases AS cp
+    GROUP BY cp.market_date
+),
+ranked_sales AS (  -- rank 1 in either direction marks an extreme day; tied days all get rank 1
+    SELECT
+        market_date,
+        total_sales,
+        RANK() OVER (ORDER BY total_sales DESC) AS high_to_low,
+        RANK() OVER (ORDER BY total_sales ASC) AS low_to_high
+    FROM sales_by_date
+)
+SELECT market_date, total_sales, 'Highest Sales Day' AS category
+FROM ranked_sales
+WHERE high_to_low = 1
+
+UNION
+
+SELECT market_date, total_sales, 'Lowest Sales Day' AS category
+FROM ranked_sales
+WHERE low_to_high = 1;
 --END QUERY
@@ -132,8 +217,35 @@ How many customers are there (y).
 Before your final group by you should have the product of those two queries (x*y). 
*/
 --QUERY 8
-
-
+WITH vendor_products AS (  -- x: each distinct vendor/product/price combination found in vendor_inventory
+    SELECT
+        v.vendor_name,
+        p.product_name,
+        vi.original_price
+    FROM (SELECT vendor_id, product_id, original_price FROM vendor_inventory GROUP BY vendor_id, product_id, original_price) vi
+    JOIN vendor v ON vi.vendor_id = v.vendor_id
+    JOIN product p ON vi.product_id = p.product_id
+),
+all_customers AS (  -- y: every customer on record
+    SELECT customer_id
+    FROM customer
+),
+vendor_cust AS (  -- x*y rows: every vendor product paired with every customer
+    SELECT
+        vp.vendor_name,
+        vp.product_name,
+        vp.original_price,
+        cust.customer_id
+    FROM vendor_products vp
+    CROSS JOIN all_customers cust
+)
+SELECT
+    vendor_name,
+    product_name,
+    SUM(original_price * 5) AS total_revenue  -- 5 units sold to each customer
+FROM vendor_cust
+GROUP BY vendor_name, product_name
+ORDER BY vendor_name, product_name;
 --END QUERY
@@ -145,8 +257,11 @@ It should use all of the columns from the product table, as well as a new column
 Name the timestamp column `snapshot_timestamp`. */
 --QUERY 9
-
-
+DROP TABLE IF EXISTS product_units;  -- lets the script be re-run without a "table already exists" error
+CREATE TEMP TABLE product_units AS
+SELECT *, CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product
+WHERE product_qty_type = 'unit';
 --END QUERY
@@ -155,8 +270,22 @@ Name the timestamp column `snapshot_timestamp`. */
 This can be any product you desire (e.g. add another record for Apple Pie). */
 --QUERY 10
-
-
+INSERT INTO product_units (  -- explicit column list, so the insert does not depend on column order
+    product_id,
+    product_name,
+    product_size,
+    product_category_id,
+    product_qty_type,
+    snapshot_timestamp
+)
+VALUES (
+    777,
+    'Apple Pie',
+    'large',
+    6,
+    'unit',
+    CURRENT_TIMESTAMP
+);
 --END QUERY
@@ -168,11 +297,16 @@ HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
 --QUERY 11
-
+DELETE FROM product_units
+WHERE rowid = (  -- SQLite rowid: pins the delete to exactly one row, the oldest 'Apple Pie' record
+    SELECT older.rowid FROM product_units AS older
+    WHERE older.product_name = 'Apple Pie'
+    -- CURRENT_TIMESTAMP has one-second resolution, so the snapshot row and the inserted row can tie; matching on MIN() would then delete both
+    ORDER BY older.snapshot_timestamp, older.rowid LIMIT 1  -- rowid breaks the tie in insertion order
+);
 --END QUERY
-
 -- UPDATE
 /* 1.We want to add the current_quantity to the product_units table. First, add a new column, current_quantity to the table using the following syntax. 
@@ -191,10 +325,23 @@ Finally, make sure you have a WHERE statement to update the right row,
 When you have all of these components, you can run the update statement. */
 --QUERY 12
+ALTER TABLE product_units
+ADD current_quantity INT;
-
+-- no outer WHERE: every product_units row is set, and the correlated WHERE below picks the matching inventory row
+UPDATE product_units
+SET current_quantity = COALESCE(
+    (
+        -- quantity from the product's most recent market_date in vendor_inventory
+        SELECT latest.quantity
+        FROM vendor_inventory AS latest
+        WHERE latest.product_id = product_units.product_id
+        ORDER BY latest.market_date DESC
+        LIMIT 1  -- if several rows share the latest date, one of them is picked arbitrarily
+    ),
+    0  -- used when the product has no inventory row or the latest quantity is NULL
+);
 --END QUERY
-
diff --git a/02_activities/assignments/Microcredential_Cohort/small_bookstore_ER.PNG b/02_activities/assignments/Microcredential_Cohort/small_bookstore_ER.PNG
new file mode 100644
index 000000000..42ac889eb
Binary files /dev/null and b/02_activities/assignments/Microcredential_Cohort/small_bookstore_ER.PNG differ