diff --git a/02_activities/assignments/Microcredential_Cohort/Assignment2.md b/02_activities/assignments/Microcredential_Cohort/Assignment2.md index d91d3c9d3..4e74da69b 100644 --- a/02_activities/assignments/Microcredential_Cohort/Assignment2.md +++ b/02_activities/assignments/Microcredential_Cohort/Assignment2.md @@ -56,7 +56,9 @@ The store wants to keep customer addresses. Propose two architectures for the CU **HINT:** search type 1 vs type 2 slowly changing dimensions. ``` -Your answer... +Type 1 overwrites the existing address record, keeping only the latest value (no history is retained). +Type 2 inserts a new record and keeps the old one too, usually along with the changed date and an id, so the address history is retained. + ``` *** diff --git a/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_1.pdf b/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_1.pdf new file mode 100644 index 000000000..c6bc3151f Binary files /dev/null and b/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_1.pdf differ diff --git a/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_2.pdf b/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_2.pdf new file mode 100644 index 000000000..1fd7e95ff Binary files /dev/null and b/02_activities/assignments/Microcredential_Cohort/Assugnment_2_Prompt_2.pdf differ diff --git a/02_activities/assignments/Microcredential_Cohort/assignment2.sql b/02_activities/assignments/Microcredential_Cohort/assignment2.sql index 4079c18ae..12e518afc 100644 --- a/02_activities/assignments/Microcredential_Cohort/assignment2.sql +++ b/02_activities/assignments/Microcredential_Cohort/assignment2.sql @@ -22,8 +22,15 @@ The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. All the other rows will remain the same. 
*/
 --QUERY 1
+-- Build one display string per product: "name, size (qty_type)".
+-- `||` returns NULL as soon as any operand is NULL, so the two nullable
+-- columns are wrapped in COALESCE: a missing product_size becomes a blank
+-- and a missing product_qty_type becomes 'unit'. Other rows are unchanged.
-
+SELECT
+    product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')'
+FROM product;
 --END QUERY
@@ -40,8 +47,13 @@ each new market date for each customer, or select only the unique market dates p
 HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK().
 Filter the visits to dates before April 29, 2022. */
 --QUERY 2
+-- Number each customer's visits in date order while keeping every purchase row.
+-- The window is partitioned by customer only and ordered by market_date, so
+-- DENSE_RANK gives all purchases made on the same date the same visit number
+-- and the next market date the next number, with no gaps.
+-- NOTE(review): the date filter is a text comparison; it assumes market_date
+-- is stored as ISO 'YYYY-MM-DD' text -- confirm against the schema.
+SELECT
+    market_date
+    ,customer_id
+    ,DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_count
-
+FROM customer_purchases
+WHERE market_date < '2022-04-29';
 --END QUERY
@@ -52,8 +64,18 @@ then write another query that uses this one as a subquery (or temp table) and fi
 only the customer’s most recent visit.
 HINT: Do not use the previous visit dates filter. */
 --QUERY 3
+-- Step 1: number each customer's visits from newest (1) to oldest.
+-- The inner DISTINCT collapses purchase rows to one row per customer per
+-- market date, so visit_count counts visits rather than individual purchases.
+DROP TABLE IF EXISTS temp.customer_recent_visit;
+CREATE TABLE temp.customer_recent_visit AS
+SELECT
+    market_date
+    ,customer_id
+    ,ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_count
+FROM (
+    SELECT DISTINCT
+        customer_id
+        ,market_date
+    FROM customer_purchases
+) AS customer_visit;
+
+-- Step 2: keep only the most recent visit (visit_count = 1) of each customer.
+SELECT
+    market_date
+    ,customer_id
+    ,visit_count
+FROM temp.customer_recent_visit
+WHERE visit_count = 1;
 --END QUERY
@@ -65,8 +87,14 @@ customer_purchases table that indicates how many different times that customer h
 You can make this a running count by including an ORDER BY within the PARTITION BY if desired.
 Filter the visits to dates before April 29, 2022. */
 --QUERY 4
+-- Running count of how many times each customer has bought each product.
+-- COUNT(*) replaces the non-standard zero-argument count().
+-- The explicit ROWS frame advances the count one purchase at a time; with
+-- ORDER BY alone the default RANGE frame treats purchases on the same
+-- market_date as peers and gives them all the same count.
+SELECT
+    market_date
+    ,customer_id
+    ,product_id
+    ,COUNT(*) OVER (
+        PARTITION BY customer_id, product_id
+        ORDER BY market_date
+        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    ) AS order_count
-
+FROM customer_purchases
+WHERE market_date < '2022-04-29';
 --END QUERY
@@ -84,8 +112,17 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
 Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column.
*/
 --QUERY 5
+-- Copy the text after the first hyphen of product_name into `description`,
+-- trimmed of leading/trailing whitespace; names without a hyphen give NULL.
+-- INSTR returns 0 when no hyphen is found; NULLIF turns that 0 into NULL and
+-- the NULL then propagates through the addition, SUBSTR and TRIM.
+SELECT
+    product_id
+    ,product_name
+    ,TRIM(SUBSTR(product_name, NULLIF(INSTR(product_name, '-'), 0) + 1)) AS description
-
+FROM product;
 --END QUERY
@@ -93,8 +130,13 @@ Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR w
 /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
 --QUERY 6
+-- Keep only the products whose product_size contains at least one digit.
+-- NOTE(review): SQLite's REGEXP operator needs a regexp() function supplied
+-- by the host application -- presumably the course tooling provides it.
+SELECT
+    product_id
+    ,product_name
+    ,product_size
-
+FROM product
+WHERE product_size REGEXP '[0-9]';
 --END QUERY
@@ -110,8 +152,41 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
 3) Query the second temp table twice, once for the best day, once for the worst day,
 with a UNION binding them. */
 --QUERY 7
+-- Step 1: total sales (price per quantity x quantity) for each market date.
+WITH sales_by_date AS (
+    SELECT
+        market_date
+        ,SUM(cost_to_customer_per_qty*quantity) AS daily_total_sales
+    FROM customer_purchases
+    GROUP BY market_date
+),
+-- Step 2: rank every date twice. best_day = 1 marks the highest total and
+-- worst_day = 1 the lowest; RANK gives tied dates the same rank.
+sales_ranked AS (
+    SELECT
+        market_date
+        ,daily_total_sales
+        ,RANK() OVER (ORDER BY daily_total_sales DESC) AS best_day
+        ,RANK() OVER (ORDER BY daily_total_sales) AS worst_day
+    FROM sales_by_date
+)
+-- Step 3: one labelled branch per extreme, bound together with UNION.
+SELECT
+    market_date
+    ,daily_total_sales
+    ,'Highest total sales' AS sales_description
+FROM sales_ranked
+WHERE best_day = 1
+
+UNION
+
+SELECT
+    market_date
+    ,daily_total_sales
+    ,'Lowest total sales' AS sales_description
+FROM sales_ranked
+WHERE worst_day = 1;
 --END QUERY
@@ -131,8 +206,75 @@ Think a bit about the row counts: how many distinct vendors, product names are t
 How many customers are there (y).
 Before your final group by you should have the product of those two queries (x*y).
*/
 --QUERY 8
-
-
+-- Revenue each vendor would make per product if it sold 5 units of that
+-- product to every customer on record.
+--
+-- vendor_inventory is read through a DISTINCT CTE first: the table carries a
+-- market_date column, i.e. the same vendor/product pair appears once per
+-- market date. Cross-joining the raw rows with customer would multiply the
+-- customer count (and therefore the revenue) by the number of dates the
+-- product was stocked.
+--
+-- The price used is vendor_inventory.original_price. The original author
+-- noted that it differs somewhat from customer_purchases.cost_to_customer_per_qty.
+-- original_price stays in the GROUP BY, so a product listed at more than one
+-- price yields one row per price instead of silently mixing prices.
+WITH vendor_product AS (
+    SELECT DISTINCT
+        vi.vendor_id
+        ,vi.product_id
+        ,vi.original_price
+    FROM vendor_inventory AS vi
+)
+SELECT
+    v.vendor_name
+    ,p.product_name
+    -- customers on record x 5 units each x unit price
+    ,COUNT(c.customer_id) * 5 * vp.original_price AS total_revenue
+FROM vendor_product AS vp
+INNER JOIN product AS p
+    ON p.product_id = vp.product_id
+INNER JOIN vendor AS v
+    ON v.vendor_id = vp.vendor_id
+-- Deliberate cartesian product: every vendor/product row is paired with every customer.
+CROSS JOIN customer AS c
+GROUP BY
+    v.vendor_name
+    ,p.product_name
+    ,vp.original_price
+ORDER BY
+    v.vendor_name
+    ,p.product_name
+;
 --END QUERY
@@ -144,7 +286,14 @@ This table will contain only products where the `product_qty_type = 'unit'`.
 It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
 Name the timestamp column `snapshot_timestamp`.
*/
 --QUERY 9
+-- Rebuild product_units from scratch: every column of product (hence the
+-- deliberate `*`) for rows sold by the unit, stamped with the creation time.
+DROP TABLE IF EXISTS product_units;
+CREATE TABLE product_units AS
+SELECT
+    *
+    ,CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product
+WHERE product_qty_type = 'unit';
@@ -154,8 +303,18 @@ Name the timestamp column `snapshot_timestamp`. */
 /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
 This can be any product you desire (e.g. add another record for Apple Pie). */
 --QUERY 10
-
-
+-- Table contents before the insert, for comparison.
+SELECT *
+FROM product_units;
+
+-- Add another 'Apple Pie' record with a fresh snapshot_timestamp.
+INSERT INTO product_units (
+    product_id
+    ,product_name
+    ,product_size
+    ,product_category_id
+    ,product_qty_type
+    ,snapshot_timestamp
+)
+VALUES (24, 'Apple Pie', '6"', 3, 'unit', CURRENT_TIMESTAMP);
+
+-- Table contents after the insert.
+SELECT *
+FROM product_units;
 --END QUERY
@@ -166,8 +325,10 @@ This can be any product you desire (e.g. add another record for Apple Pie). */
 HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
 --QUERY 11
-
-
+-- Remove a single 'Apple Pie' record; both predicates are required so that
+-- no other row is touched.
+-- NOTE(review): product_id 24 is the row added in QUERY 10, i.e. the newest
+-- record. If the task expects the OLDER duplicate to be removed, filter on
+-- the earlier snapshot_timestamp instead -- confirm against the assignment text.
+DELETE FROM product_units
+WHERE product_name = 'Apple Pie'
+    AND product_id = 24
+;
 --END QUERY
@@ -190,8 +351,21 @@ Finally, make sure you have a WHERE statement to update the right row,
 you'll need to use product_units.product_id to refer to the correct row within the product_units table.
 When you have all of these components, you can run the update statement. */
 --QUERY 12
+ALTER TABLE product_units
+ADD current_quantity INT;
-
+-- Set current_quantity to the most recent vendor_inventory quantity of each
+-- product. Every row is updated on purpose (no outer WHERE); the correlation
+-- on product_units.product_id inside the subquery selects the matching product.
+-- Products with no inventory rows make the subquery return NULL, which
+-- COALESCE turns into 0.
+-- vendor_id is only a tie-breaker, so the picked row is deterministic when
+-- several vendors stock the same product on the latest market_date.
+UPDATE product_units
+SET current_quantity = COALESCE(
+    (
+        SELECT vi.quantity
+        FROM vendor_inventory AS vi
+        WHERE vi.product_id = product_units.product_id
+        ORDER BY
+            vi.market_date DESC
+            ,vi.vendor_id
+        LIMIT 1
+    )
+    ,0
+);
+
+-- Show the result of the update.
+SELECT *
+FROM product_units;
 --END QUERY