Hi @Paul_Wadsworth , the crux of what you’re asking is to create some kind of identifier that ranks your data. Typically this either exists in your data as part of the ETL process, or you can create a derived table using a rank window function and partition by whichever column you feel is appropriate to create the correct ranks you desire.
Once you have that, we can pretty easily call out your top ‘x’ patterns using parameters in LookML using this pattern (this is assuming you don’t have a rank in your data and have to create one) (redshift dialect).
view: top_5_brands_vs_other_ranked_by_user_selected_parameter {
label: "Top 5 Brands (Select Metric to Rank By)"
derived_table: {
#parameter value specifies which of the rankings from the inner table to use
sql:
select brand,
{% parameter brand_rank_criteria %} as brand_rank
from
(
select brand,
--metric to rank over is set by the user's brand_rank_criteria parameter selection
rank() over(order by count(*) desc) as brandRankOrderItemCount,
rank() over(order by sum(order_items.sale_price) desc) as brandRankSalePrice,
rank() over(order by avg(order_items.sale_price) desc) as brandRankAvgSalePrice,
rank() over(order by count(distinct user_id) desc) as brandRankDistinctUserCount,
rank() over(order by sum(order_items.sale_price)-sum(inventory_items.cost) desc) as brandRankMargin
FROM public.order_items AS order_items
LEFT JOIN public.inventory_items AS inventory_items ON order_items.inventory_item_id = inventory_items.id
LEFT JOIN public.products AS products ON inventory_items.product_id = products.id
group by brand
)brand_summary
;;
}
dimension: brand {
primary_key: yes
hidden: yes
type: string
sql: ${TABLE}.brand ;;
}
parameter: brand_rank_criteria {
description: "Specify which metric to rank brands by"
type: unquoted
#Set a default value so that the dynamic ranking still works even if the user doesn't use the parameter.
#Parameter defualt values work better without underscores, otherwise they sometimes load as '' when added, rather than with the corresponding label.
default_value: "brandRankOrderItemCount"
allowed_value: {
label: "Order Items Sold Count Rank"
value: "brandRankOrderItemCount"
}
allowed_value: {
label: "Order Items Average Sale Price Rank"
value: "brandRankAvgSalePrice"
}
allowed_value: {
label: "Distinct Users with Order Items Count Rank"
value: "brandRankDistinctUserCount"
}
allowed_value: {
label: "Total Margin Rank"
value: "brandRankMargin"
}
}
#### This parameter will allow a user to select a Top N ranking limit for bucketing the brands, almost like parameterizing the Row Limit in the UI
parameter: brand_rank_limit {
type: unquoted
default_value: "5"
allowed_value: {
label: "Top 5"
value: "5"
}
allowed_value: {
label: "Top 10"
value: "10"
}
allowed_value: {
label: "Top 20"
value: "20"
}
allowed_value: {
label: "Top 50"
value: "50"
}
}
dimension: brand_rank_top_5_brands {
#Adjust the label that appears in visualization to match the ranking criteria
label_from_parameter: brand_rank_criteria
type: string
sql: case when ${TABLE}.brand_rank<={% parameter brand_rank_limit %} then cast(${TABLE}.brand_rank as varchar) else 'other' end ;;
}
dimension: brand_name_top_5_brands {
label: "Brand Name (Top 5)"
type: string
sql: case when ${TABLE}.brand_rank<={% parameter brand_rank_limit %} then ${TABLE}.brand else 'other' end ;;
}
}
This can look confusing at first but we can walk through step by step.
Firstly the derived table sql.
select brand,
{% parameter brand_rank_criteria %} as brand_rank
from
(
select brand,
--metric to rank over is set by the user's brand_rank_criteria parameter selection
rank() over(order by count(*) desc) as brandRankOrderItemCount,
rank() over(order by sum(order_items.sale_price) desc) as brandRankSalePrice,
rank() over(order by avg(order_items.sale_price) desc) as brandRankAvgSalePrice,
rank() over(order by count(distinct user_id) desc) as brandRankDistinctUserCount,
rank() over(order by sum(order_items.sale_price)-sum(inventory_items.cost) desc) as brandRankMargin
FROM public.order_items AS order_items
LEFT JOIN public.inventory_items AS inventory_items ON order_items.inventory_item_id = inventory_items.id
LEFT JOIN public.products AS products ON inventory_items.product_id = products.id
group by brand
) brand_summary
Here we’re using a subquery to create a brand rank by various parameters and then allowing the end user to select the appropriate ranking depending on what the end user is interested in using this parameter. This is formatted in the LookML as follows:
parameter: brand_rank_criteria {
description: "Specify which metric to rank brands by"
type: unquoted
#Set a default value so that the dynamic ranking still works even if the user doesn't use the parameter.
#Parameter defualt values work better without underscores, otherwise they sometimes load as '' when added, rather than with the corresponding label.
default_value: "brandRankOrderItemCount"
allowed_value: {
label: "Order Items Sold Count Rank"
value: "brandRankOrderItemCount"
}
allowed_value: {
label: "Order Items Average Sale Price Rank"
value: "brandRankAvgSalePrice"
}
allowed_value: {
label: "Distinct Users with Order Items Count Rank"
value: "brandRankDistinctUserCount"
}
allowed_value: {
label: "Total Margin Rank"
value: "brandRankMargin"
}
}
Now we’re set up for success with a rank of our data. We can take this to a logical next step with parameters and allow the end user to select the top ‘x’ rank that they desire like this. Creating dimension with a case when
statement in the sql:
parameter of the LookML that will bucket anything beneath the value
parameter of brand_rank_limit
as ‘other’ and show the top ‘x’. You can see this here ${TABLE}.brand_rank<={% parameter brand_rank_limit %}
#### This parameter will allow a user to select a Top N ranking limit for bucketing the brands, almost like parameterizing the Row Limit in the UI
parameter: brand_rank_limit {
type: unquoted
default_value: "5"
allowed_value: {
label: "Top 5"
value: "5"
}
allowed_value: {
label: "Top 10"
value: "10"
}
allowed_value: {
label: "Top 20"
value: "20"
}
allowed_value: {
label: "Top 50"
value: "50"
}
}
dimension: brand_rank_top_5_brands {
#Adjust the label that appears in visualization to match the ranking criteria
label_from_parameter: brand_rank_criteria
type: string
sql: case when ${TABLE}.brand_rank<={% parameter brand_rank_limit %} then cast(${TABLE}.brand_rank as varchar) else 'other' end ;;
}
dimension: brand_name_top_5_brands {
label: "Brand Name (Top 5)"
type: string
sql: case when ${TABLE}.brand_rank<={% parameter brand_rank_limit %} then ${TABLE}.brand else 'other' end ;;
}
}
This will get you an explore that looks something like this.
With SQL generated like this.
WITH top_5_brands_vs_other_ranked_by_user_selected_parameter AS (select brand,
brandRankOrderItemCount as brand_rank
from
(
select brand,
--metric to rank over is set by the user's brand_rank_criteria parameter selection
rank() over(order by count(*) desc) as brandRankOrderItemCount,
rank() over(order by sum(order_items.sale_price) desc) as brandRankSalePrice,
rank() over(order by avg(order_items.sale_price) desc) as brandRankAvgSalePrice,
rank() over(order by count(distinct user_id) desc) as brandRankDistinctUserCount,
rank() over(order by sum(order_items.sale_price)-sum(inventory_items.cost) desc) as brandRankMargin
FROM public.order_items AS order_items
LEFT JOIN public.inventory_items AS inventory_items ON order_items.inventory_item_id = inventory_items.id
LEFT JOIN public.products AS products ON inventory_items.product_id = products.id
group by brand
)brand_summary
)
SELECT
case when top_5_brands_vs_other_ranked_by_user_selected_parameter.brand_rank<=5 then cast(top_5_brands_vs_other_ranked_by_user_selected_parameter.brand_rank as varchar) else 'other' end AS "top_5_brands_vs_other_ranked_by_user_selected_parameter.brand_rank_top_5_brands",
case when top_5_brands_vs_other_ranked_by_user_selected_parameter.brand_rank<=5 then top_5_brands_vs_other_ranked_by_user_selected_parameter.brand else 'other' end AS "top_5_brands_vs_other_ranked_by_user_selected_parameter.brand_name_top_5_brands",
COALESCE(COALESCE(CAST( ( SUM(DISTINCT (CAST(FLOOR(COALESCE(order_items.sale_price ,0)*(CAST(1000000 AS DOUBLE PRECISION)*1.0)) AS DECIMAL(38,0))) + CAST(STRTOL(LEFT(MD5(CAST(order_items.id AS VARCHAR)),15),16) AS DECIMAL(38,0))* 1.0e8 + CAST(STRTOL(RIGHT(MD5(CAST(order_items.id AS VARCHAR)),15),16) AS DECIMAL(38,0)) ) - SUM(DISTINCT CAST(STRTOL(LEFT(MD5(CAST(order_items.id AS VARCHAR)),15),16) AS DECIMAL(38,0))* 1.0e8 + CAST(STRTOL(RIGHT(MD5(CAST(order_items.id AS VARCHAR)),15),16) AS DECIMAL(38,0))) ) AS DOUBLE PRECISION) / CAST((CAST(1000000 AS DOUBLE PRECISION)) AS DOUBLE PRECISION), 0), 0) AS "order_items.total_sales_price"
FROM public.order_items AS order_items
LEFT JOIN public.inventory_items AS inventory_items ON order_items.inventory_item_id = inventory_items.id
LEFT JOIN public.products AS products ON inventory_items.product_id = products.id
LEFT JOIN top_5_brands_vs_other_ranked_by_user_selected_parameter ON top_5_brands_vs_other_ranked_by_user_selected_parameter.brand=products.brand
GROUP BY 1,2
ORDER BY 1
LIMIT 500
Happy Bucketing ��