Sample Header Ad - 728x90

Sql Server: Query assistance to retrieve count in two different groupings within a dataset

1 vote
2 answers
63 views
We have a dataset with "visits & events" with their "frequency". Sample data looks like this:
DROP TABLE IF EXISTS dbo.EventMetrics
GO
--
-- Create table with sample dataset
--

CREATE TABLE dbo.EventMetrics
(
	  EventMetricsID		INT			NOT NULL	IDENTITY(1,1)
	, EventDate				INT			NOT NULL
	, FacilityID			INT			NOT NULL
	, UniqueVisitID			VARCHAR(25)	NOT NULL
	, ItemID				INT			NOT NULL
	, ItemValue				VARCHAR(10)	NOT NULL
	, ItemFrequency			INT			NOT NULL
)

--
-- Insert some sample dataset
--
INSERT INTO dbo.EventMetrics (EventDate, FacilityID, UniqueVisitID, ItemID, ItemValue, ItemFrequency)
VALUES						  (20250101, 1, '2025-01-01_1_100', 1, 'M', 1)
							, (20250101, 1, '2025-01-01_1_101', 1, 'M', 1)
							, (20250101, 1, '2025-01-01_1_102', 1, 'M', 1)
							, (20250101, 1, '2025-01-01_1_103', 1, 'F', 1)
							, (20250101, 1, '2025-01-01_1_104', 1, 'F', 1)
							, (20250101, 1, '2025-01-01_1_105', 1, 'F', 1)

							, (20250101, 1, '2025-01-01_1_106', 2, 'Boston', 1)
							, (20250101, 1, '2025-01-01_1_107', 2, 'Boston', 1)
							, (20250101, 1, '2025-01-01_1_108', 2, 'Boston', 1)
							, (20250101, 1, '2025-01-01_1_109', 2, 'Tampa', 1)
							, (20250101, 1, '2025-01-01_1_110', 2, 'London', 1)

							, (20250101, 2, '2025-01-01_2_111', 1, 'M', 1)
							, (20250101, 2, '2025-01-01_2_113', 1, 'M', 1)
							, (20250101, 2, '2025-01-01_2_114', 1, 'F', 1)

							, (20250101, 2, '2025-01-01_2_115', 2, 'Boston', 1)
							, (20250101, 2, '2025-01-01_2_116', 2, 'Tampa', 1)
							, (20250101, 2, '2025-01-01_2_117', 2, 'Tampa', 1)
							, (20250101, 2, '2025-01-01_2_118', 2, 'Miami', 1)
							, (20250101, 2, '2025-01-01_2_119', 2, 'Miami', 1)
							, (20250101, 2, '2025-01-01_2_120', 2, 'Miami', 1)
							, (20250101, 2, '2025-01-01_2_121', 2, 'Miami', 1)

SELECT * FROM dbo.EventMetrics
Sample data looks like this: Initial dataset From this dataset, we want to capture some aggregate metrics. For example: 1. Number of times we see **ItemValue** (M) for **FacilityID** (1). - Answer is 3 2. Number of times we see **ItemID** (1) for **FacilityID** (1). - Answer is 6 3. Number of **UniqueVisitID**'s for a given **ItemValue** (M) & **FacilityID** (1) - Answer is 3 4. Number of **UniqueVisitID**'s for a given **ItemID** (M) & **FacilityID** (1) - Answer is 6 5. Number of **UniqueVisitID**'s in the current dataset - Answer is 21 Below is the SQL query that could answer some of my questions (from 1 to 3), but for questions 4 & 5, I need some mechanism to aggregate the numbers within the same SQL query. **My progress so far:**
--
-- WIP query
--
SELECT 	  FacilityID
		, ItemID
		, ItemValue
		, SUM(SUM(ItemFrequency)) OVER (PARTITION BY ItemID, FacilityID)				AS [Total_Items_Per_Facility]
		, SUM(SUM(ItemFrequency)) OVER (PARTITION BY ItemID, FacilityID, ItemValue)		AS [Count_Per_ItemValue_Per_Facility]
		, COUNT(DISTINCT UniqueVisitID)													AS [Count_Of_UniqueVisits_Per_ItemValue_Per_Facility]
		, '??'																			AS [Count_Of_UniqueVisits_Per_ItemID_Per_Facility]
		, '??'																			AS [Count_Of_UniqueVisits_For_This_Dataset]
FROM dbo.EventMetrics
WHERE EventDate BETWEEN 20250101 AND 20250101
AND FacilityID IN (1, 2)
AND ItemID IN (1, 2)
GROUP BY  FacilityID
		, ItemID
		, ItemValue
ORDER BY  FacilityID
		, ItemID
		, ItemValue
The above TSQL gives me the below result. Sample result Using CTE and an extra query I can get answers for bullet points 4 & 5. See the query below: **Current Solution (multi step)**
--
--	Current two step Solution, with two queries.
--
DECLARE @TotalUniqueVisitIDs INT

SELECT @TotalUniqueVisitIDs = COUNT(DISTINCT UniqueVisitID)
FROM dbo.EventMetrics
WHERE EventDate BETWEEN 20250101 AND 20250101
AND FacilityID IN (1, 2)
AND ItemID IN (1, 2)

;WITH Count_UniqueVisitID_Per_Facility (FacilityID, ItemID, Count_UniqueVisitID_Per_Facility)
AS	(
		SELECT FacilityID, ItemID, COUNT(DISTINCT UniqueVisitID) AS [Count_UniqueVisitID_Per_Facility]
		FROM dbo.EventMetrics
		WHERE EventDate BETWEEN 20250101 AND 20250101
		AND FacilityID IN (1, 2)
		AND ItemID IN (1, 2)
		GROUP BY FacilityID, ItemID

	)
SELECT 	  
		  E.FacilityID
		, E.ItemID
		, E.ItemValue
		, SUM(SUM(E.ItemFrequency)) OVER (PARTITION BY E.ItemID, E.FacilityID)				AS [Total_Items_Per_Facility]
		, SUM(SUM(E.ItemFrequency)) OVER (PARTITION BY E.ItemID, E.FacilityID, ItemValue)	AS [Count_Per_ItemValue_Per_Facility]
		, COUNT(DISTINCT UniqueVisitID)														AS [Count_Of_UniqueVisits_Per_ItemValue_Per_Facility]
		, UVF.Count_UniqueVisitID_Per_Facility												AS [Count_Of_UniqueVisits_Per_ItemID_Per_Facility]
		, @TotalUniqueVisitIDs																AS [Count_Of_UniqueVisits_For_This_Dataset]
FROM dbo.EventMetrics AS E

INNER JOIN Count_UniqueVisitID_Per_Facility AS UVF
	ON UVF.FacilityID = E.FacilityID
	AND UVF.ItemID = E.ItemID

WHERE E.EventDate BETWEEN 20250101 AND 20250101
AND E.FacilityID IN (1, 2)
AND E.ItemID IN (1, 2)

GROUP BY  E.FacilityID
		, E.ItemID
		, E.ItemValue
		, UVF.Count_UniqueVisitID_Per_Facility

ORDER BY  E.FacilityID
		, E.ItemID
		, E.ItemValue
**Question** How can I get one query to return metrics for all 5? **Context:** 1. This is a data warehouse table with billions of records 2. Clustered Columnstore index exists 3. There are more metrics generated as part of this query; For clarity, only the pertinent parts are included.
Asked by ToC (727 rep)
May 29, 2025, 07:35 PM
Last activity: May 30, 2025, 09:41 AM