All
I could use some help with a complex join. I have 3 SIMPLIFIED
FICTIONAL tables (add table scripts and data on bottom of message):
In words I want the following:
a resultset of the avg(x), count(flag), sum(flag) for each and every
(outer join) Station_Number where cast_id<>'full' and flag=0 and z<=3.
I would expect that the count(flag) would return the number of rows in
tblLog but it returns number of rows in tblData (the child of the
1:many). What am I doing wrong? The sql I am using is below (as well
as the result).
daben
--SQL
QUERY---
SELECT TOP 100 PERCENT
tblStation.Station_Number,
AVG(CASE WHEN x <> -999 AND flag=0 and cast_id<>'full' THEN x
END) AS AVG_x,
ISNULL(STDEV(CASE WHEN x <> -999 AND flag=0 and
cast_id<>'full' THEN x END),0)AS STDEV_x,
SUM(flag) AS SUM_flag, COUNT(flag) AS COUNT_flag
FROM
((tblStation LEFT JOIN tblLog
ON (tblStation.Station_Number = tblLog.Station_Number))
LEFT JOIN tblData
ON (tblLog.Log_Index = tblData.Log_Index
AND z<=3))
GROUP BY
tblStation.Station_Number
ORDER BY
tblStation.Station_Number
--QUERY
RESULTSET---
Station_Number AVG_x
STDEV_x SUM_flag
COUNT_flag
-- ----
---- --
--
1 1.4285714285714286
0.53452248382484868 3 13
2 NULL
0.0 NULL 0
3 NULL
0.0 NULL 0
4 NULL
0.0 0 1
5 NULL
0.0 6 7
6 NULL
0.0 NULL 0
7 NULL
0.0 NULL 0
8 2.2000000000000002
1.0954451150103324 3 10
9 NULL
0.0 NULL 0
10 NULL
0.0 NULL 0
(10 row(s) affected)
--SQL ADD
TABLE---
CREATE TABLE [dbo].[tblData] (
[Log_Index] [int] NOT NULL ,
[z] [float] NOT NULL ,
[x] [float] NULL
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[tblLog] (
[Log_Index] [int] NOT NULL ,
[Station_Number] [int] NULL ,
[cast_id] [varchar] (10) COLLATE SQL_Latin1_General_CP1_CI_AS NULL ,
[flag] [int] NULL
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[tblStation] (
[Station_Number] [int] NOT NULL
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[tblData] WITH NOCHECK ADD
CONSTRAINT [PK_tblData] PRIMARY KEY CLUSTERED
(
[Log_Index],
[z]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[tblLog] WITH NOCHECK ADD
CONSTRAINT [PK_tblLog] PRIMARY KEY CLUSTERED
(
[Log_Index]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[tblStation] WITH NOCHECK ADD
CONSTRAINT [PK_tblStation] PRIMARY KEY CLUSTERED
(
[Station_Number]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[tblLog] ADD
CONSTRAINT [DF_tblLog_cast_id] DEFAULT ('NA') FOR [cast_id],
CONSTRAINT [DF_tblLog_flag] DEFAULT (0) FOR [flag]
GO
ALTER TABLE [dbo].[tblData] ADD
CONSTRAINT [FK_tblData_tblLog] FOREIGN KEY
(
[Log_Index]
) REFERENCES [dbo].[tblLog] (
[Log_Index]
) ON DELETE CASCADE ON UPDATE CASCADE
GO
ALTER TABLE [dbo].[tblLog] ADD
CONSTRAINT [FK_tblLog_tblStation] FOREIGN KEY
(
[Station_Number]
) REFERENCES [dbo].[tblStation] (
[Station_Number]
) ON DELETE CASCADE ON UPDATE CASCADE
GO
--DATA--
tblStation
[Station_Number]
1
2
3
4
5
6
7
8
9
10
tblLog
Log_Index Station_Number cast_id flag
1 1 full 0
2 1 up 0
3 1 down 1
4 1 up 0
5 1 down 0
6 4 full 0
7 5 full 0
8 5 up 1
9 5 down 1
10 8 full 0
11 8 up 0
12 8 up 0
13 8 down 1
tblData
Log_Index z x
2 1 1
2 2 2
2 3 2
2 4 2
2 5 1
2 6 2
2 7 2
2 8 1
3 1 5
3 2 5
3 3 5
3 4 5
3 5 5
3 6 5
3 7 5
3 8 5
4 1 1
4 2 1
4 3 1
4 4 1
4 5 1
4 6 1
4 7 1
4 8 1
5 1 2
5 2 -999
5 3 -999
5 4 -999
5 5 3
5 6 3
5 7 3
5 8 3
8 1 1
8 2 1
8 3 1
8 4 1
9 1 3
9 2 3
9 3 3
9 4 3
11 1 3
11 2 3
11 3 3
11 4 -999
12 1 1
12 2 1
12 3 -999
12 4 1
13 1 -999
13 2 -999
13 3 -999
13 4 -999CREATE TABLE Stations
(station_number INTEGER NOT NULL PRIMARY KEY);
CREATE TABLE Data
(log_index INTEGER NOT NULL PRIMARY KEY,
z FLOAT NOT NULL,
x FLOAT NULL,
PRIMARY KEY (log_index, z));
CREATE TABLE Log
(log_index INTEGER NOT NULL
REFERENCES Log (log_index)
ON DELETE CASCADE
ON UPDATE CASCADE,
station_number INTEGER NULL
REFERENCES Stations (station_number)
ON DELETE CASCADE
ON UPDATE CASCADE,
cast_id VARCHAR(10) DEFAULT 'NA',
flag INTEGER DEFAULT 0,
PRIMARY KEY (log_index, station_number));
I am going to make a gues beore I go to bed.
SELECT S.station_number,
AVG(D.x) AS x_avg,
COALESCE(STDEV(D.x, 0.0) AS x_stdev,
SUM(L.flag) AS flag_tot,
COUNT(L.flag) AS flag_cnt
FROM Station AS S
LEFT OUTER JOIN
Log AS L
ON S.station_number = L.station_number
AND L.flag = 0
AND L.cast_id <> 'full'
LEFT OUTER JOIN
Data AS D
ON L.log_index = D.log_index
AND D.z <= 3
AND D.x <> -999
GROUP BY S.station_number;|||Hi
Thanks for the reply. Your solution is much more elegant than mine, but
it fails on one point. It returns a count of 7 (indicating how many
rows in table D that are found) NOT a count of 4 (which would indicate
how many rows in table Log). I have found that this works (but is
sloppy) using correlated sub queries:
SELECT TOP 100 PERCENT
tblStation.Station_Number,
AVG(CASE WHEN x <> -999 AND flag=0 and cast_id<>'full' THEN x
END) AS AVG_x,
--ISNULL(STDEV(CASE WHEN x <> -999 AND flag=0 and
cast_id<>'full' THEN x END),0)AS STDEV_x,
(SELECT SUM(CASE WHEN cast_id<>'full' THEN flag END) FROM tblLog WHERE
tblStation.Station_Number=tblLog.Station_Number) AS SUM_flag,
(SELECT COUNT(CASE WHEN cast_id<>'full' THEN flag END) FROM tblLog
WHERE tblStation.Station_Number=tblLog.Station_Number ) AS COUNT_flag,
COUNT(flag) as COUNT_flag2
FROM
((tblStation LEFT JOIN tblLog
ON (tblStation.Station_Number = tblLog.Station_Number))
LEFT JOIN tblData
ON (tblLog.Log_Index = tblData.Log_Index
AND z<=3))
GROUP BY
tblStation.Station_Number
ORDER BY
tblStation.Station_Number
No comments:
Post a Comment