E Sample Validation SQLs

This set of sample SQL commands provides scripts to run using APEX which can help validate your initial dimension and fact loads, especially if it is the first time loading the data and quality is unknown. Do not load data into the platform without performing some level of validation on it first, as this will greatly reduce the time spent reworking and reloading data.



------------------------------------------------
-- Checks for CALENDAR.csv file load
------------------------------------------------
-- Verify initial calendar data before staging it further, row counts should match data file
SELECT * FROM W_MCAL_PERIOD_DTS

-- Check total counts, all counts should be same. This can indirectly check for nulls in required columns.
SELECT count(*),count(MCAL_CAL_ID),count(MCAL_PERIOD_TYPE),count(MCAL_PERIOD_NAME),
count(MCAL_PERIOD),count(MCAL_PERIOD_ST_DT),count(MCAL_PERIOD_END_DT),count(MCAL_QTR),
count(MCAL_YEAR),count(MCAL_QTR_START_DT),count(MCAL_QTR_END_DT),count(MCAL_YEAR_START_DT),
count(MCAL_YEAR_END_DT) FROM W_MCAL_PERIOD_DTS

-- This should not return any rows
SELECT * FROM W_MCAL_PERIOD_DTS WHERE MCAL_CAL_ID IS NULL or MCAL_CAL_ID != 'Retail Calendar~41'

-- Checking duplicate rows, if any. This should not return any rows.
SELECT MCAL_PERIOD_NAME,count(*) FROM W_MCAL_PERIOD_DTS GROUP BY MCAL_PERIOD_NAME having count(MCAL_PERIOD_NAME) > 1


-- Check number of periods per year. Should always be 12.
SELECT MCAL_YEAR,count(MCAL_PERIOD_NAME) FROM W_MCAL_PERIOD_DTS GROUP BY MCAL_YEAR ORDER BY MCAL_YEAR
SELECT MCAL_YEAR,count(MCAL_PERIOD) FROM W_MCAL_PERIOD_DTS GROUP BY MCAL_YEAR ORDER BY MCAL_YEAR

-- After Load procedures completed, check following tables
select count(*) from W_MCAL_PERIOD_D
select count(*) from W_MCAL_DAY_D
select count(*) from W_MCAL_WEEK_D

------------------------------------------------
-- Checks for PRODUCT.csv file load
------------------------------------------------
-- Verify initial product data before staging it further, row counts should match data file
select * from W_PRODUCT_DTS

-- Check total count, all counts should be same.  This can indirectly check for nulls in required columns.
SELECT count(*),count(item),count(distinct(item)),count(item_level),count(tran_level),
count(LVL4_PRODCAT_ID),count(LVL4_PRODCAT_UID),count(LVL5_PRODCAT_ID),count(LVL5_PRODCAT_UID),
count(LVL6_PRODCAT_ID),count(LVL7_PRODCAT_ID),count(LVL8_PRODCAT_ID),count(TOP_PRODCAT_ID),
count(ITEM_DESC),count(LVL4_PRODCAT_DESC),count(LVL5_PRODCAT_DESC),count(LVL6_PRODCAT_DESC),
count(LVL7_PRODCAT_DESC),count(LVL8_PRODCAT_DESC),count(TOP_PRODCAT_DESC) FROM W_PRODUCT_DTS

-- Check individual counts to make sure it aligns with your source data
SELECT count(*),count(ITEM_PARENT),count(distinct(ITEM_PARENT)),count(ITEM_GRANDPARENT),count(distinct(ITEM_GRANDPARENT)) FROM W_PRODUCT_DTS WHERE ITEM_LEVEL = 1
SELECT count(*),count(ITEM_PARENT),count(distinct(ITEM_PARENT)),count(ITEM_GRANDPARENT),count(distinct(ITEM_GRANDPARENT)) FROM W_PRODUCT_DTS WHERE ITEM_LEVEL = 2
SELECT count(*),count(ITEM_PARENT),count(distinct(ITEM_PARENT)),count(ITEM_GRANDPARENT),count(distinct(ITEM_GRANDPARENT)) FROM W_PRODUCT_DTS WHERE ITEM_LEVEL = 3
 
-- Checking duplicate rows, if any. This should not return any rows.
SELECT item,count(1) FROM W_PRODUCT_DTS GROUP BY item having count(1) > 1

-- Check item_level, should not have NULL, should have values only 1,2 or 3. Make sure Count makes sense
SELECT item_level, count(*) FROM W_PRODUCT_DTS GROUP BY item_level ORDER BY 1

-- Check tran_level, should not have NULL, should have only one value for our purpose. Make sure Count makes sense
SELECT tran_level, count(*) FROM W_PRODUCT_DTS GROUP BY tran_level ORDER BY 1

-- After DTS to DS job executed, check following tables for data
SELECT count(*) FROM W_PRODUCT_DS
SELECT count(*) FROM W_PRODUCT_DS_TL
SELECT count(*) FROM W_PROD_CAT_DHS
SELECT count(*) FROM W_DOMAIN_MEMBER_DS_TL

-- Expect records for "MCAT" which is the product hierarchy labels code
SELECT DOMAIN_CODE, DOMAIN_TYPE_CODE,LANGUAGE_CODE, SRC_LANGUAGE_CODE,count(1) 
FROM W_DOMAIN_MEMBER_DS_TL GROUP BY DOMAIN_CODE, DOMAIN_TYPE_CODE,LANGUAGE_CODE, SRC_LANGUAGE_CODE

-- After Load procedures completed, check following tables
select count(*) from w_prod_cat_dh
select count(*) from w_product_d
select count(*) from w_product_d_tl

-- check for MCAT records for hierachy labels, should align with hierachy level counts
select domain_code,count(*) from W_DOMAIN_MEMBER_LKP_TL group by domain_code


------------------------------------------------
-- Checks for ORGANIZATION.csv file load
------------------------------------------------
-- Verify initial location data before staging it further, row counts should match data file
SELECT * FROM W_INT_ORG_DTS

-- Check total count, all counts should be same.  This can indirectly check for nulls in required columns.
SELECT count(*),count(ORG_NUM),count(distinct(ORG_NUM)),count(ORG_TYPE_CODE),count(CURR_CODE),
count(ORG_HIER10_NUM),count(ORG_HIER11_NUM),count(ORG_HIER12_NUM),count(ORG_HIER13_NUM),
count(ORG_TOP_NUM),count(ORG_DESC),count(ORG_HIER10_DESC),count(ORG_HIER11_DESC),
count(ORG_HIER12_DESC),count(ORG_HIER13_DESC),count(ORG_TOP_DESC) FROM W_INT_ORG_DTS

-- Checking duplicate rows, if any. This should not return any rows.
SELECT ORG_NUM,count(1) FROM W_INT_ORG_DTS GROUP BY ORG_NUM having count(1) > 1

-- Check ORG_TYPE_CODE, CURR_CODE should not have nulls
-- ORG_TYPE_CODE should be either "S" for Store or "W" for Warehouse
SELECT ORG_TYPE_CODE, CURR_CODE, count(*) FROM W_INT_ORG_DTS GROUP BY ORG_TYPE_CODE, CURR_CODE ORDER BY 2,1

-- After DTS to DS job executed, check following tables for expected data
SELECT count(*) FROM W_INT_ORG_DS
SELECT count(*) FROM W_INT_ORG_DS_TL
SELECT count(*) FROM W_INT_ORG_DHS
SELECT count(*) FROM W_DOMAIN_MEMBER_DS_TL
-- Expect records for "RTL_ORG" which is the location hierarchy labels code
SELECT DOMAIN_CODE, DOMAIN_TYPE_CODE,LANGUAGE_CODE, SRC_LANGUAGE_CODE,count(1) 
FROM W_DOMAIN_MEMBER_DS_TL GROUP BY DOMAIN_CODE, DOMAIN_TYPE_CODE,LANGUAGE_CODE, SRC_LANGUAGE_CODE

-- Org hierarchy level validation on DHS table before loading it to DH
-- On first loading a new hierarchy, this must return with zero issues in the level structure.  DO NOT proceed if any issues show up.
SELECT '1',
		          'LOCATION org_hier9_num' LEVEL_DESC,
				  location_a.org_hier9_num C_LEVEL,
				  location_a.org_hier10_num P1_LEVEL,
                  location_a.org_hier11_num P2_LEVEL,
				  location_a.org_hier12_num P3_LEVEL,
				  location_a.org_hier13_num P4_LEVEL,
				  location_a.org_top_num P5_LEVEL
             FROM w_int_org_dhs location_a,  w_int_org_dhs location_b
            WHERE location_a.level_name = 'LOCATION'
              AND location_b.level_name = location_a.level_name
              AND location_a.org_hier9_num = location_b.org_hier9_num
              AND (location_a.org_hier10_num <> location_b.org_hier10_num
                   or location_a.org_hier11_num <> location_b.org_hier11_num
	               or location_a.org_hier12_num <> location_b.org_hier12_num
	               or location_a.org_hier13_num <> location_b.org_hier13_num
	               or location_a.org_top_num <> location_b.org_top_num)
        UNION ALL
           SELECT '1',
		          'DISTRICT org_hier10_num' LEVEL_DESC,
				  location_a.org_hier10_num C_LEVEL,
				  null P1_LEVEL,
				  location_a.org_hier11_num P2_LEVEL,
                  location_a.org_hier12_num P3_LEVEL,
                  location_a.org_hier13_num P4_LEVEL,
                  location_a.org_top_num P5_LEVEL				 
		     FROM w_int_org_dhs location_a,  w_int_org_dhs location_b
            where location_a.level_name = 'DISTRICT'
              and location_b.level_name = location_a.level_name
              and location_a.org_hier10_num = location_b.org_hier10_num
              and (location_a.org_hier11_num <> location_b.org_hier11_num
	               or location_a.org_hier12_num <> location_b.org_hier12_num
	               or location_a.org_hier13_num <> location_b.org_hier13_num
	               or location_a.org_top_num <> location_b.org_top_num)
        UNION ALL
           SELECT '1',
		          'REGION org_hier11_num' LEVEL_DESC,
			  	  location_a.org_hier11_num C_LEVEL,
				  NULL P1_LEVEL,
				  NULL P2_LEVEL,
				  location_a.org_hier12_num P3_LEVEL,
				  location_a.org_hier13_num P4_LEVEL,
                  location_a.org_top_num P5_LEVEL	
		     FROM w_int_org_dhs location_a,  w_int_org_dhs location_b
            where location_a.level_name = 'REGION'
              and location_b.level_name = location_a.level_name
              and location_a.org_hier11_num = location_b.org_hier11_num
              and (location_a.org_hier12_num <> location_b.org_hier12_num
	               or location_a.org_hier13_num <> location_b.org_hier13_num
	               or location_a.org_top_num <> location_b.org_top_num)
        UNION ALL
           SELECT '1',
		          'AREA org_hier12_num' LEVEL_DESC,
				  location_a.org_hier12_num C_LEVEL,
				  NULL P1_LEVEL,
				  NULL P2_LEVEL,
				  NULL P3_LEVEL,
				  location_a.org_hier13_num P4_LEVEL,
                  location_a.org_top_num P5_LEVEL				 
		     FROM w_int_org_dhs location_a,  w_int_org_dhs location_b
            where location_a.level_name = 'AREA'
              and location_b.level_name = location_a.level_name
              and location_a.org_hier12_num = location_b.org_hier12_num
              and (location_a.org_hier13_num <> location_b.org_hier13_num
	               or location_a.org_top_num <> location_b.org_top_num)
        UNION ALL
           SELECT '1',
		          'CHAIN org_hier13_num' LEVEL_DESC,
				  location_a.org_hier13_num C_LEVEL,
				  NULL P1_LEVEL,
				  NULL P2_LEVEL,
				  NULL P3_LEVEL,
				  NULL P4_LEVEL,
				  location_a.org_top_num P5_LEVEL
		     FROM w_int_org_dhs location_a,  w_int_org_dhs location_b
            where location_a.level_name = 'CHAIN'
              and location_b.level_name = location_a.level_name
              and location_a.org_hier13_num = location_b.org_hier13_num
              and location_a.org_top_num <> location_b.org_top_num;

-- After Load procedures completed, check following tables for final dimension data
select count(*) from w_int_org_dh
select count(*) from w_int_org_d
select count(*) from w_int_org_d_tl

-- check for RTL_ORG records for the descriptions of hierarchy levels
select domain_code,count(*) from W_DOMAIN_MEMBER_LKP_TL group by domain_code


------------------------------------------------
-- Checks on EXCH_RATE.csv file load
------------------------------------------------
select * from w_exch_rate_dts

select * from w_exch_rate_gs

select * from w_exch_rate_g


------------------------------------------------
-- Checks on ATTR.csv and PROD_ATTR.csv file load
------------------------------------------------
select * from w_attr_dts
select * from w_product_attr_dts
select * from w_rtl_item_grp1_ds
select * from w_rtl_item_grp1_d

------------------------------------------------
-- Check on W_DOMAIN_MEMBER_LKP_TL issues while loading dimensions
------------------------------------------------
--- DOMAIN MEMBER DUPLICATE RECORD ERROR ---
SELECT DOMAIN_CODE,DOMAIN_TYPE_CODE,DOMAIN_MEMBER_CODE,count(1) FROM W_DOMAIN_MEMBER_DS_TL GROUP BY DOMAIN_CODE,DOMAIN_TYPE_CODE,DOMAIN_MEMBER_CODE having count(1) > 1


SELECT DOMAIN_TYPE_CODE, DOMAIN_MEMBER_CODE, DOMAIN_MEMBER_NAME FROM W_DOMAIN_MEMBER_DS_TL WHERE (DOMAIN_CODE,DOMAIN_TYPE_CODE,DOMAIN_MEMBER_CODE) IN
(SELECT DOMAIN_CODE,DOMAIN_TYPE_CODE,DOMAIN_MEMBER_CODE FROM W_DOMAIN_MEMBER_DS_TL GROUP BY DOMAIN_CODE,DOMAIN_TYPE_CODE,DOMAIN_MEMBER_CODE having count(1) > 1)
ORDER BY 1,2,3


------------------------------------------------
-- Checks on SALES.csv file
------------------------------------------------
-- Verify initial sales data before staging it further, check all columns are populated with expected values (i.e. CTX was properly formed)
select * from W_RTL_SLS_TRX_IT_LC_DY_FTS

-- Should match the record count from last loaded SALES.csv file
select count(*) from W_RTL_SLS_TRX_IT_LC_DY_FTS

-- Should match W_RTL_SLS_TRX_IT_LC_DY_FTS count
select count(*) from W_RTL_SLS_TRX_IT_LC_DY_FS

-- Should match or be close W_RTL_SLS_TRX_IT_LC_DY_FS count
select count(*) from W_RTL_SLS_TRX_IT_LC_DY_F

-- Look for sls_trx_id in stage but not final sales table
select * from
(
select fs.fs_trx_id, tg_trx_id
from
(select /*+ parallel */ sls_trx_id fs_trx_id from w_rtl_sls_trx_it_lc_dy_fs) fs left outer join
(select sls_trx_id tg_trx_id from w_rtl_sls_trx_it_lc_dy_f union all select sls_trx_id from e$_w_rtl_sls_trx_it_lc_dy_tmp) tg
on (fs.fs_trx_id = tg.tg_trx_id)
) where tg_trx_id is null;

-- Look for sls_trx_id/prod_it_wid in stage but not final sales table
select * from
(
select fs.prod_it_wid, fs.fs_trx_id,  prod_wid, tg_trx_id
from
(select /*+ parallel */ prod_it_wid, sls_trx_id fs_trx_id from w_rtl_sls_trx_it_lc_dy_tmp) fs left outer join
(select prod_wid, sls_trx_id tg_trx_id from w_rtl_sls_trx_it_lc_dy_f) tg
on (fs.prod_it_wid = tg.prod_wid and fs.fs_trx_id = tg.tg_trx_id)
) where tg_trx_id is null or prod_wid is null;


------------------------------------------------
-- Checks on INVENTORY.csv file
------------------------------------------------
-- Verify initial inventory data before staging it further, check all columns are populated with expected values (i.e. CTX was properly formed)
select * from W_RTL_INV_IT_LC_DY_FTS

-- Should match the record count from last loaded INVENTORY.csv file
select count(*) from W_RTL_INV_IT_LC_DY_FTS

-- Check that data is making it to target tables after load
select count(*) from W_RTL_INV_IT_LC_DY_F
select count(*) from W_RTL_INV_IT_LC_WK_A

-- inventory validation check after rejected records occur (Support needs to run this currently, APEX doesn't provide E$ or TMP tables)
SELECT 'E$_W_RTL_INV_IT_LC_DY_TMP','PROD_IT_NUM' DIMM_NAME,PROD_IT_NUM DIMM_VALUE ,NULL,CHECK_DATE,EFFECTIVE_FROM_DT,EFFECTIVE_TO_DT,'INVALID_PROD_IT_NUM' INVALID_REASON,NULL,NULL
FROM
(SELECT DISTINCT PROD_IT_NUM,TRUNC(CHECK_DATE) CHECK_DATE, NULL EFFECTIVE_FROM_DT, NULL EFFECTIVE_TO_DT FROM E$_W_RTL_INV_IT_LC_DY_TMP WHERE PROD_IT_NUM NOT IN (SELECT PROD_IT_NUM FROM W_PRODUCT_D_RTL_TMP) UNION ALL SELECT DISTINCT DIMM.PROD_IT_NUM, TRUNC(ERR.CHECK_DATE) CHECK_DATE, DIMM.SRC_EFF_FROM_DT EFFECTIVE_FROM_DT, DIMM.SRC_EFF_TO_DT EFFECTIVE_TO_DT from W_PRODUCT_D_RTL_TMP DIMM, E$_W_RTL_INV_IT_LC_DY_TMP ERR WHERE DIMM.PROD_IT_NUM = ERR.PROD_IT_NUM AND (ERR.DAY_DT > DIMM.SRC_EFF_TO_DT OR ERR.DAY_DT < DIMM.SRC_EFF_FROM_DT) AND NOT EXISTS (SELECT 1 FROM W_PRODUCT_D_RTL_TMP DIMM1 WHERE ERR.PROD_IT_NUM = DIMM1.PROD_IT_NUM AND (ERR.DAY_DT <= DIMM1.SRC_EFF_TO_DT AND ERR.DAY_DT >= DIMM.SRC_EFF_FROM_DT)))
UNION ALL
SELECT 'E$_W_RTL_INV_IT_LC_DY_TMP','ORG_NUM' DIMM_NAME,ORG_NUM DIMM_VALUE ,NULL,CHECK_DATE,EFFECTIVE_FROM_DT,EFFECTIVE_TO_DT,'INVALID_ORG_DH_NUM' INVALID_REASON,NULL,NULL
FROM
(SELECT DISTINCT ORG_NUM,TRUNC(CHECK_DATE) CHECK_DATE, NULL EFFECTIVE_FROM_DT, NULL EFFECTIVE_TO_DT FROM E$_W_RTL_INV_IT_LC_DY_TMP WHERE ORG_NUM NOT IN (SELECT ORG_NUM FROM W_INT_ORG_DH_RTL_TMP) UNION ALL SELECT DISTINCT DIMM.ORG_NUM, TRUNC(ERR.CHECK_DATE) CHECK_DATE, DIMM.EFFECTIVE_FROM_DT EFFECTIVE_FROM_DT, DIMM.EFFECTIVE_TO_DT EFFECTIVE_TO_DT from W_INT_ORG_DH_RTL_TMP DIMM, E$_W_RTL_INV_IT_LC_DY_TMP ERR WHERE DIMM.ORG_NUM = ERR.ORG_NUM AND (ERR.DAY_DT > DIMM.EFFECTIVE_TO_DT OR ERR.DAY_DT < DIMM.EFFECTIVE_FROM_DT) AND NOT EXISTS (SELECT 1 FROM W_INT_ORG_DH_RTL_TMP DIMM1 WHERE ERR.ORG_NUM = DIMM1.ORG_NUM AND (ERR.DAY_DT <= DIMM1.EFFECTIVE_TO_DT AND ERR.DAY_DT >= DIMM.EFFECTIVE_FROM_DT)))
UNION ALL
SELECT 'E$_W_RTL_INV_IT_LC_DY_TMP','ORG_NUM' DIMM_NAME,ORG_NUM DIMM_VALUE,NULL,CHECK_DATE,EFFECTIVE_FROM_DT,EFFECTIVE_TO_DT,'INVALID_ORG_NUM' INVALID_REASON,NULL,NULL
FROM
(SELECT DISTINCT ORG_NUM,TRUNC(CHECK_DATE) CHECK_DATE, NULL EFFECTIVE_FROM_DT, NULL EFFECTIVE_TO_DT FROM E$_W_RTL_INV_IT_LC_DY_TMP WHERE ORG_NUM NOT IN (SELECT ORG_NUM FROM W_INT_ORG_D_RTL_TMP) UNION ALL SELECT DISTINCT DIMM.ORG_NUM, TRUNC(ERR.CHECK_DATE) CHECK_DATE, DIMM.EFFECTIVE_FROM_DT EFFECTIVE_FROM_DT, DIMM.EFFECTIVE_TO_DT EFFECTIVE_TO_DT from W_INT_ORG_D_RTL_TMP DIMM, E$_W_RTL_INV_IT_LC_DY_TMP ERR WHERE DIMM.ORG_NUM = ERR.ORG_NUM AND (ERR.DAY_DT > DIMM.EFFECTIVE_TO_DT OR ERR.DAY_DT < DIMM.EFFECTIVE_FROM_DT))
UNION ALL
SELECT 'E$_W_RTL_INV_IT_LC_DY_TMP','DAY_DT' DIMM_NAME,TO_CHAR(DAY_DT,'YYYYMMDD') DIMM_VALUE, NULL,CHECK_DATE,EFFECTIVE_FROM_DT,EFFECTIVE_TO_DT,'INVALID_DAY_DT' INVALID_REASON,NULL,NULL
FROM
(SELECT DISTINCT DAY_DT,TRUNC(CHECK_DATE) CHECK_DATE, NULL EFFECTIVE_FROM_DT, NULL EFFECTIVE_TO_DT FROM E$_W_RTL_INV_IT_LC_DY_TMP WHERE (DATASOURCE_NUM_ID, DAY_DT)  NOT IN (SELECT MDAY.DATASOURCE_NUM_ID,MCAL_DAY_DT FROM W_MCAL_DAY_D MDAY,W_MCAL_CONTEXT_G MTEXT WHERE MDAY.MCAL_CAL_WID=MTEXT.MCAL_CAL_WID AND MTEXT.ORG_ID IN (Select PARAM_VALUE From C_ODI_PARAM Where PARAM_NAME = 'ORG_ID' AND SCENARIO_NAME = 'GLOBAL') AND MTEXT.CALENDAR_ID IN (Select PARAM_VALUE From C_ODI_PARAM Where PARAM_NAME = 'CALENDAR_ID' AND SCENARIO_NAME = 'GLOBAL')));