You are on page 1of 40

Introduction to Snowflake Warehouses |

SNOWFLAKE VIRTUAL WAREHOUSES A virtual warehouse is a cluster of compute resources


in Snowflake. It is often referred to simply as a “warehouse”.

A warehouse provides the below required resources : CPU Memory Temporary storage
Warehouse perform the following operations in a Snowflake session:

Executing SQL SELECT statements (e.g. Retrieving rows from tables and views).

Performing DML operations, such as: Updating rows in tables (DELETE , INSERT , UPDATE
and MERGE).

Loading data into tables (COPY INTO table_name. Unloading data from tables (COPY INTO
location_name

A virtual warehouse is a cluster of compute resources in Snowflake.

It is often referred to simply as a “warehouse”. A warehouse provides the below required


resources :

CPU Memory Temporary storage Warehouse perform the following operations in a Snowflake
session: Executing SQL SELECT statements (e.g. Retrieving rows from tables and views).
Performing DML operations,

such as: Updating rows in tables (DELETE , INSERT , UPDATE and MERGE). Loading data
into tables (COPY INTO table_name.

Unloading data from tables (COPY INTO location_name


In this video, I am going to show you How to Delete/Remove Duplicate Records/Rows from
Snowflake Table. ⌚Timestamps 00:00 Introduction 02:06 Case 1 06:03 Case 2 08:48 Case 3
13:18 Case 4 19:14 Case 5 22:48 Outro --How to Delete Duplicate Records/Rows from
Snowflake Table

create or replace TABLE DEPT ( DEPTNO NUMBER(38,0), DNAME VARCHAR(16777216),


LOC VARCHAR(16777216) );

select * from dept; insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW
YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO'), (40, 'OPERATIONS',
'BOSTON');

insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW YORK'), (20,
'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO') ;

select * from dept order by deptno desc; --Case 1: --Step 1: Create a new table dept_distinct

using DISTINCT Clause --Step 2: Drop existing table DEPT --Step 3: Rename new table with
Original table dept.

CREATE TABLE DEPT_DISTINCT AS SELECT DISTINCT * from DEPT; DROP TABLE dept;
alter table dept_distinct rename to dept; select * from dept order by deptno desc; --

Case 2: --Step 1: Create a new table DEPT_DISTINCT using DISTINCT Clause –

Step 2: ALTER TABLE using SWAP WITH method --Step 3: Drop table created from step 1 --
Prepare data truncate table dept;

insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW YORK'), (20,
'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO'), (40, 'OPERATIONS', 'BOSTON');

insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW YORK'), (20,
'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO') ;
CREATE TABLE DEPT_DISTINCT AS SELECT DISTINCT * from DEPT;

select * from DEPT_DISTINCT; ALTER TABLE DEPT SWAP WITH DEPT_DISTINCT;

select * from DEPT_DISTINCT; DROP TABLE DEPT_DISTINCT;

SELECT * FROM DEPT;

--Case 3: Use ROW_NUMBER Analytic function if we have any one column value with unique
value.

--Step 1: Use delete statement alter table dept add column insert_ts timestamp; truncate table
dept; select * from dept order by deptno, insert_ts desc; DELETE FROM DEPT WHERE (deptno,
insert_ts) not in (SELECT deptno, insert_ts FROM ( SELECT *, ROW_NUMBER() OVER
(PARTITION BY deptno ORDER BY insert_ts desc) RNO FROM DEPT ) WHERE RNO = 1 );
select * from dept order by deptno, insert_ts desc; ---- --Case 4: Use ROW_NUMBER Analytic
function with QUALIFY if we have any one column value with unique value. --Step 1: Use delete
statement truncate table dept; insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS)
values(10, 'ACCOUNTING', 'NEW YORK',current_date() -1), (20, 'RESEARCH',
'DALLAS',current_date() -1), (30, 'SALES', 'CHICAGO',current_date() -1), (30, 'SALES',
'CHICAGO',current_date() -2), (40, 'OPERATIONS', 'BOSTON',current_date() -1); insert into
DEPT (DEPTNO, DNAME, LOC,INSERT_TS) values(10, 'ACCOUNTING', 'NEW
YORK',current_date()), (20, 'RESEARCH', 'DALLAS',current_date()), (30, 'SALES',
'CHICAGO',current_date()) ; select * from dept order by deptno, insert_ts desc; DELETE FROM
DEPT WHERE (deptno, insert_ts) not in (SELECT deptno, insert_ts FROM ( SELECT *,
ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY insert_ts desc) RNO FROM DEPT
QUALIFY RNO=1 ) ); select * from dept order by deptno, insert_ts desc; ---- here is problem if no
column with unique value truncate table dept; insert into DEPT (DEPTNO, DNAME,
LOC,INSERT_TS) values(10, 'ACCOUNTING', 'NEW YORK',current_date() -1), (20,
'RESEARCH', 'DALLAS',current_date() -1), (30, 'SALES', 'CHICAGO',current_date() -1), (40,
'OPERATIONS', 'BOSTON',current_date() -1), (40, 'OPERATIONS', 'BOSTON',current_date() -
1); insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS) values(10, 'ACCOUNTING', 'NEW
YORK',current_date()), (20, 'RESEARCH', 'DALLAS',current_date()), (30, 'SALES',
'CHICAGO',current_date()) ; select * from dept order by deptno, insert_ts desc; DELETE FROM
DEPT WHERE (deptno, insert_ts) not in (SELECT deptno, insert_ts FROM ( SELECT *,
ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY insert_ts desc) RNO FROM DEPT
QUALIFY RNO=1 ) ) ; select * from dept order by deptno, insert_ts desc; -- in this case we have
to use distinct option -- Case 5: ---GROUP BY Clause to Remove Duplicate alter table dept drop
column insert_ts; truncate table dept; insert into DEPT (DEPTNO, DNAME, LOC) values(10,
'ACCOUNTING', 'NEW YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO'), (40,
'OPERATIONS', 'BOSTON') ; insert into DEPT (DEPTNO, DNAME, LOC) values(10,
'ACCOUNTING', 'NEW YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO') ; select
* from dept order by deptno desc; CREATE TABLE dept_group_by as SELECT deptno, dname,
loc FROM DEPT group by deptno, dname, loc ; select * from dept_group_by order by deptno
desc; Alter table dept swap with dept_group_by; select * from dept; select * from dept_group_by;
drop table dept_group_by;

How to Delete Duplicate Records/Rows from Snowflake Table create or replace TABLE DEPT
( DEPTNO NUMBER(38,0), DNAME VARCHAR(16777216), LOC VARCHAR(16777216) );
select * from dept; insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW
YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO'), (40, 'OPERATIONS',
'BOSTON'); insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW
YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO')
--Case 1: --Step 1: Create a new table dept_distinct using DISTINCT Clause

--Step 2: Drop existing table DEPT

--Step 3: Rename new table with Original table dept.

CREATE TABLE DEPT_DISTINCT AS SELECT DISTINCT * from DEPT;

DROP TABLE dept; alter table dept_distinct rename to dept; select * from dept order by deptno
desc; --

Case 2: --Step 1: Create a new table DEPT_DISTINCT using DISTINCT Clause

--Step 2: ALTER TABLE using SWAP WITH method

--Step 3: Drop table created from step 1

select * from dept order by deptno, insert_ts desc; DELETE FROM DEPT WHERE (deptno,
insert_ts) not in (SELECT deptno, insert_ts FROM ( SELECT *, ROW_NUMBER() OVER
(PARTITION BY deptno ORDER BY insert_ts desc) RNO FROM DEPT QUALIFY RNO=1 ) ) ;

select * from dept order by deptno, insert_ts desc; -- in this case we have to use distinct option --
Case 5: ---GROUP BY Clause to Remove Duplicate alter table dept drop column insert_ts;
truncate table dept; insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING',
'NEW YORK'), (20, 'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO'), (40, 'OPERATIONS',
'BOSTON') ;

insert into DEPT (DEPTNO, DNAME, LOC) values(10, 'ACCOUNTING', 'NEW YORK'), (20,
'RESEARCH', 'DALLAS'), (30, 'SALES', 'CHICAGO') ;

select * from dept order by deptno desc; CREATE TABLE dept_group_by as SELECT deptno,
dname, loc FROM DEPT group by deptno, dname, loc ;
select * from dept_group_by order by deptno desc; Alter table dept swap with dept_group_by;
select * from dept; select * from dept_group_by; drop table dept_group_by;

difference between regular schemas and managed schemas in snowflake *Regular Schema*
use role sysadmin; drop database if exists test1; create or replace database test1; create or
replace schema my_schema ; use role securityadmin; create or replace role dev_engineer_fr;
create or replace role analyst_fr; create or replace role dev_service_elt_fr; use role sysadmin;
grant usage on database test1 to role dev_engineer_fr; grant usage on schema
test1.my_schema to role dev_engineer_fr; grant create table on schema test1.my_schema to
role dev_engineer_fr; grant usage on database test1 to role dev_service_elt_fr; grant usage on
schema test1.my_schema to role dev_service_elt_fr; grant create table on schema
test1.my_schema to role dev_service_elt_fr; grant usage on database test1 to role analyst_fr;
grant usage on schema test1.my_schema to role analyst_fr; grant usage on warehouse
compute_wh to role dev_engineer_fr; grant usage on warehouse compute_wh to role analyst_fr;
grant usage on warehouse compute_wh to role dev_service_elt_fr; use role securityadmin; grant
role dev_engineer_fr to user venkat; grant role analyst_fr to user venkat; grant role
dev_service_elt_fr to user venkat; use role dev_engineer_fr; create or replace table emp( empid
number , ename varchar, sal float); select current_role(); grant select on emp to role analyst_fr;
grant all on emp to role dev_service_elt_fr; use role sysadmin; grant select on
test1.my_schema.emp to role analyst_fr; grant select on emp to role analyst_fr; use role
sysadmin; grant select on test.managed_schema.emp to role sysadmin; select * from
test.managed_schema.emp; *Managed Schema* use role sysadmin; drop database if exists
test; create database test; create schema managed_schema with managed access; use role
securityadmin; --create role dev_engineer_fr; --create role analyst_fr; grant usage on database
test to role dev_engineer_fr; grant usage on schema test.managed_schema to role
dev_engineer_fr; grant create table on schema test.managed_schema to role dev_engineer_fr;
grant usage on database test to role dev_service_elt_fr; grant usage on schema
test.managed_schema to role dev_service_elt_fr; grant create table on schema
test.managed_schema to role dev_service_elt_fr; grant usage on database test to role
analyst_fr; grant usage on schema test.managed_schema to role analyst_fr; grant usage on
warehouse compute_wh to role dev_engineer_fr; grant usage on warehouse compute_wh to role
dev_service_elt_fr; grant usage on warehouse compute_wh to role analyst_fr; grant role
dev_engineer_fr to user venkat; grant role dev_service_elt_fr to user venkat; grant role analyst_fr
to user venkat; use role dev_engineer_fr; use database test; use schema managed_schema;
create or replace table emp( empid number , ename varchar, sal float); grant select on emp to
role analyst_fr; use role sysadmin; grant select on test.managed_schema.emp to role analyst_fr;
drop table test.managed_schema.emp; select * from test.managed_schema.emp; use role
dev_engineer_fr; select * from test.managed_schema.emp; drop table
test.managed_schema.emp;
Working with CTEs (Common Table Expressions) in Snowflake

Code: ----------- DROP DATABASE IF EXISTS RAMU; CREATE DATABASE IF NOT EXISTS
RAMU; USE RAMU; CREATE TABLE Smiths (Name VARCHAR(100),Father VARCHAR(100) ,
Mother VARCHAR(100));

Insert Into Smiths values ('Roger',NULL,NULL),('My',NULL,NULL),

('Jane','Roger','My'),('Mike',NULL,NULL), ('Matt','Roger','My'),('Jill','Mike','Jane'),
('John','Mike','Jane');

select * from Smiths; select FATHER,MOTHER from Smiths where NAME='John'; with
hellotesting as (select FATHER,MOTHER from Smiths where NAME='John')

select s.Name,s.FATHER as original_father,s.MOTHER as original_mother,p.FATHER as


cte_father,p.MOTHER as cte_mother from Smiths s cross join hellotesting p;

with hellotesting as (select FATHER,MOTHER from Smiths where NAME='Jill') select


s.FATHER,s.MOTHER from Smiths s cross join hellotesting p where (s.Name=p.FATHER or
s.Name=p.MOTHER);

--Multilevel CTE with parents as (select FATHER,MOTHER from Smiths where NAME='Jill'),
grandparents as (select FATHER,MOTHER from Smiths where (NAME =(select FATHER FROM
parents) or NAME=(select MOTHER FROM parents)))

select FATHER,MOTHER from grandparents;

how to execute multiple SQL statements in bulk, using a concatenated select statement and a
stored procedure.

use role accountadmin;

use warehouse compute_wh;

use database demo_db;

use schema public;

create or replace table test1(a number);

create or replace table test2(a number);

insert into test1 values (1); insert into test2 values (2);
select * from test1;

select * from test2;

create or replace view test1_vw as select * from test1;

create or replace view test2_vw as select * from test2; select * from information_schema.tables
where TABLE_SCHEMA = 'PUBLIC' and table_type like '%TABLE' and table_name like 'TEST
%';

select 'truncate table if exists ' || TABLE_SCHEMA ||'.'|| table_name ||';' as sqlcommand from
information_schema.tables where TABLE_SCHEMA = 'PUBLIC' and table_type like '%TABLE'
and table_name like 'TEST%' ;

You might also like