Welcome to Scribd!

Skip carousel

Conv

Uploaded by

Hung Nguyen

0% found this document useful (0 votes)

8 views · 4 pages

Copyright

Available Formats

DOCX, PDF, TXT or read online from Scribd

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Report this Document

Copyright:

Available Formats

Download as DOCX, PDF, TXT or read online from Scribd

Flag for inappropriate content

0% found this document useful (0 votes)

8 views · 4 pages

Conv

Uploaded by

Hung Nguyen

Copyright:

Available Formats

Download as DOCX, PDF, TXT or read online from Scribd

Flag for inappropriate content

Jump to Page

You are on page 1 of 4

Search inside document

// Tile parameters used by convolutionRowsKernel.

// X stride, in elements, between the successive steps one thread handles;
// presumably also the launch blockDim.x -- confirm against the host launch code.
#define ROWS_BLOCKDIM_X 16

// Number of rows in the kernel's shared tile (indexed by threadIdx.y);
// presumably also the launch blockDim.y -- confirm against the host launch code.
#define ROWS_BLOCKDIM_Y 4

// Number of output elements each thread computes and stores.
#define ROWS_RESULT_STEPS 8

// Number of extra steps loaded on each side (left and right) of the result region.
#define ROWS_HALO_STEPS 1

/*
 * Row (horizontal) pass of a 1D convolution over a 2D float image.
 *
 * Each thread loads ROWS_RESULT_STEPS main elements plus ROWS_HALO_STEPS halo
 * elements on each side into a shared tile, then computes ROWS_RESULT_STEPS
 * outputs, each a weighted sum of 2 * KERNEL_RADIUS + 1 neighbours in the
 * same row using the coefficients in c_Kernel.
 *
 * Parameters:
 *   d_Dst   destination image
 *   d_Src   source image
 *   imageW  image width in elements; used only to zero-fill the right halo
 *   imageH  image height; not used by this kernel
 *   pitch   row stride in float elements (it is added to float pointers)
 *
 * Preconditions implied by the indexing (not checked here):
 *   - threadIdx.x < ROWS_BLOCKDIM_X and threadIdx.y < ROWS_BLOCKDIM_Y,
 *     otherwise the shared tile is indexed out of range.
 *   - KERNEL_RADIUS <= ROWS_HALO_STEPS * ROWS_BLOCKDIM_X, otherwise the
 *     convolution loop reads outside the tile row.
 *   - Main-data loads and result stores are unguarded, so the launch grid
 *     must tile the image exactly.
 *
 * c_Kernel and KERNEL_RADIUS are defined outside this block.
 * NOTE(review): c_Kernel is presumably a __constant__ coefficient array of
 * 2 * KERNEL_RADIUS + 1 floats -- confirm against its declaration.
 */
__global__ void convolutionRowsKernel(
    float *d_Dst,
    float *d_Src,
    int imageW,
    int imageH,
    int pitch
){
    __shared__ float s_Data[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X];

    // Offset to the left halo edge (baseX is negative for blockIdx.x == 0).
    const int baseX = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x;
    const int baseY = blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y;

    d_Src += baseY * pitch + baseX;
    d_Dst += baseY * pitch + baseX;

    // Load main data.
    #pragma unroll
    for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) {
        s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = d_Src[i * ROWS_BLOCKDIM_X];
    }

    // Load left halo; positions left of column 0 are filled with zero.
    #pragma unroll
    for (int i = 0; i < ROWS_HALO_STEPS; i++) {
        s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] =
            (baseX >= -i * ROWS_BLOCKDIM_X) ? d_Src[i * ROWS_BLOCKDIM_X] : 0.0f;
    }

    // Load right halo; positions at or beyond imageW are filled with zero.
    #pragma unroll
    for (int i = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; i++) {
        s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] =
            (imageW - baseX > i * ROWS_BLOCKDIM_X) ? d_Src[i * ROWS_BLOCKDIM_X] : 0.0f;
    }

    // The whole tile must be written before any thread reads its neighbours' values.
    __syncthreads();

    // Compute and store results.
    #pragma unroll
    for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) {
        float sum = 0.0f;

        #pragma unroll
        for (int j = -KERNEL_RADIUS; j <= KERNEL_RADIUS; j++) {
            // Coefficient index runs opposite to the sample offset j.
            sum += c_Kernel[KERNEL_RADIUS - j] * s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j];
        }

        d_Dst[i * ROWS_BLOCKDIM_X] = sum;
    }
}
// Tile parameters used by convolutionColumnsKernel.

// Number of columns in the kernel's shared tile (indexed by threadIdx.x);
// presumably also the launch blockDim.x -- confirm against the host launch code.
#define COLUMNS_BLOCKDIM_X 16

// Y stride, in rows, between the successive steps one thread handles;
// presumably also the launch blockDim.y -- confirm against the host launch code.
#define COLUMNS_BLOCKDIM_Y 8

// Number of output elements each thread computes and stores.
#define COLUMNS_RESULT_STEPS 8

// Number of extra steps loaded on each side (above and below) of the result region.
#define COLUMNS_HALO_STEPS 1

/*
 * Column (vertical) pass of a 1D convolution over a 2D float image.
 *
 * Each thread loads COLUMNS_RESULT_STEPS main elements plus COLUMNS_HALO_STEPS
 * halo elements above and below into a shared tile (stored with the column as
 * the first index), then computes COLUMNS_RESULT_STEPS outputs, each a
 * weighted sum of 2 * KERNEL_RADIUS + 1 neighbours in the same column using
 * the coefficients in c_Kernel.
 *
 * Parameters:
 *   d_Dst   destination image
 *   d_Src   source image
 *   imageW  image width; not used by this kernel
 *   imageH  image height in rows; used only to zero-fill the lower halo
 *   pitch   row stride in float elements (it is added to float pointers)
 *
 * Preconditions implied by the indexing (not checked here):
 *   - threadIdx.x < COLUMNS_BLOCKDIM_X and threadIdx.y < COLUMNS_BLOCKDIM_Y,
 *     otherwise the shared tile is indexed out of range.
 *   - KERNEL_RADIUS <= COLUMNS_HALO_STEPS * COLUMNS_BLOCKDIM_Y, otherwise the
 *     convolution loop reads outside the tile row.
 *   - Main-data loads and result stores are unguarded, so the launch grid
 *     must tile the image exactly.
 *
 * c_Kernel and KERNEL_RADIUS are defined outside this block.
 * NOTE(review): c_Kernel is presumably a __constant__ coefficient array of
 * 2 * KERNEL_RADIUS + 1 floats -- confirm against its declaration.
 */
__global__ void convolutionColumnsKernel(
    float *d_Dst,
    float *d_Src,
    int imageW,
    int imageH,
    int pitch
){
    // The trailing "+ 1" pads each tile row by one element
    // (presumably to avoid shared-memory bank conflicts -- confirm by profiling).
    __shared__ float s_Data[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 1];

    // Offset to the upper halo edge (baseY is negative for blockIdx.y == 0).
    const int baseX = blockIdx.x * COLUMNS_BLOCKDIM_X + threadIdx.x;
    const int baseY = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y;

    d_Src += baseY * pitch + baseX;
    d_Dst += baseY * pitch + baseX;

    // Main data.
    #pragma unroll
    for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) {
        s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = d_Src[i * COLUMNS_BLOCKDIM_Y * pitch];
    }

    // Upper halo; positions above row 0 are filled with zero.
    #pragma unroll
    for (int i = 0; i < COLUMNS_HALO_STEPS; i++) {
        s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] =
            (baseY >= -i * COLUMNS_BLOCKDIM_Y) ? d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0.0f;
    }

    // Lower halo; positions at or beyond imageH are filled with zero
    // (the zero fallback mirrors the upper-halo load).
    #pragma unroll
    for (int i = COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS + COLUMNS_HALO_STEPS; i++) {
        s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] =
            (imageH - baseY > i * COLUMNS_BLOCKDIM_Y) ? d_Src[i * COLUMNS_BLOCKDIM_Y * pitch] : 0.0f;
    }

    // The whole tile must be written before any thread reads its neighbours' values.
    __syncthreads();

    // Compute and store results.
    #pragma unroll
    for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) {
        float sum = 0.0f;

        #pragma unroll
        for (int j = -KERNEL_RADIUS; j <= KERNEL_RADIUS; j++) {
            // Coefficient index runs opposite to the sample offset j.
            sum += c_Kernel[KERNEL_RADIUS - j] * s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y + j];
        }

        d_Dst[i * COLUMNS_BLOCKDIM_Y * pitch] = sum;
    }
}

5 6314519115386063645
Document16 pages
5 6314519115386063645
Tharun Pattela
No ratings yet
Assignment - 5 111903109
Document3 pages
Assignment - 5 111903109
Laugh Louder
No ratings yet
CUDA
Document3 pages
CUDA
killua gojo
No ratings yet
Arduino C Code For Image Dehazing
Document25 pages
Arduino C Code For Image Dehazing
nasim_majoka803
No ratings yet
E Mirror Grid
Document2 pages
E Mirror Grid
Sarievo
No ratings yet
ADXL345 Sensor I2C Interface
Document5 pages
ADXL345 Sensor I2C Interface
avi kishu
No ratings yet
CG ASsignment
Document20 pages
CG ASsignment
Goyal Aditya
No ratings yet
Test Ads7841.c
Document2 pages
Test Ads7841.c
Sandeep Kashyap
No ratings yet
JM International School: C++ Practical File
Document39 pages
JM International School: C++ Practical File
Solanki
No ratings yet
HB - Games Test
Document4 pages
HB - Games Test
Marcelino López
No ratings yet
C/C++ Source Codes: Programme For E.G. of Width and Setew Functions
Document10 pages
C/C++ Source Codes: Programme For E.G. of Width and Setew Functions
Ismail Wizzy
No ratings yet
CN Lab Manual1
Document36 pages
CN Lab Manual1
kumarishu125
No ratings yet
Running Text
Document7 pages
Running Text
Satu Satu
No ratings yet
Practicles - Solution Distributed System (Google Docs)
Document19 pages
Practicles - Solution Distributed System (Google Docs)
Chhavi garg
No ratings yet
Message
Document9 pages
Message
Dharma Sandoval
No ratings yet
Accessing EDID Information From UEFI Shell
Document7 pages
Accessing EDID Information From UEFI Shell
Finnbarr P. Murphy
No ratings yet
All Ds Solution
Document42 pages
All Ds Solution
mukul_2526
100% (1)
Tag Bizdev
Document70 pages
Tag Bizdev
rijuvan
No ratings yet
Esp32 Wifi Version - Ino
Document5 pages
Esp32 Wifi Version - Ino
Ziper Kjj
No ratings yet
Budhha
Document8 pages
Budhha
abhisheksszz
No ratings yet
MCA Mini Project Documentation Tikamchand
Document8 pages
MCA Mini Project Documentation Tikamchand
Shripad Tanksal
No ratings yet
Practical File of Computer Science
Document61 pages
Practical File of Computer Science
Ashutosh Kumar
No ratings yet
Javascript For Generating Snow Particles
Document2 pages
Javascript For Generating Snow Particles
BalakrishnaPrasadGanne
No ratings yet
Pocket C
Document65 pages
Pocket C
PauloConstantino
No ratings yet
Program 9: Create A Database Regarding Its Indoor Patients
Document13 pages
Program 9: Create A Database Regarding Its Indoor Patients
Neeraj Falwariya
No ratings yet
A 9 SysytemCallLab
Document6 pages
A 9 SysytemCallLab
daveharsh251102
No ratings yet
Jss Academy of Technical Education: Multithreading
Document11 pages
Jss Academy of Technical Education: Multithreading
Trishala Kumari
No ratings yet
3D Reconstruction of Medical Image Based On OpenGL JOC-2902-24 Form
Document10 pages
3D Reconstruction of Medical Image Based On OpenGL JOC-2902-24 Form
Luminita Popa
No ratings yet
New1 Solution
Document17 pages
New1 Solution
Chhavi garg
No ratings yet
Computer Architecture and Organization
Document20 pages
Computer Architecture and Organization
Sabbasani Sai Bhaskar Reddy
No ratings yet
Vulnerability Assessment Report
Document23 pages
Vulnerability Assessment Report
Asim Arunava Sahoo
No ratings yet
Run C
Document18 pages
Run C
craig.yagami
No ratings yet
Computer Network LAB 5
Document19 pages
Computer Network LAB 5
zeeshanahmad12030
No ratings yet
Script
Document7 pages
Script
riswanto hanafi
No ratings yet
Setup: "Ardudroid 0.12 Alpha by Techbitar (2013) "
Document4 pages
Setup: "Ardudroid 0.12 Alpha by Techbitar (2013) "
luismena09051982
No ratings yet
Main Socket
Document6 pages
Main Socket
Kiên Lê Hữu
No ratings yet
Formatted Console I/O Operation Coding
Document29 pages
Formatted Console I/O Operation Coding
aloneface
No ratings yet
GameGuard StringDecryption
Document6 pages
GameGuard StringDecryption
ZzcarloszZ
No ratings yet
1.create A Single Linked List and Check Whether It Is Palindrome
Document11 pages
1.create A Single Linked List and Check Whether It Is Palindrome
D21PA13 - SWETHA N
No ratings yet
Rezolvare:: - de Facut Extrude in Directx
Document9 pages
Rezolvare:: - de Facut Extrude in Directx
Taisia Negară
No ratings yet
Sorting Methods
Document6 pages
Sorting Methods
iman
No ratings yet
10 Programs
Document21 pages
10 Programs
Aakash Kansal
No ratings yet
Count
Document1 page
Count
João Paulo Calizotti
No ratings yet
Assignment 23: Sorting of An Unsorted Array by Using Radix Sort
Document8 pages
Assignment 23: Sorting of An Unsorted Array by Using Radix Sort
Ln Amitav Biswas
No ratings yet
I 2 C
Document3 pages
I 2 C
Trieu Huynh
No ratings yet
Exp 4
Document9 pages
Exp 4
05DAVEHET
No ratings yet
CN Codes
Document39 pages
CN Codes
Bhavesh Gaba
No ratings yet
Lab2 Grafica
Document10 pages
Lab2 Grafica
Mihai
No ratings yet
Input: Output: 1. Sub String Program
Document8 pages
Input: Output: 1. Sub String Program
SWETHA
No ratings yet
Mulmatrix Cu
Document3 pages
Mulmatrix Cu
farhanhubble
No ratings yet
C++ Program
Document31 pages
C++ Program
Mani Singh
No ratings yet
3D Car Game
Document11 pages
3D Car Game
Xarope
No ratings yet
Centroide C
Document2 pages
Centroide C
nicolasmatias.fdz
No ratings yet
Lab Assignment-3 19BCT0015 CH - Tharun Tej: Code
Document20 pages
Lab Assignment-3 19BCT0015 CH - Tharun Tej: Code
Tharun Tej
No ratings yet
Relational, Logical and Bitwise Operators
Document40 pages
Relational, Logical and Bitwise Operators
ganapathy2010sv
No ratings yet
Miningone Id Pass
Document4 pages
Miningone Id Pass
R. V. Banna
No ratings yet
Nis Code
Document27 pages
Nis Code
Amruta Avhale
No ratings yet
Oops
Document9 pages
Oops
Jayaprakash Manoharan
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet
Build your own Blockchain: Make your own blockchain and trading bot on your pc
From Everand
Build your own Blockchain: Make your own blockchain and trading bot on your pc
Magelan Cybersecurity
No ratings yet
Birlasoft Resume Sample
Document3 pages
Birlasoft Resume Sample
Prince Kumar Chaudhary
No ratings yet
Name: Marjiesel Pearlbhel N. Cayanan, Zekhyna Nhatallie Reyes, Clarence Fitz Javate Section: 11 STEM 28 WW4.1 - Truthtable
Document3 pages
Name: Marjiesel Pearlbhel N. Cayanan, Zekhyna Nhatallie Reyes, Clarence Fitz Javate Section: 11 STEM 28 WW4.1 - Truthtable
Clarence Javate
No ratings yet
Mohammed Jaber
Document768 pages
Mohammed Jaber
Mohammed Omar Jaber
No ratings yet
The Impact of Technology On Learning Environmnent
Document7 pages
The Impact of Technology On Learning Environmnent
Ahmad Ali
75% (4)
Finite Element in PLS-CADD
Document11 pages
Finite Element in PLS-CADD
Muhammad usman hayat
No ratings yet
MiniMate Manual
Document58 pages
MiniMate Manual
alexandly
100% (1)
SO Snippet ENASE
Document10 pages
SO Snippet ENASE
amanswaraj007
No ratings yet
Example 2:: Find The Zeroes of The Quadratic Polynomial X
Document8 pages
Example 2:: Find The Zeroes of The Quadratic Polynomial X
Shafeequ Rahman
No ratings yet
LCD TV Service Manual: MODEL: RZ-20LA60
Document26 pages
LCD TV Service Manual: MODEL: RZ-20LA60
gmuita
No ratings yet
Intro Stats 5th Edition by Richard D de Veaux Ebook PDF
Document42 pages
Intro Stats 5th Edition by Richard D de Veaux Ebook PDF
delmar.jackson884
97% (36)
Data Checking PDF
Document9 pages
Data Checking PDF
denzelfawn
No ratings yet
Uor It
Document57 pages
Uor It
Umashankar Sharma
No ratings yet
Config Cisco 881 Isla Partida
Document4 pages
Config Cisco 881 Isla Partida
Johan Frederick Foitzick
No ratings yet
Slot07 08 09 Recursion
Document82 pages
Slot07 08 09 Recursion
Công Quân
No ratings yet
Ministry of Communications & Informatics: Libyan National Frequency Plan (LNFP)
Document255 pages
Ministry of Communications & Informatics: Libyan National Frequency Plan (LNFP)
Ali Abushhiwa
No ratings yet
François Roche - R&Sie (N) Architects
Document14 pages
François Roche - R&Sie (N) Architects
EmersonAagaard
No ratings yet
XML Web Services
Document457 pages
XML Web Services
Soner Gönül
No ratings yet
Amateur Photographer - 11 February 2017
Document84 pages
Amateur Photographer - 11 February 2017
Aruna Premarathne
100% (1)
Tool Design
Document29 pages
Tool Design
Pavan Teja
No ratings yet
HQ Notification For Port Blair
Document29 pages
HQ Notification For Port Blair
Balaram Panda
No ratings yet
Digital Design and Computer Architecture, 2: Edition
Document135 pages
Digital Design and Computer Architecture, 2: Edition
Сергей Капуста
No ratings yet
PDF NBR 13248 2014 Cabos de Potencia e Condutores Isolados Sem Cobertura DL
Document34 pages
PDF NBR 13248 2014 Cabos de Potencia e Condutores Isolados Sem Cobertura DL
André Luiz
No ratings yet
User Manual: Integrated Stall Rental E-Government Management System (ISRMS)
Document20 pages
User Manual: Integrated Stall Rental E-Government Management System (ISRMS)
Celesamae Tangub Vicente
No ratings yet
1581 Service Manual 314982-01 (1987 Jun) PDF
Document22 pages
1581 Service Manual 314982-01 (1987 Jun) PDF
Natasya
No ratings yet
Value Stream LEAN
Document8 pages
Value Stream LEAN
Joven Castillo
No ratings yet
Add Disk and Create A Partition
Document2 pages
Add Disk and Create A Partition
aitlhaj
No ratings yet
IV Pump Flo-Gard 6301
Document48 pages
IV Pump Flo-Gard 6301
Dba Biomedtech
No ratings yet
Ecological Modelling and Energy DSS
Document325 pages
Ecological Modelling and Energy DSS
Nàpoles Raymundo
No ratings yet
Social Interaction Among Adolescents Who Use Social Media: Nabila Rizki Amanda
Document6 pages
Social Interaction Among Adolescents Who Use Social Media: Nabila Rizki Amanda
Rosalinda Valguna
No ratings yet