You are on page 1of 64

3.

SEARCHING AND SORTING


TECHNIQUES
English course 3

3.1. Searching methods


Generally the problem is to search objects using
the value of a field (key) associated to any object
If the objects are not ordered we have only the
direct search possibility
But if, objects are ordered, the finding process
will be faster
In our approach we consider that objects are
organized on one dimensional arrays and for
key is defined an ordered relation

The search function returns the position of the element in the array
if it is found, or the value -1 if the value is not found
(using divide et impera):
/*
 * Recursive binary search over the sorted range p[inc..sfr] (both bounds
 * inclusive).
 *
 * p   - array sorted in increasing order
 * inc - index of the first element of the range
 * sfr - index of the last element of the range
 * val - value to look for
 *
 * Returns the index of an element equal to val, or -1 when val is not in
 * the range (including when the range is empty, i.e. inc > sfr).
 */
int CautareBinara(int *p, int inc, int sfr, int val)//recursive
{
    int mij;

    /* Check for an empty range first: once inc > sfr the midpoint lies
       outside the range and must not be read. */
    if (inc > sfr)
        return -1;

    /* Midpoint written so that the addition cannot overflow. */
    mij = inc + (sfr - inc) / 2;

    if (p[mij] == val)
        return mij;
    if (p[mij] > val)
        return CautareBinara(p, inc, mij - 1, val);   /* continue in the left half */
    return CautareBinara(p, mij + 1, sfr, val);       /* continue in the right half */
}

Binary search (iterative solution)


/*
 * Iterative binary search in an array sorted in increasing order.
 *
 * p   - the sorted array
 * n   - number of elements in p
 * val - value to look for
 *
 * Returns the index of an element equal to val, or -1 when val is absent
 * (always -1 for n <= 0).
 */
int CautareBinara(int *p, int n, int val)//iterative
{
    int inc = 0;
    int sfr = n - 1;

    while (inc <= sfr) {
        /* Midpoint written so that the addition cannot overflow. */
        int mij = inc + (sfr - inc) / 2;

        if (p[mij] == val)
            return mij;
        if (val < p[mij])
            sfr = mij - 1;
        else
            inc = mij + 1;
    }
    /* The range became empty; p[] is never read outside a valid range,
       so an empty array is handled safely. */
    return -1;
}

Standard library functions


The standard library (stdlib.h/ search.h) contains
some searching functions
For unordered arrays:
void * lfind(const void *key, const void *base,
size_t *num, size_t width,
int (*fcmp)(const void *, const void*));
void * lsearch(const void *key, void *base,
size_t *num, size_t width,
int (*fcmp)(const void *, const void *));
5

For success the functions will return the address of


the first element that have the searching key
Otherwise :
lfind( ) will return null pointer
lsearch( ) will append the element at the end of
the array and will return a pointer to this added
element
The comparison function fcmp( ) is provided by the
user and must return 0 in case of equality of
elements, different value otherwise
The other parameters are: key the address of the
key, base the base address of the array, num the
address with the number of elements, width the
dimension of one element
Newer compilers offer _lfind() and _lfind_s() in place of lfind(),
and _lsearch() and _lsearch_s() in place of lsearch();
the header file for these functions is <search.h>.

#define DIM 12
int cmp(char *arg1, char *arg2);
int addelem(char *key, char **tab, int nelem);
/*
 * Demo for lsearch(): tries to add the month "iul" to a table that holds
 * six month names and has room for DIM entries, then reports whether the
 * key was already present or has been appended.
 */
int main(void)
{
    char *luni[DIM] = {"ian", "feb", "mar", "apr", "mai", "iun" };
    int i, nluni=6;
    char *key = "iul";

    /* addelem() is called with the current element count; a nonzero result
       is reported as "already in the table". */
    if (addelem(key, luni, nluni)) {
        printf("Luna %s este deja in tablou.\n", key);
    } else {
        nluni++;   /* one element was appended */
        printf("Luna \"%s\" a fost adaugata in tablou : ", key);
        for (i = 0; i < nluni; i++)
            printf("%s, ", luni[i]);
    }
    return 0;
}

/*
 * Looks for key in tab with lsearch(); lsearch() appends the key and
 * increments the element count when it is not found.
 *
 * key   - string to look for
 * tab   - table of string pointers; the caller must leave room for one
 *         more element after the first nelem entries
 * nelem - number of elements currently in tab
 *
 * Returns nonzero when the element count did not change (key was already
 * present), zero when the key was appended.
 */
int addelem(char *key, char **tab, int nelem)
{
    /* lsearch() updates the count through a size_t pointer, so hand it a
       real size_t object instead of casting the address of an int. */
    size_t count = (size_t)nelem;

    lsearch(&key, tab, &count, sizeof(char *),
            (int(*)(const void *,const void *))cmp);
    return(count == (size_t)nelem);
}
/*
 * Comparison callback for the lsearch() call on a table of char *.
 *
 * lsearch() hands the callback the address of the key and the address of
 * a table element, so each argument really points to a char * and has to
 * be dereferenced once before the strings can be compared.
 *
 * Returns 0 when the two strings are equal, nonzero otherwise
 * (the strcmp() result).
 */
int cmp(char *arg1, char *arg2)
{
    char *left = *(char **)arg1;
    char *right = *(char **)arg2;

    return(strcmp(left, right));
}
8

For ordered array we have (stdlib.h / search.h):


void *bsearch(const void *key, const void *base,
size_t nelem, size_t width,
int (*fcmp)(const void*, const void*));
The parameters have the same significance as for
previous functions, the third being the number of
elements, size_t nelem
The array must be sorted in increasing order and the
comparison function must return:
A negative value if *v1 < *v2
zero if the elements are equal
A positive value if *v1 > *v2

// utilizarea functiei de biblioteca bsearch()


#include <stdlib.h>
#include <stdio.h>
#include <conio.h>
int compare_int(int *a, int *b);
int compare_float(float *a, float *b);
/*
 * Demo for bsearch(): looks up one value in a sorted int array and one
 * value in a sorted float array and prints whether each was found.
 */
int main(void){
    int int_values[] = {1, 2 , 3, 4, 5};
    float float_values[] = {1.1, 2.2, 3.3, 4.4, 5.5};
    int *int_ptr, int_value = 2, num;
    float *float_ptr, float_value = 33.3;

    num = sizeof(int_values)/sizeof(int);
    // call the bsearch() library function for the array of integers
    int_ptr = (int *)bsearch(&int_value, int_values, num,
        sizeof(int),(int (*) (const void *, const void *)) compare_int);
    if (int_ptr) printf("Valoarea %d a fost gasita!\n", int_value);
    else printf("Valoarea %d nu a fost gasita!\n", int_value);

    num = sizeof(float_values)/sizeof(float);
    // call the bsearch() library function for the array of real numbers
    float_ptr = (float *)bsearch(&float_value, float_values, num,
        sizeof(float),(int (*) (const void *, const void *)) compare_float);
    if (float_ptr) printf("Valoarea %3.1f a fost gasita!\n",
        float_value);//end if
    else printf("Valoarea %3.1f nu a fost gasita!\n", float_value);

    _getch();   // wait for a key press before exiting
    return 0;
}//end main()

/*
 * Three-way comparison of two ints for bsearch() on an increasing array.
 *
 * Returns a negative value if *a < *b, zero if they are equal and a
 * positive value if *a > *b.  The result is built from comparisons
 * rather than from *a - *b, because the subtraction overflows for
 * operands far apart (e.g. INT_MIN and INT_MAX).
 */
int compare_int(int *a, int *b)
{
    return (*a > *b) - (*a < *b);
}//end compare_int()
/*
 * Three-way comparison of two floats for bsearch() on an increasing array.
 *
 * Returns -1 if *a < *b, 1 if *a > *b and 0 otherwise.
 */
int compare_float(float *a, float *b)
{
    const float left = *a;
    const float right = *b;

    /* Each relational test yields 0 or 1, so the difference is -1, 0 or 1. */
    return (left > right) - (left < right);
}//end compare_float()

3.2. Sorting methods


Sorting represents a process to re-arrange objects in a
specific order, by permutations
For a set of objects $S=\{a_1, a_2, \ldots, a_n\}$, sorting produces
the set $S_1=\{a_{k_1}, a_{k_2}, \ldots, a_{k_n}\}$ such that, for an
ordering function $f$ and the imposed relation $<$, the following
holds:
$f(a_{k_1}) < f(a_{k_2}) < \ldots < f(a_{k_n})$
The sorting process is realized considering an
associated key to objects
The sorting efficiency is evaluated by:
Number of key comparisons
Number of objects permutations
These operations depends on the number of objects
from the set

13

Sorting methods classification


Interchange sorting:

Selection sorting:
Insertion sorting:
Interclassing sorting:
No comparison sorting:

- Bubble Sort
- Cocktail Sort
- Comb Sort
- Quick Sort
- Selection Sort
- Heap Sort
- Insertion Sort
- Shell Sort
- Merge Sort
-Radix sort
14

Simple sorting methods

These methods will process n*n comparisons

The sorting process is "in situ" (same place)

If n is of hundreds/thousands order, the


computing time is very close for each method.
In this case the complexity method will be
considered in the selection of the method that
will be implemented

15

Interchange sorting (bubble sort)


We consider neighbor pairs of elements that are
processed: (0,1), (1,2), (2,3),

If the elements do not respect the order, we


interchange them
After the first iteration the greatest value will
arrive on the last position
We realize more iterations to order the entire
array, and at each iteration the processed array
is shorter because the last elements are ordered
16

Example bubble-sort:
Example : 9 7 5 6 2
7 9 5 6 2 -> 7 5 9 6 2 -> 7 5 6 9 2 -> 7 5 6 2 9
5 7 6 2 9 -> 5 6 7 2 9 -> 5 6 2 7 9
5 6 2 7 9 -> 5 2 6 7 9
5 2 6 7 9 -> 2 5 6 7 9
Remark: after the first iteration on the last position
will be the greatest value, at the second iteration on
the penultimate position will be the greatest value
from the remaining array, etc
17

/*
 * Bubble sort: orders the n ints in p increasingly, in place.
 *
 * Every pass compares neighbouring pairs and swaps those that are out of
 * order; after each pass the compared prefix is one element shorter.
 */
void SortBubble(int *p, int n)
{
    int unsorted;   /* length of the prefix still being processed */
    int k;

    for (unsorted = n; unsorted > 0; unsorted--) {
        for (k = 0; k + 1 < unsorted; k++) {
            if (p[k] > p[k + 1]) {
                /* swap the neighbours */
                int held = p[k];
                p[k] = p[k + 1];
                p[k + 1] = held;
            }
        }
    }
}

18

The algorithm must be improved remarking that


after an iteration with no interchange elements
the sorting process is finished

Example : 9 2 5 6 7:
9 2 5 6 7 -> 2 9 5 6 7 -> 2 5 9 6 7 -> 2 5 6 9 7
-> 2 5 6 7 9
25679

In this case a flag variable that monitors the
process will be introduced
19

/*
 * Bubble sort with early exit (for-loop variant): orders the n ints in p
 * increasingly, in place, and stops as soon as a pass performs no swap.
 */
void SortBubble(int *p, int n) // for variant
{
    int pass = 0;

    while (pass < n) {
        int swapped = 0;
        int k;

        for (k = 0; k + 1 < n - pass; k++) {
            if (p[k] > p[k + 1]) {
                /* swap the neighbours */
                int held = p[k + 1];
                p[k + 1] = p[k];
                p[k] = held;
                swapped = 1;
            }
        }
        /* no swap was made: nothing left to do */
        if (!swapped)
            return;
        pass++;
    }
}

20

//bubble sort (do-while variant)


/*
 * Bubble sort (do-while style): sweeps over all neighbouring pairs of the
 * n ints in p, swapping those out of order, and repeats the full sweep
 * until one sweep makes no swap.
 */
void SortBubbleD(int *p, int n)
{
    int dirty = 1;   /* forces the first sweep */

    while (dirty) {
        int k;

        dirty = 0;
        for (k = 1; k < n; k++) {
            if (p[k - 1] > p[k]) {
                int held = p[k - 1];
                p[k - 1] = p[k];
                p[k] = held;
                dirty = 1;
            }
        }
    }
}
21

Simple selection sorting


We search the smallest element and we
transfer it by interchange on the first position of
the array
Then we consider the array with the elements
2,3,...,N and we search the smallest value that
will be transferred on the first position of the
current array,...
Example : 9 7 5 6 2
9 7 5 6 2 -> 2 7 5 6 9 -> 2 5 7 6 9 -> 2 5 6 7 9
22

/*
 * Selection sort: orders the n ints in p increasingly, in place.
 *
 * For each position, the smallest element of the remaining suffix is
 * located and exchanged with the element at that position.
 */
void SortSel(int *p, int n)
{
    int start = 0;

    while (start < n) {
        int smallest = start;   /* position of the minimum found so far */
        int probe = start + 1;
        int held;

        while (probe < n) {
            if (p[smallest] > p[probe])
                smallest = probe;
            probe++;
        }

        /* exchange with the first element of the current suffix */
        held = p[smallest];
        p[smallest] = p[start];
        p[start] = held;

        start++;
    }
}

23

Simple insertion sorting


We consider step by step arrays composed by
the first 2,3,...,N elements from the initial array
We verify that these arrays are ordered by
transferring the new added element (2,3,...) on
the corresponding position
This thing involves to move on the right, with
one position, of the elements that have the key
value greater than the key of the new element,
so that this element to be before those
elements, but after the elements that have the
key value lower than the new element
24

Example simple insertion:


Example : 9 7 5 6 2
97
->
79
795
->
579
5 7 9 6 ->
5679
5 6 7 9 2 ->
25679

25

/*
 * Insertion sort: orders the n ints in p increasingly, in place.
 *
 * Each element p[next] is taken out, the larger elements before it are
 * moved one position to the right, and the element is put into the gap.
 */
void SortIns(int *p, int n)
{
    int next;

    for (next = 1; next < n; next++) {
        int value = p[next];
        int hole = next;   /* position where value will end up */

        /* shift right while the left neighbour is greater */
        while (hole > 0 && p[hole - 1] > value) {
            p[hole] = p[hole - 1];
            hole--;
        }
        p[hole] = value;
    }
}
26

Remark:
The position where will be transferred the new
element is sequential searched (on an ordered
sub-array), so the algorithm may be improved
(analyzing elements being sorted), by binary
search

27

/*
 * Insertion sort with binary search: orders the n ints in p increasingly,
 * in place.
 *
 * The insertion position of p[next] inside the already ordered prefix
 * p[0..next-1] is found by binary search; the elements from that position
 * on are then moved one step to the right.
 */
void SortIns(int *p, int n)
{
    int next;

    for (next = 1; next < n; next++) {
        int value = p[next];
        int lo = 0;
        int hi = next - 1;
        int k;

        /* binary search: lo ends on the first element greater than value */
        while (lo <= hi) {
            int mid = (lo + hi) / 2;

            if (p[mid] > value)
                hi = mid - 1;
            else
                lo = mid + 1;
        }

        /* shift right to open the gap at lo */
        k = next;
        while (k > lo) {
            p[k] = p[k - 1];
            k--;
        }
        p[lo] = value;
    }
}

28

// main() function for the simple sorting algorithms
/*
 * Driver for the simple sorting functions: reads the element count and
 * the elements, sorts them with SortSimple() and prints the result.
 * Input that cannot be parsed, or a count that does not fit in the local
 * buffer, is rejected with exit status 1.
 */
int main(void){
    int sir[20], n;
    const int capacity = (int)(sizeof sir / sizeof sir[0]);

    printf("Introdu numarul maxim de elemente (<20)\n");
    /* Without this check a large n would write past the end of sir[]. */
    if (scanf("%d", &n) != 1 || n < 0 || n > capacity) {
        printf("Numar de elemente invalid\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        if (scanf("%d", &sir[i]) != 1) {
            printf("Valoare invalida\n");
            return 1;
        }
    }
    SortSimple(sir, n);
    printf("\nSirul ordonat este: ");
    for (int i = 0; i < n; i++)
        printf("%d ", sir[i]);
    return 0;
}//main

Shell Sort Algorithm


ShellSort is performing simple sorting algorithm,
based on insertion sorting (InsertSort)
The algorithm uses arrays of length N, being of
class O(N^2)
The algorithm performs better than the usual
O(N^2) class algorithms: InsertSort, BubbleSort,
SelSort, etc., being 2 times faster than
InsertSort, the closest competitor of class O(N^2)
ShellSort is not an in situ sorting algorithm.

30

Shell Sort algorithm description


ShellSort algorithm will realize movements on
great distances, by sorting elements that are at
a great distance using the insertion method.
After this sorting step we continue with elements
that are at lower distance, ...
We consider the n-sorting notion: the n-th
element sorting.
The diminishing process of the intervals
sequences is realized using a sequence of
numbers named intervals sequence or space
sequence.
Usual it is used the Knuth sequence: h=3*h+1
(1, 4, 13, 40, 121, 364,)
31

Example Shell Sort

h=4

7
*
2
2

2
*
2
2

10

2
*
7

10
*
5

9
*
6

5
*
3

7
*
4

5
*
10

10

6
*
9

10
*
5

4
*
7

3
*
10

32

h=1
2

1
*

10

10

4
*
6

10

5
*
6

10

7
*
9

10

33

/*
 * Shell sort: orders the max ints in p increasingly, in place.
 *
 * The gap h starts at the largest value of the sequence h = 3*h + 1
 * (1, 4, 13, 40, ...) reached by the start-up loop below, and is reduced
 * with h = (h - 1)/3 down to 1; for each gap the elements that are h
 * apart are insertion-sorted.
 *
 * p   - array to sort
 * max - number of elements in p
 */
void ShellSort(int *p, int max)
{
    int inner, outer;
    int temp;
    int h = 1;

    /* find the starting gap */
    while (h <= max / 3)
        h = 3 * h + 1;

    while (h > 0) {
        /* h-sort: insertion sort over elements that are h apart */
        for (outer = h; outer < max; outer++) {
            temp = p[outer];
            inner = outer;
            /* strict comparison: equal elements need not be moved */
            while (inner > h - 1 && p[inner - h] > temp) {
                p[inner] = p[inner - h];
                inner -= h;
            }
            p[inner] = temp;
        }//for
        h = (h - 1) / 3;   /* next smaller gap; becomes 0 after h == 1 */
    }//while
}

34

Example 1: How to use a sorting function:


#include <iostream>
using namespace std;

void SortIns(int *p, int n);


// Example driver: reads n integers into a dynamically allocated array,
// sorts them with SortIns() and prints them.
int main( )
{
    int i, n, *tab;

    cout <<"Cate numere: ";
    cin >> n;
    // A failed read or a negative count cannot be used as an array size.
    if (!cin || n < 0) {
        cout << "Numar de elemente invalid" << endl;
        return 1;
    }

    // Plain new reports failure by throwing, so no null check is needed.
    tab = new int[n];

    cout << "Introduceti " << n << " numere intregi:" << endl;
    // The array is indexed instead of advancing tab itself, so the pointer
    // passed to delete [] is always the one returned by new.
    for(i=0; i<n; i++)
    {
        cout << "\tNumarul "<<(i+1)<<": ";
        cin >> tab[i];
    }

    SortIns(tab, n);

    cout << "\nNumerele sortate sunt: "<<endl;
    for(i=0;i<n;i++)
        cout << tab[i] <<" ";
    cout << endl;

    delete [ ] tab;
    return 0;
}
// Sorting function called by the example's main() with the array and its
// element count; the body is intentionally left empty in this example.
void SortIns(int *p, int n)
{
// body of the sorting function goes here
}

36

Example 2

#include <cstring>
#include <iostream>
using namespace std;

void BubbleSort (char **names, const int size);


// Example 2 driver: sorts a table of six strings with BubbleSort() and
// prints them separated by ", ".
int main(void)
{
    int dimc = 6;
    char *tabc[] = {"abc", "xyz", "acd", "axyz", "bc", "eltcti"};

    BubbleSort(tabc, dimc); // increasing order by character code

    cout << "\nSirurile sortate: ";
    for(int i=0;i<dimc; i++)
        cout << tabc[i] << ", ";
    cout << endl;
    return 0;
}
37

// Bubble sort over a table of C strings: reorders the size pointers in
// names so that the strings are in increasing strcmp() order.  Full sweeps
// are repeated until one sweep makes no exchange.
void BubbleSort (char **names, const int size)
{
    for (bool dirty = true; dirty; )
    {
        dirty = false;
        for (int k = 1; k < size; ++k)
        {
            char *left = names[k-1];
            char *right = names[k];

            if (strcmp(left, right) > 0)
            {
                // exchange the two pointers; the strings themselves stay put
                names[k-1] = right;
                names[k] = left;
                dirty = true;
            }
        }
    }
}

38

Example 3
#include <iostream>
using namespace std;

int fcmp(char *s1, char *s2);


void BubbleSort (char **names, const int size);
// Example 3 driver: sorts a table of six strings with BubbleSort(), whose
// ordering is decided by fcmp(), and prints them separated by ", ".
int main(void)
{
    int dimc = 6;
    char *tabc[] = {"abc", "xyz", "acd", "axyz", "bc", "eltcti"};

    BubbleSort(tabc, dimc); // increasing order by length

    cout << "\nSirurile sortate: ";
    for(int i=0;i<dimc; i++)
        cout << tabc[i] << ", ";
    cout << endl;
    return 0;
}
39

// Bubble sort over a table of C strings: reorders the size pointers in
// names so that neighbours are in increasing order according to fcmp().
// Full sweeps are repeated until one sweep makes no exchange.
void BubbleSort (char **names, const int size)
{
    for (bool dirty = true; dirty; ) {
        dirty = false;
        for (int k = 1; k < size; ++k) {
            char *left = names[k-1];
            char *right = names[k];

            if (fcmp(left, right) > 0) {
                // exchange the two pointers; the strings themselves stay put
                names[k-1] = right;
                names[k] = left;
                dirty = true;
            }
        }
    }
}

// Orders two C strings by length.
//
// Returns a negative value if s1 is shorter than s2, zero if both have the
// same length and a positive value if s1 is longer.  The lengths are
// compared directly: strlen() yields an unsigned size_t, so subtracting
// them wraps around instead of going negative and only gives the right
// sign after an implementation-defined narrowing to int.
//
// To order alphabetically instead, return strcmp(s1, s2).
int fcmp(char *s1, char *s2)
{
    const size_t len1 = strlen(s1);
    const size_t len2 = strlen(s2);

    if (len1 < len2)
        return -1;
    if (len1 > len2)
        return 1;
    return 0;
}

40

Homework:
Modify these sorting function to include the
comparison function in the calling of sorting function
void Sort(char **tab, int n, int(*fcmp)(char *s1, char *s2));

41

42

Advanced sorting methods

Are algorithms that allow to reduce the number


of comparisons till nlog(n)

The complexity is greater than simple sorting


methods, expressed by recursion, specialized
data structures, or more arrays used in the
sorting process

The efficiency of these algorithms appears


when we consider huge arrays, from thousands
(analyzing the ratio n/log(n))
43

Interclassing sort (Merge sort)


Method principle: given two ordered
arrays, we obtain a third sorted array that
contains the elements of both initial arrays.
This sorting method uses Divide et Impera
method.
We divide the initial unsorted array into the smallest
sequences of elements, so that each sequence
is ordered at a given moment and interclassed (merged) with
the corresponding neighbouring sequence from the array.
Practically the interclassing process will start
when we have a 2 elements sequence. That
once ordered, will be interclassed with other
corresponding sequence, after that we merge
pairs of 4 elements, 8 elements, etc.
44

45

#include<iostream>
using namespace std;
void interclas(int *a,int i,int m,int j);
// i - start position of the first subarray, m - end position of the first subarray,
// j- pozitia de sfarsit al celui de-al doilea subsir
void divimp(int *a,int i,int j);
// i- indicele primului element din sir,
// j - indicele ultimului element din sir
#define DIM 1000
void main()
{
int a[DIM],n;
cout<<"n="; cin>>n;
for(int i=0;i<n;i++)
{
cout<<"a["<<i<<"]=";
cin>>a[i];
}

46

divimp(a,0,n-1);
for(int i=0;i<n;i++)
cout<<a[i]<<' ';
} //main

// Merge sort (divide et impera) on a[i..j], both bounds inclusive:
// sorts each half recursively and then merges the two halves with
// interclas().  Ranges with fewer than two elements are left untouched.
void divimp(int *a,int i,int j)
{
    if (i >= j)
        return;

    const int mid = (i + j) / 2;

    divimp(a, i, mid);        // first half of the range
    divimp(a, mid + 1, j);    // second half of the range
    interclas(a, i, mid, j);  // merge the two sorted halves
}
47

// Merges the two adjacent sorted runs a[i..m] and a[m+1..j] (all bounds
// inclusive) into one sorted run stored back in a[i..j].
//
// The scratch buffer is sized to the merged range, so the function works
// for ranges of any length rather than being limited by a fixed-size
// local array.  On equal keys the element from the first run is taken
// first, which keeps the merge stable.
void interclas(int *a,int i,int m,int j)
{
    const int len = j - i + 1;
    if (len <= 0)
        return;               // nothing to merge

    int *b = new int[len];    // scratch space for the merged run
    int x = i;                // cursor in the first run
    int y = m + 1;            // cursor in the second run
    int k = 0;                // cursor in the scratch buffer

    while (x <= m && y <= j) {
        if (a[x] <= a[y])
            b[k++] = a[x++];
        else
            b[k++] = a[y++];
    }
    while (x <= m)
        b[k++] = a[x++];      // rest of the first run
    while (y <= j)
        b[k++] = a[y++];      // rest of the second run

    for (k = 0; k < len; k++)
        a[i + k] = b[k];      // copy the merged run back into place

    delete [] b;
}

48

Remarks:
The number of elementary operations executed
is of order O(n log(n)).
For arrays with a huge number of components
the computing time is lower than the time used
by simple sorting algorithms such as selection,
insertion or bubble sort, whose complexity is of
order O(n^2).

The merge sort algorithm uses twice more memory


than the simple algorithms because will use a
supplementary space for the auxiliary array.
49

QuickSort algorithm
Is based on the partition notion
Partitioning data represents to divide data in 2
groups, one group with values greater than an
imposed value, pivot, the other group with
values less than pivot.
Steps of partitioning:
We consider an element x of the array named
pivot element
We browse the array from left till we find an
element ai greater than x
We browse the array from right till we find an
element aj less than x
50

we interchange ai with aj
We update i and j by increment, respective by
decrement
We repeat the previous steps till the browsing
are meet somewhere in the middle of the array
In this moment the array is partitioned:
In the left side of x are only elements less than x
In the right side of x are only elements greater than x
a[k] <= x,
k = 0,...,i-1
a[k] >= x,
k = j+1,...,n
a[k] = x,
k = j+1,...,i-1

51

After the first partitioning, the array is not yet ordered


So we continue in the same mode in the left and right side
of the pivot.

QuickSort algorithm has a recursive nature

QuickSort is
{
If (right-left) == 0 then
Return
Else
pivot = Tablou[right];
partition = Partitionare(left, right, pivot)
QuickSort(left, partition-1);
QuickSort(partition+1, right);
EndIf
}

52

Example : pivot is the middle element

48

88

57

71

60

42

83

73

48

65

48

57

71

60

42

83

73

88

65

57

42

48

57

48

42

42

48

60

42

60

71

71

83

73

88

65

65

73

88

83

57

60

65

71

73

83

88

57

60

65

71

73

83

88

Example : pivot is the last element


88

57

71

60

42

83

73

48

65

48

57

71

60

42

83

73

88

65

48

57

42

60

71

83

73

88

65

48

57

42

60

48

57

42

48

57

42

57

42

48

65

83

60

65

71

73

42

60

65

71

73

83

88

48

60

65

71

73

83

88

60

65

71

73

83

88

57

73

88

71
88

83

/*
 * Quick sort: orders p[prim..ultim] (both bounds inclusive) increasingly,
 * in place, using the last element of the range as the pivot value.
 *
 * p     - array to sort
 * prim  - index of the first element of the range
 * ultim - index of the last element of the range
 *
 * Ranges with fewer than two elements (prim >= ultim, which includes the
 * empty range of a call such as QuickSort(p, 0, n-1) with n == 0) are
 * left untouched and p is not read.
 */
void QuickSort(int *p, int prim, int ultim)
{
    int inc, sfr, pivot, temp;

    if (prim >= ultim)
        return;

    inc = prim;
    sfr = ultim;
    pivot = p[ultim];

    /* partitioning */
    do {
        while (p[inc] < pivot)    /* from the left: first element >= pivot */
            inc++;
        while (p[sfr] > pivot)    /* from the right: first element <= pivot */
            sfr--;
        if (inc < sfr) {
            temp = p[inc];
            p[inc] = p[sfr];
            p[sfr] = temp;
        }
        if (inc <= sfr) {
            sfr--;
            inc++;
        }
    } while (inc < sfr);

    /* recursive calls on the two parts */
    if (prim < sfr)
        QuickSort(p, prim, sfr);
    if (inc < ultim)
        QuickSort(p, inc, ultim);
}
56

How to choose the pivot value:


The pivot must represent a key value from the
array
Any value can be chosen but it is recommended
to avoid the lowest or biggest value

Usual values:
last element from the partitioned array
Median value between the first, last and the
middle element of the array (half of the values
from the array are less than the median value
and half are greater)
57

Standard library (stdlib.h / search.h) offers the


function:
void qsort(void *base, size_t nelem, size_t width,
int(*fcmp)(const void *, const void *));

The compare function, defined by the user, for
increasing sorting must return a value:
< 0 if *v1 < *v2
0 if *v1 == *v2
> 0 if *v1 > *v2

For decreasing order the negative and positive


returned values will be reversed. The source one
dimensional array can contain numerical,
characters and structures values.

58

//Example: int array sorted with qsort()


#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
int compara(const void* val1, const void* val2);
int (*fcmp)(const void*, const void*);
#define DIM 20
/*
 * Demo for qsort() on an int array: reads the array size and the
 * elements, sorts them through the fcmp function pointer and prints the
 * result.  A size that cannot be read or does not fit in the DIM-element
 * buffer is rejected with exit status 1.
 */
int main(void)
{
    int i,n,tab[DIM];

    printf("Introdu dimensiune tablou: ");
    /* Without this check a large n would write past the end of tab[]. */
    if (scanf("%d",&n) != 1 || n < 0 || n > DIM) {
        printf("Dimensiune invalida\n");
        return 1;
    }
    printf("Introdu elemente tablou:\n");
    for(i=0;i<n;i++)
    {
        printf("tab[%d] = ",i);
        if (scanf("%d",&tab[i]) != 1) {
            printf("Valoare invalida\n");
            return 1;
        }
    }
    fcmp=compara;
    // fcmp now holds the address of the comparison function
    qsort(tab,(size_t)n,sizeof tab[0],fcmp);
    puts("Tabloul ordonat este");
    for(i=0;i<n;i++){
        printf("%d ",tab[i]);
    }
    _getch();   // wait for a key press before exiting
    return 0;
}//main

59

/*
 * qsort() comparison callback for int elements, producing DECREASING
 * order: the result is positive when *val1 < *val2, negative when
 * *val1 > *val2 and zero when the two are equal.
 *
 * The result is built from comparisons instead of a subtraction, which
 * overflows for operands far apart (e.g. INT_MIN and INT_MAX).
 * For increasing order, swap the roles of the two operands.
 */
int compara(const void * val1, const void* val2)
{
    const int first = *(const int *)val1;
    const int second = *(const int *)val2;

    return (second > first) - (second < first);   // decreasing order
} //compara

60

//Example struct sorted with qsort()


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
struct datac {
int an;
int luna;
int zi;
};
/* One record of the table sorted in this example: a short name plus a date. */
struct pers {
/* name, stored in place (up to 11 characters plus the terminating NUL) */
char numep[12];
/* date used as the sort key; presumably the birth date - TODO confirm */
struct datac datan;
};
int cmp( const struct pers *a, const struct pers *b);

61

/*
 * Demo for qsort() on an array of structures: sorts five pers records
 * with cmp() and prints name and date of each record.
 */
int main(void)
{
    /* NOTE(review): {1961, 12, 32} is not a valid calendar date; it is
       kept as in the original sample data - confirm whether intended. */
    struct pers angaj[ ] = {
        {"x1", {1980, 6,6}},
        {"x2", {1960, 5, 5}},
        {"x3", {1960, 1,5}},
        {"x4", {1961, 12, 32}},
        {"x5", {1980, 2, 29}}
    };
    int i;
    int nang = sizeof(angaj)/sizeof(struct pers);

    // call the sorting function; cmp() takes typed pointers, so it is
    // cast to the callback type qsort() expects
    qsort((void *)angaj, nang, sizeof(angaj[0]),
          (int (*)(const void*, const void*))cmp);

    printf("Datele sortate :\n");
    for (i = 0; i < nang; i++) {
        printf("\t%s, %d, %d, %d\n", angaj[i].numep, angaj[i].datan.an,
               angaj[i].datan.luna, angaj[i].datan.zi);
    }
    _getch();   // wait for a key press before exiting
    return 0;
}

62

/*
 * Three-way comparison of two pers records by their datan field:
 * an is compared first, then luna, then zi.
 *
 * Returns 1 if a's date is greater, -1 if it is smaller and 0 when all
 * three fields are equal.
 */
int cmp(const struct pers *a, const struct pers *b)
{
    if (a->datan.an != b->datan.an)
        return (a->datan.an > b->datan.an) ? 1 : -1;

    if (a->datan.luna != b->datan.luna)
        return (a->datan.luna > b->datan.luna) ? 1 : -1;

    if (a->datan.zi != b->datan.zi)
        return (a->datan.zi > b->datan.zi) ? 1 : -1;

    return 0;
}
63

64