# Ad-hoc algorithm for copy-move forgery detection in images.
# Implemented by - vasiliauskas.agnius@gmail.com

# Robust match algorithm steps:
#  1. Blur image for eliminating image details
#  2. Convert image to degraded palette
#  3. Decompose image into small NxN pixel blocks
#  4. Alphabetically order these blocks by their pixel values
#  5. Extract only these adjacent blocks which have small absolute color
#     difference
#  6. Cluster these blocks into clusters by intersection area among blocks
#  7. Extract only these clusters which are bigger than block size
#  8. Extract only these clusters which have similar cluster, by using some
#     sort of similarity function (in this case Hausdorff distance between
#     clusters)
#  9. Draw discovered similar clusters on image

import sys
from PIL import Image, ImageFilter, ImageDraw
import operator as op
from optparse import OptionParser


def Dist(p1, p2):
    """
    Euclidean distance between 2 points.

    p1, p2 -- (x, y) pairs.
    Returns the straight-line distance between them as a float.
    """
    x1, y1 = p1
    x2, y2 = p2
    dx = x1 - x2
    dy = y1 - y2
    return (dx * dx + dy * dy) ** 0.5


def intersectarea(p1, p2, size):
    """
    Given 2 boxes, this function returns intersection area.

    p1, p2 -- (x, y) of the minimum-coordinate corner of each box.
    size   -- side length shared by both (square) boxes.
    Returns the area of the overlap, or 0 when the boxes do not overlap.
    """
    x1, y1 = p1
    x2, y2 = p2
    # Corners of the candidate intersection rectangle.
    ix1, iy1 = max(x1, x2), max(y1, y2)
    ix2, iy2 = min(x1 + size, x2 + size), min(y1 + size, y2 + size)
    # An inverted rectangle means the boxes are disjoint on that axis.
    if ix2 < ix1 or iy2 < iy1:
        return 0
    return (ix2 - ix1) * (iy2 - iy1)


def Hausdorff_distance(clust1, clust2, forward, dir):
    """
    Function measures distance between 2 sets. (Some kind of
    non-similarity between 2 sets if you like).

    It is a modified Hausdorff distance: instead of the maximum of the
    nearest-neighbour distances, their average is taken, which makes the
    result less sensitive to a few outlying cluster coordinates.

    clust1, clust2 -- sequences of (x, y) points.
    forward        -- True:  measure from clust1 to clust2, shifting the
                             clust1 points by dir;
                      False: measure from clust2 to clust1, shifting the
                             clust2 points by -dir;
                      None:  return the larger of the two directed values.
    dir            -- (dx, dy) offset applied to the starting cluster.
                      (The name shadows the builtin; it is kept so that
                      existing callers remain valid.)

    Raises ZeroDivisionError when the starting cluster is empty and
    ValueError when the target cluster is empty while the starting one
    is not.
    """
    if forward is None:
        return max(Hausdorff_distance(clust1, clust2, True, dir),
                   Hausdorff_distance(clust1, clust2, False, dir))

    if forward:
        clstart, clend = clust1, clust2
        dx, dy = dir
    else:
        clstart, clend = clust2, clust1
        dx, dy = -dir[0], -dir[1]

    # For each shifted start point take the distance to its nearest end
    # point, then average those distances over the start cluster.
    nearest = [min(Dist((p1[0] + dx, p1[1] + dy), p2) for p2 in clend)
               for p1 in clstart]
    return sum(nearest) / len(clstart)


def hassimilarcluster(ind, clusters):
    """
    For given cluster tells does it have twin cluster in image or not.
    """
    # NOTE(review): only the two statements below could be recovered for
    # this function. The remainder of its body is inside the garbled text
    # that follows and must be restored from a clean copy of the script
    # before this function is used.
    item = op.itemgetter
    global opt

mode == 'L'.dy)) if specdist <= int(opt.xs+size): for y in range(ys. palette): """ Convert given color into palette color.key=item(0))[0] ty = min(clusters[ind]. palcolors): """ Convert given image into custom palette colors """ assert image.convert('L') if image. cy .rgsim): found = True break return found def blockpoints(pix.getdata())]) def getparts(image.key=item(1))[1] dx.None.palcolors[i+1]) for i in range(len(palcolors)-1)] image. ys = coords for x in range(xs.putdata([colortopalette(c.key=item(0))[0] cy = min(cl.pal) for c in list(image.load() for x in range(w-block_len): for y in range(h-block_len): data = list(blockpoints(pix.(dx. coords.ty specdist = Hausdorff_distance(clusters[ind]. h = img. [x for x in range(256) if x%int(opt.y] def colortopalette(color. global opt for n in range(int(opt.imblev)): img = img.mode != 'L' else image w. (x. """ img = image.ys+size): yield pix[x. """ for a.cl. """ xs.b in palette: if color >= a and color < b: return b def imagetopalette(image.filter(ImageFilter. cl in enumerate(clusters): if i != ind: cx = min(cl.append(data) parts = sorted(parts) return parts def similarparts(imagparts): """ .tx.SMOOTH_MORE) # Converting image to custom palette imagetopalette(img.found = False tx = min(clusters[ind]. dy = cx .y)] parts. block_len)) + [(x.impalred) == 0]) pix = img.size parts = [] # Bluring image for abandoning image details and noise. "Only grayscale images supported !" pal = [(palcolors[i]. block_len): """ Decompose given image into small blocks of data. size): """ Generator of pixel colors of given block.y).key=item(1))[1] for i.

blcoldev): if difs <= int(opt.4) >= float(opt.append(imagparts[i+1]) return dupl def clusterparts(parts. This is done by clustering blocks at first and after that filtering out small clusters and clusters which doesn`t have twin cluster in image. block_len): """ Further filtering out non essential blocks.min(clust.y)) fc.key=item(1))[1]))/(block_len*1.y). """ parts = sorted(parts.imagparts[i+1][:l])) mean = float(sum(imagparts[i][:l])) / l dev = float(sum(abs(mean-val) for val in imagparts[i][:l])) / l if dev/mean >= float(opt.max(clust. (max(clust.Return only these blocks which are similar by content. """ dupl = [] global opt l = len(imagparts[0])-1 for i in range(len(imagparts)-1): difs = sum(abs(x-y) for x.blsim): if imagparts[i] not in dupl: dupl.append(imagparts[i]) if imagparts[i+1] not in dupl: dupl.append([(x.rgsize)] # filter out clusters.append(k) break # if this is new cluster if not fc: clusters.yc). y = parts[i][-1] # detect box already in cluster fc = [] for k. which doesn`t have identical twin cluster .key=item(0))[0].itemgetter # filter out small clusters clusters = [clust for clust in clusters if Dist((min(clust.block_len) intrat = float(ar)/(block_len*block_len) if intrat > float(opt.(x. key=op.key=item(1))[1]).cl in enumerate(clusters): for xc.key=item(0))[0].len(parts)): x.append((x.y)]) else: # re-clustering boxes if in several clusters at once while len(fc) > 1: clusters[fc[0]] += clusters[fc[-1]] del clusters[fc[-1]] del fc[-1] item = op.itemgetter(-1)) global opt clusters = [[parts[0][-1]]] # assign all parts to clusters for i in range(1.yc in cl: ar = intersectarea((xc.y in zip(imagparts[i][:l].blint): if not fc: clusters[k].