# Ad-hoc algorithm for copy-move forgery detection in images.
# Implemented by - vasiliauskas.agnius@gmail.com
#
# Robust match algorithm steps:
# 1. Blur image for eliminating image details
# 2. Convert image to degraded palette
# 3. Decompose image into small NxN pixel blocks
# 4. Alphabetically order these blocks by their pixel values
# 5. Extract only these adjacent blocks which have small absolute color
#    difference
# 6. Cluster these blocks into clusters by intersection area among blocks
# 7. Extract only these clusters which are bigger than block size
# 8. Extract only these clusters which have similar cluster, by using some
#    sort of similarity function (in this case Hausdorff distance between
#    clusters)
# 9. Draw discovered similar clusters on image
#
# NOTE: the processing functions read their thresholds from the module-level
# name `opt` (the parsed command-line options), which is assigned in the
# `__main__` section at the bottom of this file.

import os
import sys
try:
    from PIL import Image, ImageFilter, ImageDraw
except ImportError:
    # Reported with a readable message by the __main__ section; keeping the
    # import soft lets the pure geometry helpers be used without Pillow.
    Image = ImageFilter = ImageDraw = None
import operator as op
from optparse import OptionParser


def Dist(p1, p2):
    """Return the Euclidean distance between two (x, y) points."""
    x1, y1 = p1
    x2, y2 = p2
    return (((x1 - x2) * (x1 - x2)) + ((y1 - y2) * (y1 - y2))) ** 0.5


def intersectarea(p1, p2, size):
    """
    Return the intersection area of two axis-aligned square boxes.

    p1, p2 -- (x, y) top-left corners of the boxes
    size   -- side length shared by both boxes

    Returns 0 when the boxes do not overlap.
    """
    x1, y1 = p1
    x2, y2 = p2
    ix1, iy1 = max(x1, x2), max(y1, y2)
    ix2, iy2 = min(x1 + size, x2 + size), min(y1 + size, y2 + size)
    if iy2 < iy1 or ix2 < ix1:
        return 0
    return (ix2 - ix1) * (iy2 - iy1)


def Hausdorff_distance(clust1, clust2, forward, dir):
    """
    Measure the distance (dissimilarity) between two point sets.

    This is a modified Hausdorff distance: the average of the
    nearest-neighbour distances is used instead of their maximum, which
    makes the measure more tolerant of noise in cluster coordinates.

    clust1, clust2 -- sequences of (x, y) points
    forward        -- True: measure clust1 -> clust2 after shifting clust1
                      by `dir`; False: measure clust2 -> clust1 after
                      shifting clust2 by `-dir`; None: return the larger of
                      the two directed distances
    dir            -- (dx, dy) translation expected to map clust1 onto clust2
    """
    if forward is None:
        return max(Hausdorff_distance(clust1, clust2, True, dir),
                   Hausdorff_distance(clust1, clust2, False, dir))
    clstart, clend = (clust1, clust2) if forward else (clust2, clust1)
    dx, dy = dir if forward else (-dir[0], -dir[1])
    return sum([min([Dist((p1[0] + dx, p1[1] + dy), p2) for p2 in clend])
                for p1 in clstart]) / len(clstart)


def hassimilarcluster(ind, clusters):
    """
    Tell whether cluster `clusters[ind]` has a twin cluster in the image.

    Every other cluster is aligned to the given one by the offset between
    their minimal x / minimal y coordinates; the pair counts as twins when
    the symmetric modified Hausdorff distance is at most `opt.rgsim`.
    """
    item = op.itemgetter
    found = False
    tx = min(clusters[ind], key=item(0))[0]
    ty = min(clusters[ind], key=item(1))[1]
    for i, cl in enumerate(clusters):
        if i != ind:
            cx = min(cl, key=item(0))[0]
            cy = min(cl, key=item(1))[1]
            dx, dy = cx - tx, cy - ty
            specdist = Hausdorff_distance(clusters[ind], cl, None, (dx, dy))
            if specdist <= int(opt.rgsim):
                found = True
                break
    return found


def blockpoints(pix, coords, size):
    """
    Generate the pixel colors of one block, column by column.

    pix    -- object indexable as pix[x, y]
    coords -- (x, y) top-left corner of the block
    size   -- side length of the block
    """
    xs, ys = coords
    for x in range(xs, xs + size):
        for y in range(ys, ys + size):
            yield pix[x, y]


def colortopalette(color, palette):
    """
    Convert the given color into a palette color.

    palette -- sequence of (low, high) pairs in ascending order; a color in
               the half-open range [low, high) maps to `high`.

    A color at or above the top boundary is clamped to that boundary instead
    of falling through to None (which used to happen for e.g. color 255 when
    the palette ends at 255). Colors below the first range, or an empty
    palette, still yield None.
    """
    for a, b in palette:
        if a <= color < b:
            return b
    if palette and color >= palette[-1][1]:
        return palette[-1][1]
    return None


def imagetopalette(image, palcolors):
    """
    Convert the given image, in place, into custom palette colors.

    image     -- image in mode 'L'
    palcolors -- ascending list of palette boundaries; consecutive values
                 form the (low, high) ranges passed to colortopalette
    """
    assert image.mode == 'L', "Only grayscale images supported !"
    pal = [(palcolors[i], palcolors[i + 1]) for i in range(len(palcolors) - 1)]
    image.putdata([colortopalette(c, pal) for c in image.getdata()])


def getparts(image, block_len):
    """
    Decompose the given image into small blocks of data.

    Returns a sorted list with one entry per block position; each entry is
    the list of the block's pixel values followed by its (x, y) position.
    """
    img = image.convert('L') if image.mode != 'L' else image
    w, h = img.size
    parts = []
    # Bluring image for abandoning image details and noise.
    for _ in range(int(opt.imblev)):
        img = img.filter(ImageFilter.SMOOTH_MORE)
    # Converting image to custom palette
    imagetopalette(img, [x for x in range(256) if x % int(opt.impalred) == 0])
    pix = img.load()

    for x in range(w - block_len):
        for y in range(h - block_len):
            data = list(blockpoints(pix, (x, y), block_len)) + [(x, y)]
            parts.append(data)
    parts = sorted(parts)
    return parts


def similarparts(imagparts):
    """
    Return only those blocks which are similar by content.

    imagparts -- sorted block list as produced by getparts

    Neighbouring entries are compared: a pair is kept when the first block's
    relative color deviation is at least `opt.blcoldev` and the summed
    absolute pixel difference of the pair is at most `opt.blsim`.
    Returns [] for empty input.
    """
    if not imagparts:
        return []
    dupl = []
    seen = set()  # hashable copies of the blocks already placed in `dupl`
    l = len(imagparts[0]) - 1  # number of pixel values; last item is (x, y)
    blcoldev = float(opt.blcoldev)
    blsim = int(opt.blsim)

    for i in range(len(imagparts) - 1):
        cur, nxt = imagparts[i], imagparts[i + 1]
        difs = sum(abs(x - y) for x, y in zip(cur[:l], nxt[:l]))
        mean = float(sum(cur[:l])) / l
        if not mean:
            # A zero mean makes the relative deviation undefined; skip the
            # block rather than divide by zero.
            continue
        dev = float(sum(abs(mean - val) for val in cur[:l])) / l
        if dev / mean >= blcoldev and difs <= blsim:
            for part in (cur, nxt):
                key = tuple(part)
                if key not in seen:
                    seen.add(key)
                    dupl.append(part)

    return dupl


def clusterparts(parts, block_len):
    """
    Further filter out non-essential blocks.

    Blocks are clustered first; afterwards small clusters and clusters which
    do not have a twin cluster in the image are dropped.

    parts -- block list as returned by similarparts
    Returns a list of clusters, each a list of (x, y) block positions.
    Returns [] for empty input.
    """
    if not parts:
        return []
    parts = sorted(parts, key=op.itemgetter(-1))
    blint = float(opt.blint)
    rgsize = float(opt.rgsize)
    clusters = [[parts[0][-1]]]

    # assign all parts to clusters
    for part in parts[1:]:
        x, y = part[-1]

        # detect box already in cluster
        fc = []
        for k, cl in enumerate(clusters):
            for xc, yc in cl:
                ar = intersectarea((xc, yc), (x, y), block_len)
                intrat = float(ar) / (block_len * block_len)
                if intrat > blint:
                    if not fc:
                        clusters[k].append((x, y))
                    fc.append(k)
                    break

        # if this is new cluster
        if not fc:
            clusters.append([(x, y)])
        else:
            # re-clustering boxes if in several clusters at once; merging
            # from the highest index down keeps the lower indices valid
            while len(fc) > 1:
                clusters[fc[0]] += clusters[fc[-1]]
                del clusters[fc[-1]]
                del fc[-1]

    item = op.itemgetter
    # filter out small clusters: bounding-box diagonal measured in units of
    # block_len * 1.4 (presumably ~sqrt(2), i.e. one block diagonal)
    clusters = [clust for clust in clusters
                if Dist((min(clust, key=item(0))[0],
                         min(clust, key=item(1))[1]),
                        (max(clust, key=item(0))[0],
                         max(clust, key=item(1))[1]))
                / (block_len * 1.4) >= rgsize]

    # filter out clusters, which do not have identical twin cluster
    clusters = [clust for x, clust in enumerate(clusters)
                if hassimilarcluster(x, clusters)]

    return clusters


def marksimilar(image, clust, size):
    """
    Draw discovered similar image regions onto `image` and return it.

    image -- image to draw on (modified in place)
    clust -- list of clusters, each a list of (x, y) block positions
    size  -- side length of a block

    Every block is blended 50/50 with cyan; when `opt.imauto` is set, a
    magenta bounding rectangle is drawn around each cluster.
    """
    blocks = []
    if clust:
        draw = ImageDraw.Draw(image)
        mask = Image.new('RGB', (size, size), 'cyan')
        # All crops are taken before anything is pasted back.
        for cl in clust:
            for x, y in cl:
                im = image.crop((x, y, x + size, y + size))
                im = Image.blend(im, mask, 0.5)
                blocks.append((x, y, im))
        for x, y, im in blocks:
            image.paste(im, (x, y, x + size, y + size))
        if int(opt.imauto):
            for cl in clust:
                cx1 = min([cx for cx, cy in cl])
                cy1 = min([cy for cx, cy in cl])
                # use the `size` argument rather than the script-level
                # global `block_len`
                cx2 = max([cx for cx, cy in cl]) + size
                cy2 = max([cy for cx, cy in cl]) + size
                draw.rectangle([cx1, cy1, cx2, cy2], outline="magenta")
    return image


if __name__ == '__main__':
    cmd = OptionParser("usage: %prog image_file [options]")
    cmd.add_option('', '--imauto', help='Automatically search identical regions. (default: %default)', default=1)
    cmd.add_option('', '--imblev', help='Blur level for degrading image details. (default: %default)', default=8)
    cmd.add_option('', '--impalred', help='Image palette reduction factor. (default: %default)', default=15)
    cmd.add_option('', '--rgsim', help='Region similarity threshold. (default: %default)', default=5)
    cmd.add_option('', '--rgsize', help='Region size threshold. (default: %default)', default=1.5)
    cmd.add_option('', '--blsim', help='Block similarity threshold. (default: %default)', default=200)
    cmd.add_option('', '--blcoldev', help='Block color deviation threshold. (default: %default)', default=0.2)
    cmd.add_option('', '--blint', help='Block intersection threshold. (default: %default)', default=0.2)
    opt, args = cmd.parse_args()
    if not args:
        cmd.print_help()
        sys.exit()
    if Image is None:
        sys.exit('The Pillow (PIL) package is required to analyze images.')
    print('Analyzing image, please wait... (can take some minutes)')
    block_len = 15
    im = Image.open(args[0])
    lparts = getparts(im, block_len)
    dparts = similarparts(lparts)
    cparts = clusterparts(dparts, block_len) if int(opt.imauto) else [[elem[-1] for elem in dparts]]
    im = marksimilar(im, cparts, block_len)
    # splitext keeps directory parts that contain dots (e.g. "./photo.jpg")
    out = os.path.splitext(args[0])[0] + '_analyzed.jpg'
    im.save(out)
    print('Done. Found %s identical regions' % (len(cparts) if int(opt.imauto) else 0))
    print('Output is saved in file - %s' % out)