You are on page 1 of 6

Aim: Conflation Algorithm

#include<stdio.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Pipeline stages, defined later in this file. The empty parentheses are
 * old-style declarations that leave the parameter lists unspecified; the
 * parameter lists used by the definitions are noted beside each name.
 */
void orig_file();         /* (FILE *fp): print a stream's contents to stdout */
void punct_remove();      /* (FILE *fp, FILE *fp1): copy fp to fp1 with punctuation replaced by spaces */
void freq_words_remove(); /* (FILE *fp1, FILE *fp2, FILE *fp3): write to fp3 the input words that match no word read from fp2 */
void suffix_strip();      /* (FILE *fp3, FILE *fp4, FILE *fp5): write the input words to fp5 after stripping suffixes read from fp4 */
void equi_stem();         /* (FILE *fp5, FILE *fp6): write each distinct word of fp5 to fp6 */
void stem();              /* (FILE *fp6, FILE *fp7): copy fp6 to fp7, writing "happy" for words containing "happiness" */
/* Close every non-NULL stream in the array. */
static void close_all_streams(FILE *streams[], size_t count)
{
    size_t i;

    for (i = 0; i < count; i++) {
        if (streams[i] != NULL) {
            fclose(streams[i]);
        }
    }
}

/*
 * Menu-driven driver for the conflation pipeline.
 *
 * Opens the three input files (document, stop-word list, suffix list) and the
 * five intermediate/output files, then repeatedly shows a menu and runs the
 * selected stage until the user declines to continue or picks "Exit".
 *
 * Returns EXIT_SUCCESS on a normal exit and EXIT_FAILURE when any file cannot
 * be opened. All streams that were opened are closed on every exit path.
 */
int main(void)
{
    int ans = 0, ch = 0;
    int quit = 0;      /* set when the user picks menu item 7 */
    int discard;       /* used to drop the rest of a malformed input line */
    FILE *fp, *fp1, *fp2, *fp3, *fp4, *fp5, *fp6, *fp7;
    FILE *streams[8];

    clrscr();
    streams[0] = fp  = fopen("Filenew.txt", "r+");
    streams[1] = fp1 = fopen("Wo_punct.txt", "w+");
    streams[2] = fp2 = fopen("Stops.txt", "r+");
    streams[3] = fp3 = fopen("Wo_freq.txt", "w+");
    streams[4] = fp4 = fopen("Suffix_list.txt", "r+");
    streams[5] = fp5 = fopen("Wo_suffix.txt", "w+");
    streams[6] = fp6 = fopen("Doc_copy.txt", "w+");
    streams[7] = fp7 = fopen("Doc_rep.txt", "w+");
    if (fp == NULL || fp1 == NULL || fp2 == NULL || fp3 == NULL ||
        fp4 == NULL || fp5 == NULL || fp6 == NULL || fp7 == NULL) {
        printf("\nError in opening file!!!\n");
        /* Release whichever streams did open before reporting failure. */
        close_all_streams(streams, sizeof streams / sizeof streams[0]);
        getch();
        return EXIT_FAILURE;
    }

    printf("\t\t --------CONFLATION ALGORTIHM SIMULATION--------\n");
    do {
        printf("\nMENU:\n=====\n");
        printf("\n1.Display the Original File contents.");
        printf("\n2.Remove Punctuation Marks.");
        printf("\n3.Remove the High Frequency Words.");
        printf("\n4.Perform Suffix Stripping.");
        printf("\n5.Detect Equivalent Stems.");
        printf("\n6.Document Representation of the original file.");
        printf("\n7.Exit.");
        printf("\nEnter your Choice: ");
        if (scanf("%d", &ch) != 1) {
            if (feof(stdin)) {
                break; /* no more input: leave the menu loop */
            }
            /* Non-numeric input: drop the line and report it as invalid. */
            while ((discard = getchar()) != '\n' && discard != EOF) {
                /* discard */
            }
            ch = 0;
        }

        switch (ch) {
        case 1:
            printf("\n\t\t\t\tORIGINAL DOCUMENT\n");
            printf("\t\t\t\t-----------------\n");
            orig_file(fp);
            break;
        case 2:
            printf("\n\t\t\tAfter removal of Punctuation Marks\n");
            printf("\t\t\t----------------------------------\n");
            punct_remove(fp, fp1);
            break;
        case 3:
            printf("\n\t\t\tAfter Removal of High Frequency Words\n");
            printf("\t\t\t-------------------------------------\n");
            freq_words_remove(fp1, fp2, fp3);
            break;
        case 4:
            printf("\n\t\t\tAfter Suffix Stripping\n");
            printf("\t\t\t----------------------\n");
            suffix_strip(fp3, fp4, fp5);
            break;
        case 5:
            printf("\n\t\t\tAfter Detecting Equivalent Stems\n");
            printf("\t\t\t--------------------------------\n");
            /* equi_stem is defined with two stream parameters. */
            equi_stem(fp5, fp6);
            stem(fp6, fp7);
            break;
        case 6:
            printf("\n\t\t\t DOCUMENT REPRESENTATION\n");
            printf("\t\t\t -----------------------\n");
            orig_file(fp7);
            break;
        case 7:
            quit = 1;
            break;
        default:
            printf("\nINVALID INPUT!!!");
            break;
        }
        if (quit) {
            break;
        }

        printf("\n\nDo You Want To Continue?\n1.YES\t\t0.NO\n");
        if (scanf("%d", &ans) != 1) {
            ans = 0; /* unreadable answer is treated as "no" */
        }
    } while (ans == 1);

    close_all_streams(streams, sizeof streams / sizeof streams[0]);
    if (!quit) {
        getch(); /* keep the console open until a key is pressed */
    }
    return EXIT_SUCCESS;
}
/*
 * Print the whole contents of a stream to standard output.
 *
 * fp: open, readable stream. It is rewound first, so output always starts at
 *     the beginning of the file; reading continues until fgetc() reports EOF.
 */
void orig_file(FILE *fp)
{
    /* int, not char: EOF is an out-of-band int value, so storing the result
     * in a char either mistakes a 0xFF byte for EOF or never matches EOF. */
    int ch;

    rewind(fp);
    while ((ch = fgetc(fp)) != EOF) {
        putchar(ch);
    }
}
/*
 * Copy fp to fp1, replacing each punctuation mark with a single space, then
 * print the result.
 *
 * fp:  readable source stream (rewound before reading).
 * fp1: read/write destination stream (rewound before writing).
 *
 * The punctuation set is: . , ! ? : ' " ; - ( ) [ ]
 */
void punct_remove(FILE *fp,FILE *fp1)
{
    static const char punctuation[] = ".,!?:'\";-()[]";
    int ch; /* int so that EOF is distinguishable from every byte value */

    rewind(fp);
    rewind(fp1);
    /* Test the read itself rather than feof(): feof() only becomes true
     * after a failed read, which would copy the EOF sentinel as a byte. */
    while ((ch = getc(fp)) != EOF) {
        /* strchr() also matches the terminating NUL, so exclude it. */
        if (ch != '\0' && strchr(punctuation, ch) != NULL) {
            fputc(' ', fp1);
        } else {
            fputc(ch, fp1);
        }
    }
    orig_file(fp1);
}
/*
 * Remove high-frequency (stop) words.
 *
 * Reads whitespace-separated words from fp1 and writes to fp3, each followed
 * by a space, every word that does not match (ignoring case) any word in the
 * stop-word list fp2. The result is then printed.
 *
 * fp1: read/write stream holding the punctuation-free text.
 * fp2: readable stream holding the stop-word list; scanned from the start
 *      for every input word, up to its end.
 * fp3: read/write destination stream (rewound before writing).
 *
 * Words longer than 63 characters are read in 63-character pieces instead of
 * overflowing the buffers.
 */
void freq_words_remove(FILE *fp1,FILE *fp2,FILE *fp3)
{
    char dword[64] = "", stopword[64] = "";
    int is_stopword;

    /* Use the caller's stream: reopening the file here leaked a handle on
     * every call and ignored the fp1 argument. */
    rewind(fp1);
    rewind(fp3);
    while (fscanf(fp1, "%63s", dword) == 1) {
        is_stopword = 0;
        rewind(fp2);
        while (fscanf(fp2, "%63s", stopword) == 1) {
            /* stricmp: the case-insensitive compare this file already uses
             * (non-standard; presumably supplied by the target compiler). */
            if (stricmp(dword, stopword) == 0) {
                is_stopword = 1;
                break;
            }
        }
        if (!is_stopword) {
            fprintf(fp3, "%s", dword);
            fputc(' ', fp3);
        }
    }
    orig_file(fp3);
}
/*
 * Strip known suffixes from each word.
 *
 * Reads whitespace-separated words from fp3. For each word, the first (up to
 * eight) entries of the suffix list fp4 are tried in order; the first entry
 * that occurs anywhere in the word decides the outcome:
 *   - entries 0..5: strip when the word has at least 4 characters and the
 *     text from the last occurrence of the suffix's first letter to the end
 *     of the word is exactly as long as the suffix;
 *   - entry 6: same rule, but only when the first match is the word's final
 *     character;
 *   - entry 7: same rule, and additionally when that tail is one character
 *     longer than the suffix, in which case 'l' is appended to the stem.
 * A word that is not stripped is written unchanged. Every output word is
 * followed by a space, and the result is printed at the end.
 *
 * fp3: read/write stream holding the stop-word-free text.
 * fp4: readable stream holding the suffix list (position in the list
 *      selects the rule above).
 * fp5: read/write destination stream (rewound before writing).
 *
 * NOTE(review): the tail-length test does not verify that the tail actually
 * equals the suffix; that heuristic is kept as-is — confirm against the
 * intended suffix list.
 */
void suffix_strip(FILE *fp3,FILE *fp4,FILE *fp5)
{
    char sword[64] = "", suffix[64] = "", newword[64] = "";
    const char *match;
    const char *revpos;
    size_t length, suf_length, sub_length, tail_length, stem_length;
    int i, emitted, rule_applies, append_l;

    /* Use the caller's stream: reopening the file here leaked a handle on
     * every call and ignored the fp3 argument. */
    rewind(fp3);
    rewind(fp5);
    while (fscanf(fp3, "%63s", sword) == 1) {
        emitted = 0;
        rewind(fp4);
        for (i = 0; i < 8; i++) {
            if (fscanf(fp4, "%63s", suffix) != 1) {
                break; /* suffix list has fewer than eight entries */
            }
            match = strstr(sword, suffix);
            if (match == NULL) {
                continue;
            }

            length = strlen(sword);
            suf_length = strlen(suffix);
            sub_length = strlen(match);
            /* Never NULL: suffix is non-empty and occurs in sword. */
            revpos = strrchr(sword, suffix[0]);
            tail_length = strlen(revpos);
            /* The stem is everything before the LAST occurrence; scanning
             * for the first occurrence truncated words such as "singing". */
            stem_length = (size_t)(revpos - sword);

            rule_applies = (i <= 5) || (i == 6 && sub_length == 1) || (i == 7);
            if (rule_applies) {
                /* Prints the matched tail of the word, as before. */
                printf("Suffix '%s' is a substring of '%s'\n\n", match, sword);
                append_l = (i == 7 && tail_length == suf_length + 1);
                if (length >= 4 && (tail_length == suf_length || append_l)) {
                    memcpy(newword, sword, stem_length);
                    if (append_l) {
                        newword[stem_length++] = 'l';
                    }
                    newword[stem_length] = '\0';
                    fprintf(fp5, "%s", newword);
                    fputc(' ', fp5);
                    emitted = 1;
                }
            }
            break; /* only the first matching suffix is considered */
        }
        /* Keep the word when nothing was stripped; previously a word whose
         * suffix match could not be stripped vanished from the output. */
        if (!emitted) {
            fprintf(fp5, "%s", sword);
            fputc(' ', fp5);
        }
    }
    orig_file(fp5);
}
/*
 * Remove repeated words.
 *
 * Reads whitespace-separated words from fp5 and appends to fp6, each followed
 * by a space, every word that is not already present in fp6 (compared
 * ignoring case). The de-duplicated list is then printed under a heading.
 *
 * fp5: read/write stream holding the suffix-stripped words.
 * fp6: read/write stream that receives the distinct words; it is re-read
 *      from the start for every input word.
 *
 * Words longer than 63 characters are read in 63-character pieces instead of
 * overflowing the buffers. An empty fp5 produces no output words.
 */
void equi_stem(FILE *fp5,FILE *fp6)
{
    char word1[64] = "", nextword[64] = "";
    int seen;

    rewind(fp5);
    rewind(fp6);
    while (fscanf(fp5, "%63s", word1) == 1) {
        seen = 0;
        rewind(fp6);
        while (fscanf(fp6, "%63s", nextword) == 1) {
            /* stricmp: the case-insensitive compare this file already uses
             * (non-standard; presumably supplied by the target compiler). */
            if (stricmp(word1, nextword) == 0) {
                seen = 1;
                break;
            }
        }
        if (!seen) {
            /* A positioning call is required when switching an update-mode
             * stream from reading to writing; it also guarantees we append. */
            fseek(fp6, 0L, SEEK_END);
            fprintf(fp6, "%s", word1);
            fputc(' ', fp6);
        }
    }
    printf("\n\t\t\tAfter Removing Repeated Words\n");
    printf("\t\t\t-----------------------------\n");
    orig_file(fp6);
}
/*
 * Replace equivalent stems.
 *
 * Copies the whitespace-separated words of fp6 to fp7, each followed by a
 * space, writing "happy" in place of any word that contains "happiness".
 * A heading is printed first and the resulting fp7 contents afterwards.
 *
 * fp6: read/write stream holding the distinct words.
 * fp7: read/write destination stream; rewound before writing so that a
 *      repeated call overwrites the previous output instead of appending a
 *      second copy after it.
 *
 * Words longer than 63 characters are read in 63-character pieces instead of
 * overflowing the buffer.
 */
void stem(FILE *fp6,FILE *fp7)
{
    char word1[64] = "";
    const char rep_word[] = "happy";

    printf("\n\n\t\t\tAfter Removing Equivalent Stems\n");
    printf("\t\t\t-------------------------------\n");
    rewind(fp6);
    rewind(fp7);
    while (fscanf(fp6, "%63s", word1) == 1) {
        if (strstr(word1, "happiness") != NULL) {
            fprintf(fp7, "%s", rep_word);
        } else {
            fprintf(fp7, "%s", word1);
        }
        fputc(' ', fp7);
    }
    orig_file(fp7);
}

You might also like