You are on page 1 of 3

/// <summary>
/// Counts how many times each word of <c>termList</c> occurs.
/// </summary>
/// <returns>
/// Two parallel lists: element 0 holds the distinct words in order of first appearance,
/// element 1 holds the matching occurrence counts (boxed <c>int</c>).
/// </returns>
public ArrayList[] hitungTF()
{
    ArrayList[] tf = new ArrayList[2];
    tf[0] = new ArrayList(); // key   = term
    tf[1] = new ArrayList(); // value = occurrence count

    foreach (String word in termList)
    {
        // One IndexOf scan instead of Contains followed by IndexOf.
        int idx = tf[0].IndexOf(word);
        if (idx >= 0)
        {
            tf[1][idx] = (int)tf[1][idx] + 1;
        }
        else
        {
            tf[0].Add(word);
            tf[1].Add(1);
        }
    }

    return tf;
}

/// <summary>
/// Stores the term frequencies of one document in the database: adds a column named
/// after the document to <c>tb_tf</c> and <c>tb_vector_w</c>, registers the document in
/// <c>tb_cat_doc</c>, then writes one frequency per term.
/// </summary>
/// <param name="tf">Parallel term/count lists as produced by <see cref="hitungTF"/>.</param>
/// <param name="fileName">Document identifier; used both as a column name and as a row value.</param>
/// <param name="className">Value stored next to the document in <c>tb_cat_doc</c>.</param>
public void insertTFToDB(ArrayList[] tf, String fileName, String className)
{
    String column = quoteSqlIdentifier(fileName);

    // Insert the term frequency into the database.
    db.executeNonQuery("ALTER TABLE `tb_tf` ADD COLUMN " + column +
        " SMALLINT UNSIGNED NOT NULL DEFAULT '0';");
    db.executeNonQuery("ALTER TABLE `tb_vector_w` ADD COLUMN " + column +
        " DOUBLE NOT NULL DEFAULT '0.0';");
    db.executeNonQuery("INSERT INTO `tb_cat_doc` VALUES(" + quoteSqlLiteral(fileName) +
        "," + quoteSqlLiteral(className) + ");");

    for (int i = 0; i < tf[0].Count; i++)
    {
        String termLiteral = quoteSqlLiteral(tf[0][i]);

        if (db.hasRows("SELECT * FROM `tb_term` WHERE `term`=" + termLiteral + ";"))
        {
            // Known term: only the new document column has to be filled in.
            db.executeNonQuery("UPDATE `tb_tf` SET " + column + "=" + tf[1][i] +
                " WHERE `term`=" + termLiteral + ";");
        }
        else
        {
            // New term: create its row in all three term-keyed tables.
            db.executeNonQuery("INSERT INTO `tb_term` VALUES(" + termLiteral + ");");
            db.executeNonQuery("INSERT INTO `tb_tf` (`term`, " + column +
                ") VALUES(" + termLiteral + ", " + tf[1][i] + ");");
            db.executeNonQuery("INSERT INTO `tb_vector_w` (`term`) VALUES(" + termLiteral + ");");
        }
    }
}

/// <summary>
/// Computes a weight for every (document, term) pair stored in the database and writes
/// it to <c>tb_vector_w</c>. The weight is <c>(1 + log10(tf)) * log10(N / df)</c>, where
/// <c>N</c> is the number of documents and <c>df</c> the number of documents with a
/// non-zero frequency for the term. Each document column is then divided by the square
/// root of the sum of its squared weights.
/// </summary>
public void hitungTermWeightDokLatih()
{
    ArrayList[] document = db.executeReader("SELECT `no_doc` FROM `tb_cat_doc`;");
    ArrayList[] term = db.executeReader("SELECT `term` FROM `tb_term` ORDER BY `term` ASC;");

    // The document frequency of a term does not depend on the document being weighted,
    // so it is computed at most once per term (-1 = not computed yet).
    int[] documentFrequency = new int[term[0].Count];
    for (int j = 0; j < documentFrequency.Length; j++)
    {
        documentFrequency[j] = -1;
    }

    for (int i = 0; i < document[0].Count; i++)
    {
        String column = quoteSqlIdentifier(document[0][i]);
        Double sumTfIdf = 0.0;

        for (int j = 0; j < term[0].Count; j++)
        {
            String termLiteral = quoteSqlLiteral(term[0][j]);
            ArrayList[] tmpTF = db.executeReader("SELECT " + column +
                " FROM `tb_tf` WHERE `term`=" + termLiteral + ";");

            Double tfIdf = 0.0;
            if (!tmpTF[0][0].ToString().Equals("0"))
            {
                if (documentFrequency[j] < 0)
                {
                    documentFrequency[j] = hitungDocumentFrequency(document, term[0][j]);
                }

                // At least 1 here: the current document itself has a non-zero frequency.
                int sumNi = documentFrequency[j];
                tfIdf = (1 + Math.Log10(Double.Parse(tmpTF[0][0].ToString())))
                    * Math.Log10((double)document[0].Count / sumNi);
            }
            sumTfIdf += (tfIdf * tfIdf);

            // Insert into table tb_vector_w. The number is formatted culture-invariantly:
            // a decimal comma would make the statement invalid.
            db.executeNonQuery("UPDATE `tb_vector_w` SET " + column + "=" +
                formatSqlDouble(tfIdf) + " WHERE `term`=" + termLiteral + ";");
        }
        setIncrementProgressBar(3);

        sumTfIdf = Math.Sqrt(sumTfIdf);

        // Update table tb_vector_w. A zero norm means every weight of this document is
        // already 0, so there is nothing to scale and no division by zero is sent.
        if (sumTfIdf > 0.0)
        {
            String norm = formatSqlDouble(sumTfIdf);
            for (int j = 0; j < term[0].Count; j++)
            {
                db.executeNonQuery("UPDATE `tb_vector_w` SET " + column + "=" + column +
                    "/" + norm + " WHERE `term`=" + quoteSqlLiteral(term[0][j]) + ";");
            }
        }
        setIncrementProgressBar(2);
    }
}

/// <summary>
/// Computes normalised weights for the terms of a document that is not in the database,
/// using the same formula as <see cref="hitungTermWeightDokLatih"/>. Terms that are not
/// present in <c>tb_term</c> are ignored.
/// </summary>
/// <param name="tf">Parallel term/count lists as produced by <see cref="hitungTF"/>.</param>
/// <returns>
/// Two parallel lists: element 0 holds the retained terms, element 1 their weights
/// (boxed <c>Double</c>), divided by the square root of the sum of squared weights
/// unless that value is 0.
/// </returns>
public ArrayList[] hitungTermWeightDokUji(ArrayList[] tf)
{
    ArrayList[] termWeight = new ArrayList[2];
    termWeight[0] = new ArrayList(); // key   = term
    termWeight[1] = new ArrayList(); // value = w

    ArrayList[] document = db.executeReader("SELECT `no_doc` FROM `tb_cat_doc`;");
    ArrayList[] term = db.executeReader("SELECT `term` FROM `tb_term` ORDER BY `term` ASC;");

    Double sumTfIdf = 0.0;
    for (int i = 0; i < tf[0].Count; i++)
    {
        if (!term[0].Contains(tf[0][i].ToString()))
        {
            continue;
        }

        int sumNi = hitungDocumentFrequency(document, tf[0][i]);
        if (sumNi == 0)
        {
            // No stored document contains the term: N / 0 would give an infinite weight.
            continue;
        }

        Double tfIdf = (1 + Math.Log10(Double.Parse(tf[1][i].ToString())))
            * Math.Log10((double)document[0].Count / sumNi);

        termWeight[0].Add(tf[0][i]);
        termWeight[1].Add(tfIdf);
        sumTfIdf += (tfIdf * tfIdf);
    }

    sumTfIdf = Math.Sqrt(sumTfIdf);

    // With a zero norm every weight is already 0; dividing would turn them into NaN.
    if (sumTfIdf > 0.0)
    {
        for (int i = 0; i < termWeight[0].Count; i++)
        {
            termWeight[1][i] = (Double)termWeight[1][i] / sumTfIdf;
        }
    }

    return termWeight;
}

/// <summary>
/// Scores every stored document against the given term weights and keeps the best
/// <c>kValue</c> scores in <c>SIM</c>, ordered from highest to lowest.
/// The score is <c>sum(w * d) / sqrt(sum(w^2 * d^2))</c>, where <c>w</c> is the given
/// weight and <c>d</c> the document's weight read from <c>tb_vector_w</c>.
/// </summary>
/// <param name="termWeight">
/// Parallel term/weight lists as produced by <see cref="hitungTermWeightDokUji"/>.
/// </param>
public void hitungSIM(ArrayList[] termWeight)
{
    ArrayList[] document = db.executeReader("SELECT `no_doc`, `category` FROM `tb_cat_doc`;");

    for (int i = 0; i < document[0].Count; i++)
    {
        String column = quoteSqlIdentifier(document[0][i]);
        Double simXdj = 0.0;
        Double simXdj2 = 0.0;

        for (int j = 0; j < termWeight[0].Count; j++)
        {
            ArrayList[] tmpDj = db.executeReader("SELECT " + column +
                " FROM `tb_vector_w` WHERE `term`=" + quoteSqlLiteral(termWeight[0][j]) + ";");

            Double queryWeight = (Double)termWeight[1][j];
            Double documentWeight = Double.Parse(tmpDj[0][0].ToString());

            simXdj += queryWeight * documentWeight;
            simXdj2 += Math.Pow(queryWeight, 2) * Math.Pow(documentWeight, 2);
        }

        // A zero denominator means every product was 0; report 0 instead of NaN (0 / 0).
        Double denominator = Math.Sqrt(simXdj2);
        simXdj = denominator > 0.0 ? simXdj / denominator : 0.0;

        SIM.Add(new SIM(simXdj, document[0][i].ToString()));

        // Keep the list in descending order.
        for (int k = 0; k < SIM.Count - 1; k++)
        {
            for (int j = k + 1; j < SIM.Count; j++)
            {
                if (SIM[k] < SIM[j])
                {
                    SIM tmp = SIM[k];
                    SIM[k] = SIM[j];
                    SIM[j] = tmp;
                }
            }
        }

        // One entry is added per iteration, so dropping index kValue removes the lowest score.
        if (SIM.Count > kValue)
        {
            SIM.RemoveAt(kValue);
        }
    }
}

/// <summary>
/// Counts the stored documents whose <c>tb_tf</c> frequency for <paramref name="term"/>
/// is not 0. A document for which the query returns no row is not counted.
/// </summary>
private int hitungDocumentFrequency(ArrayList[] document, object term)
{
    String termLiteral = quoteSqlLiteral(term);
    int sumNi = 0;

    for (int k = 0; k < document[0].Count; k++)
    {
        ArrayList[] tmpNi = db.executeReader("SELECT " + quoteSqlIdentifier(document[0][k]) +
            " FROM `tb_tf` WHERE `term`=" + termLiteral + ";");
        if (tmpNi[0].Count > 0 && !tmpNi[0][0].ToString().Equals("0"))
        {
            sumNi++;
        }
    }

    return sumNi;
}

/// <summary>
/// Wraps a value in single quotes for use as an SQL string literal, escaping
/// backslashes and single quotes.
/// </summary>
private static String quoteSqlLiteral(object value)
{
    // NOTE(review): escaping assumes MySQL in its default SQL mode (backslash escapes
    // enabled) - confirm against the server configuration.
    String text = value == null ? "" : value.ToString();
    return "'" + text.Replace("\\", "\\\\").Replace("'", "''") + "'";
}

/// <summary>
/// Wraps a name in backticks for use as an SQL identifier, doubling embedded backticks.
/// </summary>
private static String quoteSqlIdentifier(object name)
{
    String text = name == null ? "" : name.ToString();
    return "`" + text.Replace("`", "``") + "`";
}

/// <summary>
/// Formats a number for embedding in SQL text: always a decimal point, never a
/// culture-specific decimal comma.
/// </summary>
private static String formatSqlDouble(Double value)
{
    return value.ToString("R", System.Globalization.CultureInfo.InvariantCulture);
}

You might also like