谁能帮我分析一下,为什么我的程序运行得这么慢?处理 650 个文档需要 130 分钟。应该如何修改,才能让程序的预处理性能更好?下面是主类:
/**
 * Program entry point: loads the documents, derives the list of distinct terms,
 * builds the weighting statistics and writes the tf and idf matrices to CSV.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) throws IOException, BiffException, Exception {
    TA_baru loader = new TA_baru();

    // Read every document first, then split them into the distinct term list.
    ArrayList<dataset> dokumen = loader.bacatxt(new ArrayList<dataset>());
    ArrayList<String> term = loader.doktermlist(dokumen);

    Himpunan_metoda_pembobotan hmp = new Himpunan_metoda_pembobotan(dokumen, term);
    hmp.tulistf();
    hmp.tulisidf();
}
/**
 * Collects the distinct terms that occur in the given documents.
 *
 * @param tt documents whose {@code getTerm()} lists are merged
 * @return a new list holding each term once, in the iteration order of a {@link HashSet}
 */
public ArrayList<String> doktermlist(ArrayList<dataset> tt) throws IOException, BiffException {
    Set<String> unik = new HashSet<>();
    for (dataset dok : tt) {
        // The set silently drops terms that were already seen in earlier documents.
        unik.addAll(dok.getTerm());
    }
    return new ArrayList<String>(unik);
}
/**
 * Loads every regular {@code *.txt} file of the data directory into the given list.
 *
 * <p>The list is cleared first, then one {@code dataset} (file name + file content) is
 * appended per matching file, in the order returned by {@link File#listFiles()}.
 *
 * @param x list to fill; any previous content is discarded
 * @return the same list instance {@code x}
 * @throws IOException if the data directory cannot be listed or a file cannot be read
 */
public ArrayList<dataset> bacatxt(ArrayList<dataset> x) throws IOException, BiffException {
    x.clear();
    File folder = new File("C:\\Users\\burhan\\Documents\\NetBeansProjects\\TA_baru\\data");
    File[] listOfFiles = folder.listFiles();
    if (listOfFiles == null) {
        // listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message instead of a NullPointerException in the loop below.
        throw new IOException("Cannot list data directory: " + folder.getAbsolutePath());
    }
    for (File file : listOfFiles) {
        if (file.isFile() && file.getName().endsWith(".txt")) {
            String content = FileUtils.readFileToString(file);
            x.add(new dataset(file.getName(), content));
        }
    }
    return x;
}
下面是 Himpunan_metoda_pembobotan 类:
/**
 * Computes term-weighting statistics for a document collection and writes the resulting
 * tf and idf matrices to CSV files.
 *
 * <p>For a term and a category, the four counters used throughout this class are:
 * <ul>
 *   <li>{@code a} - documents of the category for which {@code carikata(term)} is false</li>
 *   <li>{@code b} - documents of the category for which {@code carikata(term)} is true</li>
 *   <li>{@code c} - documents of other categories for which {@code carikata(term)} is true</li>
 *   <li>{@code d} - documents of other categories for which {@code carikata(term)} is false</li>
 * </ul>
 *
 * <p>These counters depend only on the (term, category) pair, so they are computed once and
 * cached. The cache assumes the content of {@link #dokumen} does not change after construction.
 */
public class Himpunan_metoda_pembobotan {

    /** Category-specific data derived from the document list, computed once per category. */
    private static final class KategoriStats {
        /** Documents whose category equals the cached category. */
        final ArrayList<dataset> positif = new ArrayList<>();
        /** All remaining documents. */
        final ArrayList<dataset> negatif = new ArrayList<>();
        /** Maps a term to its counters in the order {a, b, c, d}. */
        final java.util.Map<String, double[]> counts = new java.util.HashMap<>();
    }

    ArrayList<dataset> dokumen = new ArrayList<>(), positif = new ArrayList<>(), negatif = new ArrayList<>();
    ArrayList<String> term = new ArrayList<>();
    ArrayList<Nilai> nabcd = new ArrayList<>();
    double a, b, c, d;

    /** Cache of per-category partitions and counters; keyed by the value of getKategori(). */
    private final java.util.Map<Object, KategoriStats> statsByKategori = new java.util.HashMap<>();

    /**
     * Stores the documents and terms and fills {@link #nabcd} with one {@code Nilai} per
     * (document, term) pair, document-major: all terms of document 0, then document 1, ...
     *
     * @param dok  documents to analyse
     * @param term distinct terms to weight
     */
    public Himpunan_metoda_pembobotan(ArrayList<dataset> dok, ArrayList<String> term) {
        dokumen = dok;
        this.term = term;
        for (int i = 0; i < dok.size(); i++) {
            for (int j = 0; j < term.size(); j++) {
                abcd(term.get(j), dokumen.get(i));
            }
            // Progress indicator: index of the document that has just been processed.
            System.out.println(i);
        }
    }

    /**
     * Counts the distinct categories among the documents.
     *
     * @return number of distinct {@code getKategori()} values
     */
    public int jumlah_kategori() {
        Set<Object> setkategori = new HashSet<>();
        for (dataset dok : dokumen) {
            setkategori.add(dok.getKategori());
        }
        return setkategori.size();
    }

    /**
     * Counts the documents whose text contains the given string.
     *
     * @param x substring to look for in {@code getTeks()}
     * @param y documents to inspect
     * @return number of documents containing {@code x}
     */
    public int jumlah_dokumen_terdapat_term(String x, ArrayList<dataset> y) {
        int jumlah = 0;
        for (dataset dok : y) {
            if (dok.getTeks().contains(x)) {
                jumlah++;
            }
        }
        return jumlah;
    }

    /** Empties the positive/negative working lists and resets the four counters to zero. */
    public void clear() {
        positif.clear();
        negatif.clear();
        a = 0;
        b = 0;
        c = 0;
        d = 0;
    }

    /**
     * Loads the counters stored in {@link #nabcd} for the given term and category into the
     * fields {@code a..d}. When several entries match, the last one wins; when none matches,
     * the fields are left untouched.
     *
     * @param tem      term to look up
     * @param kategori category to look up
     */
    public void carinilaiabcd(String tem, String kategori) {
        for (Nilai n : nabcd) {
            if (n.getKata().equals(tem) && n.getKategori().equals(kategori)) {
                a = n.getA();
                b = n.getB();
                c = n.getC();
                d = n.getD();
            }
        }
    }

    /**
     * Computes the counters {@code a..d} of term {@code x} relative to the category of
     * document {@code y}, leaves them in the fields, refills {@link #positif} and
     * {@link #negatif}, and appends a new {@code Nilai} (with idf and tf set) to {@link #nabcd}.
     *
     * <p>The document partition and the counters are taken from a per-category cache, so the
     * expensive {@code carikata} scan over all documents runs once per (term, category) pair
     * instead of once per (term, document) pair.
     *
     * @param x term
     * @param y document whose category defines the positive class
     */
    public void abcd(String x, dataset y) {
        clear();
        KategoriStats stats = statsFor(y.getKategori());
        positif.addAll(stats.positif);
        negatif.addAll(stats.negatif);

        double[] counts = stats.counts.get(x);
        if (counts == null) {
            counts = hitungAbcd(x, stats);
            stats.counts.put(x, counts);
        }
        a = counts[0];
        b = counts[1];
        c = counts[2];
        d = counts[3];

        Nilai n = new Nilai(a, b, c, d, x, y.getKategori());
        n.setIdf(idf(n));
        n.setTf(tf(x, y));
        nabcd.add(n);
    }

    /** Returns the cached partition for a category, building it on first use. */
    private KategoriStats statsFor(Object kategori) {
        KategoriStats stats = statsByKategori.get(kategori);
        if (stats == null) {
            stats = new KategoriStats();
            for (dataset dok : dokumen) {
                if (dok.getKategori().equals(kategori)) {
                    stats.positif.add(dok);
                } else {
                    stats.negatif.add(dok);
                }
            }
            statsByKategori.put(kategori, stats);
        }
        return stats;
    }

    /** Scans both partitions once and returns the counters as {a, b, c, d}. */
    private static double[] hitungAbcd(String x, KategoriStats stats) {
        double[] counts = new double[4];
        for (dataset dok : stats.positif) {
            if (dok.carikata(x)) {
                counts[1]++;
            } else {
                counts[0]++;
            }
        }
        for (dataset dok : stats.negatif) {
            if (dok.carikata(x)) {
                counts[2]++;
            } else {
                counts[3]++;
            }
        }
        return counts;
    }

    /**
     * Term frequency: the value of {@code data.jumlah_term(term)}.
     *
     * @param term term
     * @param data document
     * @return term frequency as a double
     */
    public double tf(String term, dataset data) {
        return data.jumlah_term(term);
    }

    /**
     * Computes {@code 1 - 1 / (1 + tf)}.
     *
     * @param term term
     * @param data document
     * @return value in [0, 1) for non-negative tf
     */
    public double itf(String term, dataset data) {
        return 1 - (1 / (1 + tf(term, data)));
    }

    /**
     * Computes {@code log(2 + b / c)}, using a divisor of 1 when {@code c < 1}.
     *
     * @param term unused
     * @param n    counters of the term
     * @return the computed value; 0 when neither branch applies (e.g. c is NaN)
     */
    public double rf(String term, Nilai n) {
        double x = 0;
        if (n.getC() < 1) {
            x = Math.log(2 + (n.b / 1));
        } else if (n.getC() > 0) {
            x = Math.log(2 + (n.b / n.c));
        }
        return x;
    }

    /**
     * Product of {@link #tf} and {@link #rf}.
     *
     * @param term term
     * @param n    counters of the term
     * @param data document
     * @return tf * rf
     */
    public double tfrf(String term, Nilai n, dataset data) {
        return tf(term, data) * rf(term, n);
    }

    /**
     * Computes {@code log10(N / (b + c))} where N is the number of documents.
     *
     * @param n counters of the term
     * @return the computed value; not finite when {@code b + c} is zero (assuming double counters)
     */
    public double idf(Nilai n) {
        return Math.log10(dokumen.size() / (n.b + n.c));
    }

    /**
     * Product of {@link #tf} and {@link #idf}.
     *
     * @param term term
     * @param data document
     * @param n    counters of the term
     * @return tf * idf
     */
    public double tf_idf(String term, dataset data, Nilai n) {
        return tf(term, data) * idf(n);
    }

    /**
     * Computes {@code N * (a*b - c*d) / ((a+d)(b+c)(a+b)(c+d))} after refreshing the counters.
     * Like every method below that calls {@link #abcd}, this appends an entry to {@link #nabcd}.
     * NOTE(review): the name suggests chi-square, whose usual form squares the numerator and
     * pairs the counters differently - confirm the intended formula.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double x2(String term, dataset data) {
        abcd(term, data);
        return dokumen.size() * (((a * b) - (c * d)) / ((a + d) * (b + c) * (a + b) * (c + d)));
    }

    /**
     * Computes {@code log((b*d) / (a*b))} after refreshing the counters.
     * NOTE(review): {@code b} cancels out here; if an odds ratio is intended the denominator
     * is presumably {@code a * c} - confirm before changing.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double OR(String term, dataset data) {
        abcd(term, data);
        return Math.log((b * d) / (a * b));
    }

    /**
     * Computes the sum of entropy-style terms below after refreshing the counters.
     * Terms whose logarithm argument is zero yield non-finite results.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double IG(String term, dataset data) {
        abcd(term, data);
        double n = dokumen.size();
        return ((-(b + a) / n) * Math.log((b + a) / n)) - ((((c + d) / n) * Math.log((c + d) / n)))
                + ((b / n) * Math.log(b / (b + c))) + ((c / n) * Math.log(c / (b + c)))
                + ((a / n) * Math.log(a / (a + d))) + ((d / n) * Math.log(d / (d + a)));
    }

    /**
     * Computes {@code sqrt(N) * (a*d - b*c) / sqrt((b+c)(a+d) * |positif| * |negatif|)}
     * after refreshing the counters.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double ngl(String term, dataset data) {
        abcd(term, data);
        return ((Math.sqrt(dokumen.size()) * ((a * d) - (b * c))))
                / (Math.sqrt((b + c) * (a + d) * positif.size() * negatif.size()));
    }

    /**
     * Computes {@code a*d - b*c} after refreshing the counters.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double gss(String term, dataset data) {
        abcd(term, data);
        return (a * d) - (b * c);
    }

    /**
     * Computes {@code ln(N / (b + c))} after refreshing the counters.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double iqf(String term, dataset data) {
        abcd(term, data);
        return Math.log(dokumen.size() / (b + c));
    }

    /**
     * Computes {@code ln(b + 1)} after refreshing the counters.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value
     */
    public double qf(String term, dataset data) {
        abcd(term, data);
        return Math.log(b + 1);
    }

    /**
     * Computes {@code ln(b + 1) / ln(a + 1)} after refreshing the counters.
     *
     * @param term term
     * @param data document defining the positive category
     * @return the computed value; not finite when {@code a} is zero
     */
    public double vrf(String term, dataset data) {
        abcd(term, data);
        return Math.log(b + 1) / Math.log(a + 1);
    }

    /**
     * Fraction of the given strings that contain {@code term} after lower-casing them.
     *
     * @param term substring to look for
     * @param dok  texts to inspect
     * @return matches divided by {@code dok.size()} (NaN for an empty list)
     */
    private double dia(String term, ArrayList<String> dok) {
        double n = 0;
        for (String teks : dok) {
            if (teks.toLowerCase().contains(term)) {
                n++;
            }
        }
        return n / (double) dok.size();
    }

    /** Writes the idf matrix (one row per document, one column per term) to hasil_iidf.csv. */
    public void tulisidf() {
        tulisMatriks("hasil_iidf.csv", true);
    }

    /** Writes the tf matrix (one row per document, one column per term) to hasil_tf.csv. */
    public void tulistf() {
        tulisMatriks("hasil_tf.csv", false);
    }

    /**
     * Writes a header row followed by one row per document. Values are read from
     * {@link #nabcd} by position, assuming the document-major layout produced by the
     * constructor ({@code term.size()} consecutive entries per document).
     * I/O errors are reported with a stack trace and otherwise ignored.
     *
     * @param namaFile output file name
     * @param tulisIdf true to write getIdf() values, false to write getTf() values
     */
    private void tulisMatriks(String namaFile, boolean tulisIdf) {
        // A single writer in try-with-resources: it is flushed and closed on every path.
        try (FileWriter out = new FileWriter(namaFile)) {
            out.write("dokumen_data");
            for (String t : term) {
                out.write("," + t);
            }
            out.write("," + "Class");
            out.write("\n");

            System.out.println(nabcd.size());
            System.out.println(term.size());
            if (!nabcd.isEmpty()) {
                // Guarded so an empty collection does not abort the export.
                System.out.println(nabcd.get(0).getIdf());
            }

            int lebar = term.size();
            for (int i = 0; i < dokumen.size(); i++) {
                out.write("dokumen_" + i);
                int awal = i * lebar;
                for (int j = awal; j < awal + lebar; j++) {
                    Nilai n = nabcd.get(j);
                    if (tulisIdf) {
                        out.write("," + n.getIdf());
                    } else {
                        out.write("," + n.getTf());
                    }
                }
                out.write("," + dokumen.get(i).getKategori());
                out.write("\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}