How to run my own clustering algorithm (Java code) inside Apache Spark
I have code from another student; it is a k-medoids clustering algorithm, and it should run inside Apache Spark. I know Spark is based on RDDs. The algorithm is complete, but I do not know how to convert it to use RDDs. Here is an example part of the code:
public queue<arraylist<object>> getfswoosh() { queue<arraylist<object>> resultat = new linkedlist(); map<string, map<string, arraylist<object>>> p = new hashmap(); map<string, set<string>> n = new hashmap(); arraylist<object> courant = null; arraylist<object> buddy; arraylist<object> doc; arraylist<object> fusion; // preparation of sets "p" , "n" (reglesimilarite regle : this.regles) { string feature = regle.getsimilarites().get(0).getattribut(); (int = 1; < regle.getsimilarites().size(); i++) { feature += "+" + regle.getsimilarites().get(i).getattribut(); } p.put(feature, new hashmap()); n.put(feature, new hashset()); } while (!liste.isempty() || courant != null) { if (courant == null) { courant = liste.remove(); } buddy = null; // new feature values registered (feature, value) -> null (string feature : p.keyset()) { //system.out.println("verification de nouveaux documents"); string valeur = pfv(feature, courant); if (p.get(feature).get(valeur) == null) { p.get(feature).put(valeur, courant); //system.out.println("nouvelle valeur rencontrée"); } } // if value has been encountered (feature, value) -> (document! = current) (string feature : p.keyset()) { //system.out.println("verification de p s'il existe deja un document!=courant"); string valeur = pfv(feature, courant); if (p.get(feature).get(valeur) != courant) { buddy = p.get(feature).get(valeur); //system.out.println("valeur trouvé dans p !!"); } } // if not find match in p, in '(result) if (buddy == null) { int index = 0; // indice de la regle similarite = ordre de la feature !? 
( tester ) (string feature : p.keyset()) { //system.out.println("verification pour match "+feature+"=="+this.regles.get(index)); string valeur_feature = pfv(feature, courant); if (!n.get(feature).contains(valeur_feature)) { if (!resultat.isempty()) { iterator<arraylist<object>> = resultat.iterator(); while (it.hasnext()) { doc = it.next(); if (this.matchfswoosh(doc, courant, index)) { buddy = doc; break; } } } if (buddy == null) { n.get(feature).add(valeur_feature); } } index++; } } if (buddy == null) { resultat.add(courant); courant = null; } else { //system.out.println("fusion de "+courant+" "+buddy); fusion = this.merge(courant, buddy); resultat.remove(buddy); (string feature : p.keyset()) { (string valeur : p.get(feature).keyset()) { if (p.get(feature).get(valeur) == courant || p.get(feature).get(valeur) == buddy) { p.get(feature).put(valeur, fusion); } } } courant = fusion; } } return resultat; }
Is there a way to run this code inside Spark? If not, is there a way to run complex code like this on RDDs? Any other suggestion is welcome — thank you.
Comments
Post a Comment