Commit e1a1273c authored by Markus Krug
Browse files

Index is fucking slower !

parent 32a4579c
......@@ -48,6 +48,12 @@ public class Instance {
/**
 * Returns the feature array of this instance.
 *
 * @return the internal {@code featureArray} reference itself (no copy is made), so
 *         changes made by the caller are visible to this instance
 */
public int[][] getFeatureArray() {
    return this.featureArray;
}
/**
 * Returns the identifier stored in this instance.
 *
 * @return the value of the {@code id} field
 */
public int getId() {
    return this.id;
}
// this could be made faster by either a change of representation
// (Map<int->Set or even by calculating the array position by a hash
......
......@@ -126,4 +126,40 @@ public class MatrixMcMatrixFace {
return sb.toString();
}
/**
 * Combines the hashes of {@code fpMatrix}, {@code goldLabel} and {@code tpMatrix}
 * (in that order) with the usual multiplier of 31; a {@code null} matrix contributes 0.
 *
 * NOTE(review): the result is only value-based if the matrix type overrides
 * {@code hashCode()} - confirm for the matrix implementation in use.
 */
@Override
public int hashCode() {
    final int fpHash = (fpMatrix != null) ? fpMatrix.hashCode() : 0;
    final int tpHash = (tpMatrix != null) ? tpMatrix.hashCode() : 0;

    int hash = 31 + fpHash;
    hash = 31 * hash + goldLabel;
    hash = 31 * hash + tpHash;
    return hash;
}
/**
 * Two faces are equal when they are of the same class and agree on {@code fpMatrix},
 * {@code goldLabel} and {@code tpMatrix}; {@code null} matrices only equal {@code null}.
 *
 * NOTE(review): matrix comparison is delegated to the matrix type's {@code equals()};
 * if that type does not override it this degrades to reference equality - confirm.
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final MatrixMcMatrixFace that = (MatrixMcMatrixFace) obj;

    final boolean sameFp = (fpMatrix == null) ? that.fpMatrix == null : fpMatrix.equals(that.fpMatrix);
    if (!sameFp) {
        return false;
    }
    if (goldLabel != that.goldLabel) {
        return false;
    }
    return (tpMatrix == null) ? that.tpMatrix == null : tpMatrix.equals(that.tpMatrix);
}
}
......@@ -6,15 +6,17 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.datastructure.MatrixMapping;
import de.uniwue.ls6.datastructure.MatrixMcMatrixFace;
import jdk.nashorn.internal.runtime.arrays.IteratorAction;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
......@@ -37,34 +39,33 @@ public class MatrixUtil {
double sum = entry.get();
if (sum >= maximum) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
//save the score
double score = entry.get() -
matrixface.getFpMatrix().get(entry.row(), entry.column());
//is this legit?
// save the score
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
// is this legit?
entry.set(score);
entries.add(entry);
}
}
if(matrixMapping.getDenseMatrixDimension()>50){
if (matrixMapping.getDenseMatrixDimension() > 50) {
System.out.println("Too many possible features! We restrict to the best 2500");
matrixMapping = new MatrixMapping(formerMatrixSize);
//sort
Collections.sort(entries,new Comparator<MatrixEntry>() {
// sort
Collections.sort(entries, new Comparator<MatrixEntry>() {
@Override
/**
 * Orders matrix entries by their stored value, ascending.
 *
 * The previous {@code (int) (a - b)} form truncated every difference smaller than 1.0
 * to "equal" and could overflow, which breaks the comparator contract;
 * {@code Double.compare} gives a consistent total order.
 *
 * NOTE(review): the ordering is ascending, while the surrounding comments talk about
 * keeping the "top" entries - confirm whether a descending order is intended.
 */
public int compare(MatrixEntry o1, MatrixEntry o2) {
    return Double.compare(o1.get(), o2.get());
}
});
//add the top 2500 to the mapping
for(MatrixEntry entry : entries){
matrixMapping.addEntry(new Point(entry.column(),entry.row()));
// add the top 2500 to the mapping
for (MatrixEntry entry : entries) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
}
}
// infer the -> righthandside
matrixMapping.inferDenseMapValues();
......@@ -81,25 +82,124 @@ public class MatrixUtil {
label);
// populate the matrix with the dataset this is expensive!
instances.parallelStream().forEach((Instance inst)->{
instances.parallelStream().forEach((Instance inst) -> {
// expand
FlexCompColMatrix expandedInstance = inst.expand(mappings);
//TODO can we use something faster here... currently this results in a speedup of 3...
// TODO can we use something faster here... currently this results
// in a speedup of 3...
synchronized (expandedMatrixFace) {
// add to kronecker
if (inst.getLabel() == label) {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
} else {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
}
// add to kronecker
if (inst.getLabel() == label) {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
} else {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
}
}
});
return expandedMatrixFace;
}
/**
 * Builds the expanded true-positive / false-positive matrices for {@code label} by
 * querying a feature index instead of expanding every instance.
 *
 * For every cell of the {@code dimension^2 x dimension^2} result matrix the cell is
 * reverted to its source points, the features behind those points are determined, and
 * the instances that carry all of those features are looked up in {@code index}. The
 * number of such instances with the requested label is written to the TP matrix, the
 * number of all others to the FP matrix. Cells without any matching instance are left
 * untouched.
 *
 * The cells are processed by a fixed pool of 8 worker threads; {@code mappings},
 * {@code instances} and {@code index} are read concurrently and must not be modified
 * while this method runs. Writes to the result matrices are serialized.
 *
 * @param mappings  the mapping chain; the last element defines the dense dimension
 * @param instances the instances that may be counted
 * @param label     the label that counts as a true positive
 * @param index     maps a feature point to the instances that contain it
 * @return the populated matrix face
 * @throws IllegalStateException if the calling thread is interrupted while waiting
 *                               for the workers to finish
 */
public static MatrixMcMatrixFace performKroneckerExpansionWithIndex(List<MatrixMapping> mappings,
        Collection<Instance> instances, int label, Map<Point, Set<Instance>> index) {

    final MatrixMapping lastMapping = mappings.get(mappings.size() - 1);
    final int dimension = lastMapping.getDenseMatrixDimension();
    final int expandedSize = dimension * dimension;
    final MatrixMcMatrixFace expandedMatrixFace = new MatrixMcMatrixFace(expandedSize, expandedSize, label);

    final ExecutorService executor = Executors.newFixedThreadPool(8);
    try {
        // for each entry of the new matrix do...
        for (int col = 0; col < expandedSize; col++) {
            for (int row = 0; row < expandedSize; row++) {
                final int column = col;
                final int rowFin = row;
                executor.execute(() -> fillExpandedEntry(column, rowFin, lastMapping, mappings, instances, label,
                        index, expandedMatrixFace));
            }
        }
    } finally {
        // always release the pool, even if task submission fails half-way
        executor.shutdown();
    }

    try {
        // block until all cells are done instead of spinning on isTerminated()
        while (!executor.awaitTermination(1, java.util.concurrent.TimeUnit.MINUTES)) {
            // still running - keep waiting
        }
    } catch (InterruptedException e) {
        executor.shutdownNow();
        Thread.currentThread().interrupt();
        throw new IllegalStateException("Interrupted while waiting for the indexed Kronecker expansion", e);
    }

    return expandedMatrixFace;
}

/** Computes the TP/FP counts of a single expanded cell and stores them if any instance matches. */
private static void fillExpandedEntry(int column, int row, MatrixMapping lastMapping, List<MatrixMapping> mappings,
        Collection<Instance> instances, int label, Map<Point, Set<Instance>> index,
        MatrixMcMatrixFace expandedMatrixFace) {

    // recalculate this entry to feature points; first is to revert the kronecker
    List<Point> reversedFeatures = new ArrayList<>();
    reversedFeatures.add(new Point(column, row));
    revertKroneckerExpansion(lastMapping, reversedFeatures);

    // if the mapping does not contain one of those points we cannot find anything
    for (Point p : reversedFeatures) {
        if (!lastMapping.getInverseMappingMap().containsKey(p)) {
            return;
        }
    }

    // now determine the features behind both reverted points
    Set<Point> features = new HashSet<>();
    features.addAll(determineFeaturesForIndex(reversedFeatures.get(0), mappings));
    features.addAll(determineFeaturesForIndex(reversedFeatures.get(1), mappings));

    Set<Instance> matching = instancesWithAllFeatures(features, instances, index);

    int tps = 0;
    int fps = 0;
    for (Instance instance : matching) {
        if (instance.getLabel() == label) {
            tps++;
        } else {
            fps++;
        }
    }
    if (tps == 0 && fps == 0) {
        return;
    }

    // the result matrices are shared between all workers
    synchronized (expandedMatrixFace) {
        expandedMatrixFace.getTpMatrix().set(row, column, tps);
        expandedMatrixFace.getFpMatrix().set(row, column, fps);
    }
}

/** Returns the members of {@code instances} that the index lists for every one of {@code features}. */
private static Set<Instance> instancesWithAllFeatures(Set<Point> features, Collection<Instance> instances,
        Map<Point, Set<Instance>> index) {

    Set<Instance> matching = new HashSet<>();
    boolean seeded = false;
    for (Point feature : features) {
        Set<Instance> withFeature = index.get(feature);
        // a feature nobody has means no instance can have all features
        if (withFeature == null) {
            matching.clear();
            return matching;
        }
        if (seeded) {
            matching.retainAll(withFeature);
        } else {
            // first feature: start from the indexed instances that are part of this run
            for (Instance candidate : withFeature) {
                if (instances.contains(candidate)) {
                    matching.add(candidate);
                }
            }
            seeded = true;
        }
        // an empty intersection stays empty; it must not be re-seeded by a later feature
        if (matching.isEmpty()) {
            return matching;
        }
    }
    return matching;
}
public static List<Point> determineFeaturesForIndex(Point index, List<MatrixMapping> mappings) {
List<Point> reversedfeatures = new ArrayList<Point>(Arrays.asList(new Point[] { index }));
......
......@@ -4,9 +4,11 @@ import java.awt.Point;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import de.uniwue.ls6.algorithm.datastructure.RepresentationRule;
......@@ -18,13 +20,13 @@ import de.uniwue.ls6.datastructure.MatrixMapping;
import de.uniwue.ls6.datastructure.MatrixMcMatrixFace;
import de.uniwue.ls6.rulelearning.algorithm.IRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.util.MatrixUtil;
import no.uib.cipr.matrix.MatrixEntry;
public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentationRuleLearningAlgorithm {
List<RulePass> passes;
private int goldLabel;
private int otherLabel;
private Map<Point, Set<Instance>> index;
public BinaryRepresentationRuleLearningAlgorithm(int goldLabel, int otherLabel) {
passes = new LinkedList<RulePass>();
......@@ -44,6 +46,10 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
System.out.println("Train binary classifier: " + "Goldlabel: " + LabelAlphabet.getFeatureToId(goldLabel)
+ "\tvs\t" + LabelAlphabet.getFeatureToId(otherLabel));
System.out.println("Amount distinct features: " + LabelAlphabet.getSize());
System.out.println("Start indexing for " + instances.length + " instances...");
createIndex(instances);
System.out.println("Finished creating instance!");
while (morePasses(instancesForPass, currentGoldIndex)) {
// update the learning objective
if (passIndex % 2 == 0) {
......@@ -60,8 +66,9 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
amountGoldInstances++;
}
System.out.println("Labeldistribution (Other;Gold) (" + (instancesForPass.size() - amountGoldInstances) + ";"
+ amountGoldInstances + ") " + "; " + amountGoldInstances / instancesForPass.size() * 100 + "%");
System.out.println("Labeldistribution (Other;Gold) (" + (instancesForPass.size() - amountGoldInstances)
+ ";" + amountGoldInstances + ") " + "; " + amountGoldInstances / instancesForPass.size() * 100
+ "%");
// create a new pass
learnRulePass(currentGoldIndex, instancesForPass);
......@@ -72,6 +79,26 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
}
/**
 * Rebuilds {@code index} from scratch for the given instances.
 *
 * Every cell of an instance's feature array contributes one key
 * {@code Point(col, featureValue)}; the instance is added to the set stored under
 * that key. The row is not part of the key.
 *
 * NOTE(review): the array is read as {@code [col][row]} while the loops are bounded by
 * {@code getNrRows()} / {@code getNrCols()} - confirm this matches the array layout.
 *
 * @param instances the instances to index
 */
private void createIndex(Instance[] instances) {
    index = new HashMap<Point, Set<Instance>>();
    for (Instance instance : instances) {
        // plain getter, so one lookup per instance is enough
        final int[][] featureArray = instance.getFeatureArray();
        for (int row = 0; row < instance.getNrRows(); row++) {
            for (int col = 0; col < instance.getNrCols(); col++) {
                Point key = new Point(col, featureArray[col][row]);
                // single map lookup instead of containsKey + get + put
                index.computeIfAbsent(key, k -> new HashSet<Instance>()).add(instance);
            }
        }
    }
}
private Set<Instance> keepClassifiableInstances(List<RulePass> passes, Set<Instance> instancesForPass) {
Set<Instance> remainingInstances = new HashSet<Instance>();
......@@ -157,7 +184,16 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
.containsKey(maxEntryLocation)) : "Mapping does not contain maximum point";
// expand in kronecker fashion
bestMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
long time = System.currentTimeMillis();
bestMatrix = MatrixUtil.performKroneckerExpansionWithIndex(mappings, instances, goldLabel,index);
// MatrixMcMatrixFace other = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
// if(!other.equals(bestMatrix)){
// System.out.println("BUG!");
// System.out.println(MatrixUtil.prettyMatrixFormat(bestMatrix.getTpMatrix()));
// System.out.println();
// System.out.println(MatrixUtil.prettyMatrixFormat(other.getTpMatrix()));
// }
System.out.println(System.currentTimeMillis()-time);
if (!betterRuleCanBeLearned(maximumScore, bestMatrix)) {
mappings.remove(mappingForMaximum);
......
......@@ -26,6 +26,7 @@ public class FirstTest {
File document = new File("resources\\Aston,-Louise__Lydia.xmi.xmi.xmi");
File doc2 = new File("resources\\Ahlefeld,-Charlotte-von_Erna1421[Lukas].xmi.xmi");
File typesystem = new File("resources\\MiKalliTypesystem.xml");
File bigDoc = new File("C:\\Users\\mkrug\\annoTest\\TestProject\\output\\hp5_utf8.txt.xmi");
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
......@@ -34,7 +35,7 @@ public class FirstTest {
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2,
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 5, 5,
"de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3), new SuffixNGenerator(2),
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment