Commit 273f5eb0 authored by Markus Krug's avatar Markus Krug
Browse files

kronecker expansion by index, is much faster now

parent d95e3b1c
......@@ -6,6 +6,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
......@@ -70,25 +71,25 @@ public class MatrixUtil {
}
}
// get the features of this point
Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
mappings, mappings.size() > 0 ? true : false);
// only keep each feature combination exactly once!
if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
continue;
}
uniqueFeatureCombinations.add(featuresOfPoints);
// furthermore, we can filter out all feature combinations that
// cover exactly the same instance set,
// because the algorithm cannot distinguish between them
Set<Instance> instancesForFeatureSet = determineInstancesForFeatures(featuresOfPoints, indexMap);
if (uniqueInstance.contains(instancesForFeatureSet)) {
continue;
}
uniqueInstance.add(instancesForFeatureSet);
// // get the features of this point
// Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
// mappings, mappings.size() > 0 ? true : false);
//
// // only keep each feature combination exactly once!
// if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
// continue;
// }
// uniqueFeatureCombinations.add(featuresOfPoints);
//
// // furthermore we can filter all those featurecombinations that
// // resemble exactly the same instance set
// // because the algrotihm can not differ between those
// Set<Instance> instancesForFeatureSet = determineInstancesForFeatures(featuresOfPoints, indexMap);
//
// if (uniqueInstance.contains(instancesForFeatureSet)) {
// continue;
// }
// uniqueInstance.add(instancesForFeatureSet);
// finally we can decide to keep our feature for the next
// iteration
......@@ -137,7 +138,7 @@ public class MatrixUtil {
// entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
// }
//logger.info("Best possible coverage: " + maxCoverage);
// logger.info("Best possible coverage: " + maxCoverage);
return matrixMapping;
}
......@@ -158,6 +159,88 @@ public class MatrixUtil {
return intersectionSet;
}
/**
 * Expands the most recent dense mapping in Kronecker fashion by combining pairs of its
 * dense cells, looking up the covered instances through {@code indexMap}.
 *
 * <p>For every dense cell of the last mapping the instances matching that cell's features
 * are determined once (restricted to {@code instances}) and cached. A pair of cells
 * {@code (e1, e2)} stands for the union of both feature sets and the intersection of both
 * instance sets. A pair is written to the result only if neither its feature union nor its
 * instance set has been produced by an earlier pair. The target cell is
 * {@code (e1.row * d + e2.row, e1.column * d + e2.column)} with {@code d} being the dense
 * dimension; the TP matrix receives the number of instances whose label equals
 * {@code label}, the FP matrix receives the remaining instances of the pair.
 *
 * <p>Union and intersection are symmetric, so for each {@code e1} only the cells up to and
 * including {@code e1} itself (in the matrix' iteration order) are paired with it.
 *
 * @param mappings  mappings collected so far; only the last one is expanded, so the list
 *                  must not be empty
 * @param instances instances the cached instance sets are restricted to
 * @param indexMap  index handed to {@code determineInstancesForFeatures}
 * @param label     label that counts as a true positive
 * @return a new matrix with {@code d * d} rows and columns holding the TP/FP counts
 */
public static MatrixMcMatrixFace performKroneckerExpansionByIndex(List<MatrixMapping> mappings,
        Collection<Instance> instances, Map<Point, Set<Instance>> indexMap, int label) {
    // filters out pairs whose covered instance set has already been produced
    Set<Set<Instance>> uniqueInstances = new HashSet<>();
    // filters out pairs whose combined feature set has already been produced
    Set<Set<Point>> uniqueFeatures = new HashSet<>();
    // dense cell -> instances covered by the features of that cell
    Map<Point, Set<Instance>> indexOfActFeatures = new HashMap<>();

    // begin expansion
    MatrixMapping lastMapping = mappings.get(mappings.size() - 1);
    int denseDimension = lastMapping.getDenseMatrixDimension();
    int expandedDimension = denseDimension * denseDimension;
    MatrixMcMatrixFace expandedMatrix = new MatrixMcMatrixFace(expandedDimension, expandedDimension, label);

    // retainAll() calls contains() once per element; a hash based view keeps that cheap
    // when the caller passes a List
    Collection<Instance> allowedInstances = instances instanceof Set ? instances
            : new HashSet<Instance>(instances);

    FlexCompColMatrix denseInstanceMatrix = new FlexCompColMatrix(denseDimension, denseDimension);
    for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
        Set<Point> features = lastMapping.getFeaturesForDenseIndex(denseIndices);
        // Point.x is the column and Point.y the row of the dense cell
        denseInstanceMatrix.add(denseIndices.y, denseIndices.x, 1);
        // NOTE(review): the returned set is modified in place - confirm that
        // determineInstancesForFeatures never hands out a set owned by indexMap
        Set<Instance> instancesOfCell = determineInstancesForFeatures(features, indexMap);
        instancesOfCell.retainAll(allowedInstances);
        indexOfActFeatures.put(denseIndices, instancesOfCell);
    }

    for (MatrixEntry e1 : denseInstanceMatrix) {
        // everything that only depends on e1 is resolved once per outer iteration
        int row1 = e1.row();
        int col1 = e1.column();
        Point cell1 = new Point(col1, row1);
        Set<Point> featE1 = lastMapping.getFeaturesForDenseIndex(cell1);
        Set<Instance> insE1 = indexOfActFeatures.get(cell1);

        for (MatrixEntry e2 : denseInstanceMatrix) {
            int row2 = e2.row();
            int col2 = e2.column();
            Point cell2 = new Point(col2, row2);

            Set<Point> fullFeats = new HashSet<>(featE1);
            fullFeats.addAll(lastMapping.getFeaturesForDenseIndex(cell2));

            // add() returns false if the combination is already known
            if (uniqueFeatures.add(fullFeats)) {
                Set<Instance> fullInstances = new HashSet<>(insE1);
                fullInstances.retainAll(indexOfActFeatures.get(cell2));

                if (uniqueInstances.add(fullInstances)) {
                    // calculate the new indices
                    int kroneckerCol = col1 * denseDimension + col2;
                    int kroneckerRow = row1 * denseDimension + row2;

                    // count tps, everything else is a fp
                    int tp = 0;
                    for (Instance i : fullInstances) {
                        if (i.getLabel() == label) {
                            tp++;
                        }
                    }
                    expandedMatrix.getTpMatrix().add(kroneckerRow, kroneckerCol, tp);
                    expandedMatrix.getFpMatrix().add(kroneckerRow, kroneckerCol, fullInstances.size() - tp);
                }
            }

            // Stop at the diagonal even if the diagonal pair itself was filtered out above;
            // the pairs behind it are mirrored duplicates that are reached again from the
            // other side.
            if (row1 == row2 && col1 == col2) {
                break;
            }
        }
    }
    return expandedMatrix;
}
public static MatrixMcMatrixFace performKroneckerExpansion(List<MatrixMapping> mappings,
Collection<Instance> instances, int label) {
......
......@@ -232,19 +232,19 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
if (maxEntryLocation == null)
break;
// map matrix to dense matrix
long time = System.currentTimeMillis();
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(iterationMatrix, maximumScore,
maxEntryLocation.getLocation(), mappings, indexMap, beamSize);
mappings.add(mappingForMaximum);
//System.out.println(System.currentTimeMillis() - time);
// assert that the maximum is within bounds
assert (maxEntryLocation == null ? true
: maxEntryLocation.getX() < iterationMatrix.getTpMatrix().numColumns() && maxEntryLocation
.getY() < iterationMatrix.getTpMatrix().numRows()) : "Position of maximum out of bounds";
// expand in kronecker fashion
iterationMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
long time = System.currentTimeMillis();
iterationMatrix = MatrixUtil.performKroneckerExpansionByIndex(mappings, instances,indexMap,goldLabel);
System.out.println(System.currentTimeMillis() - time);
// assert that the maximum is growing
assert (maximumScore <= iterationMatrix.getMaximumScore()) : "Maximum decreased within iteration!";
......@@ -257,7 +257,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// determine the best rule that has not been used before! TODO is this
// even necessary???
if (maxEntryLocation == null)
if (maxEntryLocation == null || maximumScore<=1)
return null;
// recalculate the rule
......
......@@ -42,7 +42,8 @@ public class FirstTest2 {
File doc2 = new File("resources\\Ahlefeld,-Charlotte-von_Erna1421[Lukas].xmi.xmi");
File typesystem = new File("resources\\MiKalliTypesystem.xml");
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
//File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
File korpusFOlder = new File("C:\\Users\\mkrug\\owncloud_neu\\projekt romangeschichte\\named entity recognition\\Goldstandard-Rescaled\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(100);
......@@ -63,8 +64,8 @@ public class FirstTest2 {
"DependencyRelation"),new UIMATypeAndFeatureGenerator("de.uniwue.kalimachos.coref.type.DependencyParse",
"Headname")));
System.out.println("instances: " + instances.size());
if (instances.size() > 10000)
break;
// if (instances.size() > 10000)
// break;
}
int id = 0;
......
......@@ -42,9 +42,10 @@ public class FirstTest3 {
File doc2 = new File("resources\\Ahlefeld,-Charlotte-von_Erna1421[Lukas].xmi.xmi");
File typesystem = new File("resources\\MiKalliTypesystem.xml");
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(100);
// File korpusFOlder = new File(
// "C:\\Users\\mkrug\\owncloud_neu\\projekt romangeschichte\\named entity recognition\\Goldstandard-Rescaled\\output+speech");
File korpusFOlder = new File("C:\\sandbox-Markus\\ner\\testConll");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(200);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
......@@ -54,14 +55,14 @@ public class FirstTest3 {
continue;
instances.addAll(InstanceCreationFactory.createWindowedInstancesFromUIMA(f, instances.size(), 2, 2,
"de.uniwue.kalimachos.coref.type.POS", tsd,
new NNFeatureGenerator("POSTag"),
new BIO_UIMA_FeatureGen("de.uniwue.kalimachos.coref.type.NamedEntity", "NE"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3),
new SuffixNGenerator(2), new SuffixNGenerator(1), new PrefixNGenerator(1),
new IsUppercaseFeatureGenerator(), new NGramGenerator(), new WordCategorization(),
new LemmaFeatureGenerator("Lemma"),
new UIMATypeAndFeatureGenerator("de.uniwue.kalimachos.coref.type.DependencyParse",
"DependencyRelation"),new UIMATypeAndFeatureGenerator("de.uniwue.kalimachos.coref.type.DependencyParse",
"Headname")));
"DependencyRelation"),
new UIMATypeAndFeatureGenerator("de.uniwue.kalimachos.coref.type.DependencyParse", "Headname")));
System.out.println("instances: " + instances.size());
if (instances.size() > 50000)
break;
......
......@@ -31,8 +31,8 @@ public class FirstTestEACL {
public static void main(String[] args) throws Exception {
File korpusFOlder = new File("X:\\owncloud\\paper_RegelLernen\\xmi_gold_preprocessed");
String uriDkPro = new File("X:\\owncloud\\paper_RegelLernen\\\\TypeSystemDK.xml").toPath()
File korpusFOlder = new File("C:\\Users\\mkrug\\owncloud_neu\\paper_RegelLernen\\xmi_gold_preprocessed");
String uriDkPro = new File("C:\\Users\\mkrug\\owncloud_neu\\paper_RegelLernen\\TypeSystemDK.xml").toPath()
.toUri().toString();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment