Commit 628ae849 authored by mak28ma's avatar mak28ma
Browse files

*Fix der Abbruchbdg

* Einbau eines neuen Filters
parent 78f8a06e
......@@ -9,6 +9,7 @@ import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
......@@ -23,6 +24,7 @@ import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.datastructure.MatrixMapping;
import de.uniwue.ls6.datastructure.MatrixMcMatrixFace;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
......@@ -31,7 +33,7 @@ public class MatrixUtil {
static final Logger logger = LoggerFactory.getLogger(MatrixUtil.class);
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum, Point maxEntryLocation,
List<MatrixMapping> mappings, int beamSize) {
List<MatrixMapping> mappings, Map<Point, Set<Instance>> indexMap, int beamSize) {
// totally unugly code not even necessary which makes it worse
int formerMatrixSize = matrixface.getTpMatrix().numRows() == matrixface.getTpMatrix().numColumns()
......@@ -41,8 +43,11 @@ public class MatrixUtil {
// collect all entries that may remain
List<MatrixPoint> entries = new ArrayList<>();
// used to filter unique feature combinations
Set<Set<Point>> uniqueFeatureCombinations = new HashSet<>();
// used to filter all features that refer to the same instances
Set<Set<Instance>> uniqueInstance = new HashSet<>();
// iterate over each feature combination left
for (MatrixEntry entry : matrixface.getTpMatrix()) {
......@@ -64,16 +69,24 @@ public class MatrixUtil {
Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
mappings, mappings.size() > 0 ? true : false);
//only keep each feature combination exactly once!
// only keep each feature combination exactly once!
if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
continue;
}
//furthermore we can filter all those featurecombinations that resemble exactly the same instance set
// because our algrotihm can not differ between those
uniqueFeatureCombinations.add(featuresOfPoints);
// furthermore we can filter out all feature combinations that
// cover exactly the same set of instances,
// because the algorithm cannot distinguish between them
Set<Instance> instancesForFeatureSet = determineInstancesForFeatures(featuresOfPoints, indexMap);
if (uniqueInstance.contains(instancesForFeatureSet)) {
continue;
}
uniqueInstance.add(instancesForFeatureSet);
// finally we can decide to keep our feature for the next
// iteration
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
entries.add(new MatrixPoint(entry.column(), entry.row(), score, entry.get(),
matrixface.getFpMatrix().get(entry.row(), entry.column())));
......@@ -114,7 +127,7 @@ public class MatrixUtil {
// Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6,
// featuresForDenseIndex, 1, 10);
// featuresForDenseIndex, 1, 10,0);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " +
// entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
......@@ -122,6 +135,23 @@ public class MatrixUtil {
return matrixMapping;
}
/**
 * Determines all instances that contain every one of the given features by
 * intersecting the inverted lists stored in {@code indexMap}.
 *
 * @param featuresOfPoints
 *            the features that all have to be present
 * @param indexMap
 *            inverted index mapping a feature to the instances containing it
 * @return a new, mutable set with the instances shared by all features;
 *         empty if no features are given, if a feature is not indexed, or
 *         if the features have no instance in common
 */
public static Set<Instance> determineInstancesForFeatures(Set<Point> featuresOfPoints,
        Map<Point, Set<Instance>> indexMap) {
    // null marks "no feature processed yet". An empty set must NOT be used
    // as that marker: an intersection that already ran empty would
    // otherwise be refilled by the next feature's instances.
    Set<Instance> intersectionSet = null;
    for (Point feature : featuresOfPoints) {
        Set<Instance> instancesOfFeature = indexMap.get(feature);
        if (instancesOfFeature == null) {
            // a feature without an index entry is contained in no instance,
            // so no instance can contain all features
            return new HashSet<Instance>();
        }
        if (intersectionSet == null) {
            // copy, so that the index itself is never modified
            intersectionSet = new HashSet<Instance>(instancesOfFeature);
        } else {
            // keep only the elements that are in both sets
            intersectionSet.retainAll(instancesOfFeature);
        }
        if (intersectionSet.isEmpty()) {
            // further intersections cannot add elements again
            return intersectionSet;
        }
    }
    return intersectionSet == null ? new HashSet<Instance>() : intersectionSet;
}
public static MatrixMcMatrixFace performKroneckerExpansion(List<MatrixMapping> mappings,
Collection<Instance> instances, int label) {
......
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
......
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7
org.eclipse.jdt.core.compiler.source=1.8
......@@ -4,9 +4,11 @@ import java.awt.Point;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
......@@ -23,6 +25,7 @@ import de.uniwue.ls6.datastructure.SimpleLabelling;
import de.uniwue.ls6.rulelearning.algorithm.IRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.util.MatrixPoint;
import de.uniwue.ls6.util.MatrixUtil;
import no.uib.cipr.matrix.Matrices;
public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentationRuleLearningAlgorithm {
......@@ -34,6 +37,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
final Logger logger = LoggerFactory.getLogger(BinaryRepresentationRuleLearningAlgorithm.class);
private Map<Point, Set<Instance>> indexMap;
public BinaryRepresentationRuleLearningAlgorithm(int goldLabel, int otherLabel, int beamSize) {
passes = new LinkedList<RulePass>();
this.goldLabel = goldLabel;
......@@ -46,6 +51,9 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
if (instances.length == 0)
throw new IllegalArgumentException("Plz give data to train!");
logger.info("Start to index the instances");
indexInstances(instances);
Set<Instance> instancesForPass = new HashSet<Instance>(Arrays.asList(instances));
int passIndex = 0;
int currentGoldIndex = goldLabel;
......@@ -109,6 +117,29 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
}
/**
 * Rebuilds {@code indexMap}, the inverted index from a feature to all
 * instances that contain it.
 * <p>
 * A feature is represented as a {@link Point} whose x value is the column
 * index and whose y value is the entry of the instance's feature array at
 * that column and row.
 *
 * @param instances
 *            the instances to index
 */
private void indexInstances(Instance[] instances) {
    indexMap = new HashMap<>();
    for (Instance current : instances) {
        for (int col = 0; col < current.getNrCols(); col++) {
            for (int row = 0; row < current.getNrRows(); row++) {
                // the feature key consists of the column and the value found there
                Point key = new Point(col, current.getFeatureArray()[col][row]);
                // create the bucket on first sight of the feature, then register the instance
                indexMap.computeIfAbsent(key, unused -> new HashSet<>()).add(current);
            }
        }
    }
}
private Set<Instance> keepClassifiableInstances(List<RulePass> passes, Set<Instance> instancesForPass,
int currentGoldIndex) {
......@@ -201,23 +232,23 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
if (maxEntryLocation == null)
break;
// map matrix to dense matrix
long time = System.currentTimeMillis();
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(iterationMatrix, maximumScore,
maxEntryLocation.getLocation(), mappings, beamSize);
maxEntryLocation.getLocation(), mappings, indexMap, beamSize);
mappings.add(mappingForMaximum);
//System.out.println(System.currentTimeMillis() - time);
// assert that the maximum is within bounds
assert (maxEntryLocation == null ? true
: maxEntryLocation.getX() < iterationMatrix.getTpMatrix().numColumns() && maxEntryLocation
.getY() < iterationMatrix.getTpMatrix().numRows()) : "Position of maximum out of bounds";
// expand in kronecker fashion
long time = System.currentTimeMillis();
iterationMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
// System.out.println(System.currentTimeMillis() - time);
// assert that the maximum is growing
assert (maximumScore <= iterationMatrix.getMaximumScore()) : "Maximum decreased within iteration!";
if (!betterRuleCanBeLearned(maximumScore,iterationMatrix)) {
if (!betterRuleCanBeLearned(mappings)) {
mappings.remove(mappingForMaximum);
break;
}
......@@ -252,11 +283,30 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
/**
 * Checks whether {@code newMapping} contributes anything beyond
 * {@code lastMapping}.
 * <p>
 * The mappings are first compared on their feature combinations. If those
 * differ, every feature combination is resolved to the set of instances it
 * covers (via {@code indexMap}) and the comparison is repeated on these
 * instance sets, so that different feature combinations covering exactly
 * the same instances do not count as progress.
 *
 * @param lastMapping
 *            the mapping of the previous iteration
 * @param newMapping
 *            the mapping of the current iteration
 * @return {@code true} if at least one feature combination of
 *         {@code newMapping} covers an instance set that no feature
 *         combination of {@code lastMapping} covers, {@code false} otherwise
 */
private boolean betterRuleCanBeLearned(MatrixMapping lastMapping, MatrixMapping newMapping) {
    Set<Set<Point>> lastFeatures = new HashSet<Set<Point>>(lastMapping.getDenseIndexToFeaturesMapping().values());
    Set<Set<Point>> newFeatures = new HashSet<Set<Point>>(newMapping.getDenseIndexToFeaturesMapping().values());

    // if no new feature combination was added we can be sure to have
    // finished
    if (lastFeatures.equals(newFeatures)) {
        return false;
    }

    // compare the feature sets "deep": convert the features to the
    // instances they cover and compare on instances
    Set<Set<Instance>> lastMappingInstanceSets = new HashSet<>();
    for (Set<Point> lastFeats : lastFeatures) {
        lastMappingInstanceSets.add(MatrixUtil.determineInstancesForFeatures(lastFeats, indexMap));
    }

    // a single instance set unknown to the last mapping is enough
    for (Set<Point> newFeats : newFeatures) {
        if (!lastMappingInstanceSets.contains(MatrixUtil.determineInstancesForFeatures(newFeats, indexMap))) {
            return true;
        }
    }
    return false;
}
......@@ -289,8 +339,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
RepresentationRule rule = pass.apply(instanceToClassify);
if (rule != null) {
predictedLabel = pass.getLabel();
//score += rule.getUniquenessScore();
score = rule.getPrecision();
// score += rule.getUniquenessScore();
score = rule.getPrecision();
} else {
break;
}
......
......@@ -35,7 +35,7 @@ public class FirstTest {
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(250);
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(1000);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
......
......@@ -35,7 +35,7 @@ public class FirstTest2 {
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(500);
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(1500);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
......
......@@ -68,7 +68,7 @@ public class FirstTestBinary {
String evaluateToString = new LabelAccuracyEvaluation()
.evaluateToString(goldLabels.toArray(new ALabelling[0]), systemLabels.toArray(new ALabelling[0]));
System.out.println(evaluateToString);
break;
//break;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment