Commit 78b022b4 authored by Markus Krug
Browse files

Merge branch 'master' into 'dev_mk'

# Conflicts:
#   de.uniwue.ls6.rulelearning/RuleLearning/src/de/uniwue/ls6/rulelearning/algorithm/impl/BinaryRepresentationRuleLearningAlgorithm.java
parents bd65e3d1 f145d091
......@@ -15,6 +15,7 @@ import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.StreamSupport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -31,7 +32,7 @@ public class MatrixUtil {
static final Logger logger = LoggerFactory.getLogger(MatrixUtil.class);
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum, Point maxEntryLocation,
List<MatrixMapping> mappings, Map<Point, Set<Instance>> indexMap, int beamSize) {
List<MatrixMapping> mappings, Set<Instance> instances, Map<Point, Set<Instance>> indexMap, int beamSize) {
// totally unugly code not even necessary which makes it worse
int formerMatrixSize = matrixface.getTpMatrix().numRows() == matrixface.getTpMatrix().numColumns()
......@@ -49,6 +50,12 @@ public class MatrixUtil {
// used to filter all features that refer to the same instances
Set<Set<Instance>> uniqueInstance = new HashSet<>();
// iterate over each feature combination left
// another map to speed up
Map<Point, Integer> cntMap = new HashMap<>();
for (MatrixEntry entry : matrixface.getTpMatrix()) {
Point p = new Point((int) entry.get(), (int) (matrixface.getFpMatrix().get(entry.row(), entry.column())));
cntMap.put(p, cntMap.get(p) == null ? 1 : cntMap.get(p) + 1);
}
for (MatrixEntry entry : matrixface.getTpMatrix()) {
double amountTP = entry.get();
......@@ -71,7 +78,6 @@ public class MatrixUtil {
// // get the features of this point
Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
mappings, mappings.size() > 0 ? true : false);
// only keep each feature combination exactly once!
if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
continue;
......@@ -82,12 +88,16 @@ public class MatrixUtil {
// that
// resemble exactly the same instance set
// because the algorithm cannot distinguish between those
Set<Instance> instancesForFeatureSet = determineInstancesForFeatures(featuresOfPoints, indexMap);
if (uniqueInstance.contains(instancesForFeatureSet)) {
continue;
Point p = new Point((int) entry.get(),
(int) (matrixface.getFpMatrix().get(entry.row(), entry.column())));
if (cntMap.get(p) != null && cntMap.get(p) > 1) {
Set<Instance> instancesForFeatureSet = determineInstancesForFeatures(featuresOfPoints, instances,
indexMap);
if (uniqueInstance.contains(instancesForFeatureSet)) {
continue;
}
uniqueInstance.add(instancesForFeatureSet);
}
uniqueInstance.add(instancesForFeatureSet);
// finally we can decide to keep our feature for the next
// iteration
......@@ -96,7 +106,6 @@ public class MatrixUtil {
matrixface.getFpMatrix().get(entry.row(), entry.column())));
}
}
if (entries.size() > beamSize) {
logger.warn("Too many possible features! We restrict to the best " + beamSize
+ " Amount of potentially good features " + entries.size());
......@@ -141,12 +150,13 @@ public class MatrixUtil {
return matrixMapping;
}
public static Set<Instance> determineInstancesForFeatures(Set<Point> featuresOfPoints,
public static Set<Instance> determineInstancesForFeatures(Set<Point> featuresOfPoints, Set<Instance> instances,
Map<Point, Set<Instance>> indexMap) {
// inverted list intersection!
List<Set<Instance>> instanceSetList = new ArrayList<>();
Set<Instance> intersectionSet = new HashSet<>();
instanceSetList.add(instances);
Set<Instance> smallestSet = null;
for (Point feature : featuresOfPoints) {
Set<Instance> set = indexMap.get(feature);
......@@ -156,6 +166,7 @@ public class MatrixUtil {
else if (set.size() < smallestSet.size())
smallestSet = set;
}
outer: for (Instance i : smallestSet) {
for (Set<Instance> set : instanceSetList) {
if (!set.contains(i)) {
......@@ -192,7 +203,7 @@ public class MatrixUtil {
for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
Set<Point> features = lastMapping.getFeaturesForDenseIndex(denseIndices);
denseInstanceMatrix.add(denseIndices.y, denseIndices.x, 1);
Set<Instance> determineInstancesForFeatures = determineInstancesForFeatures(features, indexMap);
Set<Instance> determineInstancesForFeatures = determineInstancesForFeatures(features, null, indexMap);
determineInstancesForFeatures.retainAll(instances);
indexOfActFeatures.put(denseIndices, determineInstancesForFeatures);
}
......@@ -259,8 +270,8 @@ public class MatrixUtil {
int kroneckerDim = dimension * dimension;
// add the forbidden expansion indexes
//does not speed up
//addForbiddenIndexesToMapping(lastMapping, dimension);
// does not speed up
// addForbiddenIndexesToMapping(lastMapping, dimension);
Supplier<MatrixMcMatrixFace> matrixConstructor = () -> new MatrixMcMatrixFace(kroneckerDim, kroneckerDim,
label);
......
......@@ -239,7 +239,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
return false;
}
private RepresentationRule learnRule(int goldLabel, Collection<Instance> instances, MatrixMcMatrixFace bestMatrix) {
private RepresentationRule learnRule(int goldLabel, Set<Instance> instances, MatrixMcMatrixFace bestMatrix) {
List<MatrixMapping> mappings = new ArrayList<MatrixMapping>();
MatrixMcMatrixFace iterationMatrix = bestMatrix;
......@@ -258,7 +258,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// map matrix to dense matrix
long time = System.currentTimeMillis();
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(iterationMatrix, maximumScore,
maxEntryLocation.getLocation(), mappings, indexMap, beamSize);
maxEntryLocation.getLocation(), mappings,instances, indexMap, beamSize);
System.out.println("Map:"+(System.currentTimeMillis() - time));
mappings.add(mappingForMaximum);
System.out.println(System.currentTimeMillis()-time);
time = System.currentTimeMillis();
......@@ -270,11 +271,11 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// expand in kronecker fashion
iterationMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
System.out.println(System.currentTimeMillis() - time);
System.out.println("Kron:"+(System.currentTimeMillis() - time));
// assert that the maximum is growing
assert (maximumScore <= iterationMatrix.getMaximumScore()) : "Maximum decreased within iteration!";
if (!betterRuleCanBeLearned(mappings)) {
if (!betterRuleCanBeLearned(mappings,instances)) {
mappings.remove(mappingForMaximum);
break;
}
......@@ -299,16 +300,16 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
maximumScore, (maxEntryLocation.getTp() / (maxEntryLocation.getFp() + maxEntryLocation.getTp())));
}
private boolean betterRuleCanBeLearned(List<MatrixMapping> mappings) {
private boolean betterRuleCanBeLearned(List<MatrixMapping> mappings,Set<Instance> instances) {
if (mappings.size() > maxExpandSize)
return false;
if (mappings.size() < 2)
return true;
return betterRuleCanBeLearned(mappings.get(mappings.size() - 2), mappings.get(mappings.size() - 1));
return betterRuleCanBeLearned(mappings.get(mappings.size() - 2), mappings.get(mappings.size() - 1),instances);
}
private boolean betterRuleCanBeLearned(MatrixMapping lastMapping, MatrixMapping newMapping) {
private boolean betterRuleCanBeLearned(MatrixMapping lastMapping, MatrixMapping newMapping, Set<Instance> instances) {
Set<Set<Point>> lastFeatures = new HashSet<Set<Point>>(lastMapping.getDenseIndexToFeaturesMapping().values());
Set<Set<Point>> newFeatures = new HashSet<Set<Point>>(newMapping.getDenseIndexToFeaturesMapping().values());
// if no new feature combination was added we can be sure to have
......@@ -321,12 +322,12 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// instances and compare on instances!
Set<Set<Instance>> lastMappingInstanceSets = new HashSet<>();
for (Set<Point> lastFeats : lastMapping.getDenseIndexToFeaturesMapping().values()) {
lastMappingInstanceSets.add(MatrixUtil.determineInstancesForFeatures(lastFeats, indexMap));
lastMappingInstanceSets.add(MatrixUtil.determineInstancesForFeatures(lastFeats,instances, indexMap));
}
// perform deep equals
for (Set<Point> newFeats : newMapping.getDenseIndexToFeaturesMapping().values()) {
if (!lastMappingInstanceSets.contains(MatrixUtil.determineInstancesForFeatures(newFeats, indexMap))) {
if (!lastMappingInstanceSets.contains(MatrixUtil.determineInstancesForFeatures(newFeats,instances, indexMap))) {
return true;
}
}
......@@ -397,4 +398,4 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
this.otherLabel = otherLabel;
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment