Commit ec53eadc authored by mak28ma's avatar mak28ma
Browse files

*added a way to read folds

*started the creation of the evaluation
parent 3845ae54
......@@ -2,6 +2,7 @@ package de.uniwue.ls6.algorithm.datastructure;
import java.awt.Point;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
......@@ -67,7 +68,7 @@ public class RepresentationRule {
* @param label the action of this rule
* @param maximumScore
*/
public RepresentationRule(int windowSize, List<Point> features, int label, int maximumScore) {
public RepresentationRule(int windowSize, Collection<Point> features, int label, int maximumScore) {
this.label = label;
conditionSet = new ArrayList<Set<Integer>>(windowSize);
this.maximumScore = maximumScore;
......@@ -110,7 +111,7 @@ public class RepresentationRule {
public String toString() {
StringBuilder sb = new StringBuilder("Score: " + maximumScore + " ");
for (Point p : asPointList()) {
for (Point p : asPointSet()) {
sb.append(p.x + "=" + LabelAlphabet.getFeatureToId(p.y)).append(" AND ");
}
return sb.toString().substring(0, sb.toString().length() - 5) + " => " + LabelAlphabet.getFeatureToId(label);
......@@ -119,17 +120,17 @@ public class RepresentationRule {
// TODO this could also be made faster
public boolean isApplicable(Instance ins) {
return ins.containsFeature(asPointList());
return ins.containsFeature(asPointSet());
}
private List<Point> asPointList() {
List<Point> pointList = new ArrayList<>();
private Set<Point> asPointSet() {
Set<Point> pointSet = new HashSet<>();
for (int i = 0; i < conditionSet.size(); i++) {
for (Integer feature : conditionSet.get(i)) {
pointList.add(new Point(i, feature));
pointSet.add(new Point(i, feature));
}
}
return pointList;
return pointSet;
}
}
package de.uniwue.ls6.datastructure;
import java.awt.Point;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.MatrixEntry;
......@@ -104,7 +104,7 @@ public class Instance {
* instance
* @return whether this instance contains all features of features
*/
public boolean containsFeature(List<Point> features) {
public boolean containsFeature(Set<Point> features) {
outer: for (Point p : features) {
......@@ -142,7 +142,7 @@ public class Instance {
FlexCompColMatrix denseInstanceMatrix = new FlexCompColMatrix(denseDimension, denseDimension);
for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
List<Point> features = lastMapping.getFeaturesForDenseIndex(denseIndices);
Set<Point> features = lastMapping.getFeaturesForDenseIndex(denseIndices);
// check if this instance contains the features
if (containsFeature(features)) {
// if so then create a sparse matrix and put a 1 into the
......
......@@ -3,6 +3,7 @@ package de.uniwue.ls6.datastructure;
import java.awt.Point;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import de.uniwue.ls6.util.MatrixUtil;
......@@ -11,7 +12,7 @@ public class MatrixMapping {
// x is col and y is row
HashMap<Point, Point> mappingMap;
HashMap<Point, Point> inverseMappingMap;
HashMap<Point,List<Point>> denseIndexToFeaturesMapping;
HashMap<Point,Set<Point>> denseIndexToFeaturesMapping;
//
private int kroneckerDimension;
......@@ -80,7 +81,7 @@ public class MatrixMapping {
//also infer the features
for(Point p : inverseMappingMap.keySet()){
denseIndexToFeaturesMapping.put(p,MatrixUtil.determineFeaturesForIndex(p, mappings));
denseIndexToFeaturesMapping.put(p,MatrixUtil.determineFeaturesForIndex(p, mappings,false));
}
}
......@@ -98,7 +99,7 @@ public class MatrixMapping {
return kroneckerDimension;
}
public List<Point> getFeaturesForDenseIndex(Point densePoint){
public Set<Point> getFeaturesForDenseIndex(Point densePoint){
return denseIndexToFeaturesMapping.get(densePoint);
}
......
......@@ -3,6 +3,7 @@ package de.uniwue.ls6.datastructure;
import java.awt.Point;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import de.uniwue.ls6.util.MatrixUtil;
import no.uib.cipr.matrix.MatrixEntry;
......@@ -69,7 +70,7 @@ public class MatrixMcMatrixFace {
}
private void addToMatrix(int feature, int windowcolumn, FlexCompColMatrix matrix) {
//do not add the default label!!
// do not add the default label!!
if (feature == 0)
return;
matrix.add(feature, windowcolumn, 1);
......@@ -107,15 +108,28 @@ public class MatrixMcMatrixFace {
}
public Point getLocationOfMaximum() {
public Point getLocationOfMaximum(List<MatrixMapping> mappings) {
double maxScore = 0;
Point bestEntry = null;
Set<Point> mostSimpleRule = null;
for (MatrixEntry entry : tpMatrix) {
double scoreCurrent = entry.get() - fpMatrix.get(entry.row(), entry.column());
if (scoreCurrent > maxScore) {
maxScore = scoreCurrent;
bestEntry = new Point(entry.column(), entry.row());
mostSimpleRule = MatrixUtil.determineFeaturesForIndex(bestEntry, mappings,
mappings.size() > 0 ? true : false);
} else if (scoreCurrent == maxScore && scoreCurrent>0) {
// keep the simpler rule
Point loc = new Point(entry.column(), entry.row());
Set<Point> featuresForIndex = MatrixUtil.determineFeaturesForIndex(loc, mappings,
mappings.size() > 0 ? true : false);
if (mostSimpleRule == null) {
bestEntry = loc;
mostSimpleRule = featuresForIndex;
}
}
}
return bestEntry == null ? null : bestEntry;
......@@ -140,8 +154,8 @@ public class MatrixMcMatrixFace {
MatrixEntry next = iterator.next();
// recalculate the features for each entry
List<Point> determineFeaturesForIndex = MatrixUtil
.determineFeaturesForIndex(new Point(next.column(), next.row()), mappings);
Set<Point> determineFeaturesForIndex = MatrixUtil
.determineFeaturesForIndex(new Point(next.column(), next.row()), mappings, false);
String featureRepresentaion = MatrixUtil.convertPointListToFeatureString(determineFeaturesForIndex);
sb.append("Element at: " + "colnr " + next.column() + " and rownr " + next.row() + " Alphabet_ID:"
+ next.row() + " represents feature: " + featureRepresentaion).append("\n");
......
......@@ -6,8 +6,10 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Supplier;
......@@ -26,35 +28,50 @@ import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
public class MatrixUtil {
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum,
static final Logger logger = LoggerFactory.getLogger(MatrixUtil.class);
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum, Point maxEntryLocation,
List<MatrixMapping> mappings, int beamSize) {
final Logger logger = LoggerFactory.getLogger(MatrixUtil.class);
// totally unugly code not even necessary which makes it worse
int formerMatrixSize = matrixface.getTpMatrix().numRows() == matrixface.getTpMatrix().numColumns()
? matrixface.getTpMatrix().numColumns() : -1;
MatrixMapping matrixMapping = new MatrixMapping(formerMatrixSize);
// collect all entries that may remain
List<MatrixPoint> entries = new ArrayList<>();
Set<Set<Point>> uniqueFeatureCombinations = new HashSet<>();
for (MatrixEntry entry : matrixface.getTpMatrix()) {
// TODO do we need to add the false positives???
// double sum = entry.get() +
// matrixface.getFpMatrix().get(entry.row(), entry.column());
double sum = entry.get();
if (sum >= maximum) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
// save the score
// save all that may be kept
if (sum == maximum && matrixface.getFpMatrix().get(entry.row(), entry.column()) == 0) {
if (!(new Point(entry.column(), entry.row()).equals(maxEntryLocation))) {
//no potential to improve left!
continue;
}
}
// get the features of this point
Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
mappings, mappings.size() > 0 ? true : false);
if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
continue;
}
uniqueFeatureCombinations.add(featuresOfPoints);
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
entries.add(new MatrixPoint(entry.column(), entry.row(), score, entry.get(),
matrixface.getFpMatrix().get(entry.row(), entry.column())));
}
}
if (matrixMapping.getDenseMatrixDimension() > Math.sqrt(beamSize)) {
if (entries.size() > beamSize) {
logger.warn("Too many possible features! We restrict to the best " + beamSize
+ " Amount of potentially good features "
+ matrixMapping.getDenseMatrixDimension() * matrixMapping.getDenseMatrixDimension());
+ " Amount of potentially good features " + entries.size());
matrixMapping = new MatrixMapping(formerMatrixSize);
// sort
......@@ -67,17 +84,14 @@ public class MatrixUtil {
}
});
// add the top "beamsize" to the mapping
for (MatrixPoint entry : entries) {
if (matrixMapping.getMappingMap().size() > beamSize)
break;
if (entry.getScore() > 0) {
matrixMapping.addEntry(new Point(entry.getX(), entry.getY()));
}
}
// add the top "beamsize" to the mapping
for (MatrixPoint entry : entries) {
if (matrixMapping.getMappingMap().size() > beamSize)
break;
if (entry.getScore() > 0) {
matrixMapping.addEntry(new Point(entry.getX(), entry.getY()));
}
System.out.println("Mappingsize: " + matrixMapping.getMappingMap().size());
}
// infer the -> righthandside
......@@ -89,8 +103,8 @@ public class MatrixUtil {
// debug
// for (MatrixPoint entry : entries) {
// Point densePoints = matrixMapping.getMappingMap().get(new Point(entry.getX(), entry.getY()));
// List<Point> featuresForDenseIndex = matrixMapping.getFeaturesForDenseIndex(densePoints);
// Set<Point> featuresForDenseIndex = determineFeaturesForIndex(new Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6, featuresForDenseIndex, 1, 10);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " + entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
......@@ -125,9 +139,16 @@ public class MatrixUtil {
.collect(Collector.of(matrixConstructor, accumulator, join, Collector.Characteristics.UNORDERED));
}
public static List<Point> determineFeaturesForIndex(Point index, List<MatrixMapping> mappings) {
public static Set<Point> determineFeaturesForIndex(Point index, List<MatrixMapping> mappings,
boolean revertKroneckerFirst) {
List<Point> reversedfeatures = new ArrayList<Point>(Arrays.asList(new Point[] { index }));
if (revertKroneckerFirst) {
revertKroneckerExpansion(mappings.get(mappings.size() - 1), reversedfeatures);
}
// revert the mapping and kronecker
for (int i = mappings.size() - 1; i >= 0; i--) {
MatrixMapping currentMapping = mappings.get(i);
......@@ -139,7 +160,7 @@ public class MatrixUtil {
}
return reversedfeatures;
return new HashSet<Point>(reversedfeatures);
}
public static void revertKroneckerExpansion(MatrixMapping previousMapping, List<Point> reversedfeatures) {
......@@ -183,7 +204,7 @@ public class MatrixUtil {
}
public static String convertPointListToFeatureString(List<Point> determineFeaturesForIndex) {
public static String convertPointListToFeatureString(Set<Point> determineFeaturesForIndex) {
StringBuilder sb = new StringBuilder();
for (Point p : determineFeaturesForIndex) {
......
package de.uniwue.ls6.datastructure.test;
import static org.mockito.Mockito.*;
import static org.junit.Assert.*;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.awt.Point;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.mockito.Mockito;
import de.uniwue.ls6.datastructure.Instance;
......@@ -114,10 +114,10 @@ public class InstanceTest {
@Test
public void testContainsFeature() {
List<Point> pointlist;
Set<Point> pointlist;
Point p;
p = new Point(1, 1337);
pointlist = new ArrayList<Point>();
pointlist = new HashSet<Point>();
pointlist.add(p);
int rows = 10;
......@@ -129,7 +129,7 @@ public class InstanceTest {
assertTrue(tInstance.containsFeature(pointlist));
p = new Point(3, 42);
pointlist = new ArrayList<Point>();
pointlist = new HashSet<Point>();
pointlist.add(p);
// a point not added shouldn't be contained
......@@ -149,7 +149,7 @@ public class InstanceTest {
for (int col = 0; col < cols; col++) {
p = new Point(col, 1337);
pointlist = new ArrayList<Point>();
pointlist = new HashSet<Point>();
pointlist.add(p);
if (col != 1) {
assertFalse(tInstance.containsFeature(pointlist));
......
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
......@@ -9,4 +9,11 @@
<artifactId>ruleLearningParent</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>de.uniwue.ls6.rulelearning</groupId>
<artifactId>DataStructure</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package de.uniwue.ls6.rulelearning.evaluation.eval;
/**
 * Mutable holder for the three counts of an evaluation run: true positives (TP),
 * false positives (FP) and false negatives (FN).
 * <p>
 * The values are stored as given; no validation or derived metric is computed here.
 */
public class Evaluation {

    /** Amount of true positives. */
    private double amountTp;

    /** Amount of false positives. */
    private double amountFp;

    /** Amount of false negatives. */
    private double amountFn;

    /**
     * Creates an evaluation result from the three counts.
     *
     * @param amountTp amount of true positives
     * @param amountFp amount of false positives
     * @param amountFn amount of false negatives
     */
    public Evaluation(double amountTp, double amountFp, double amountFn) {
        this.amountTp = amountTp;
        this.amountFp = amountFp;
        this.amountFn = amountFn;
    }

    // ---- read access ----

    /** @return the stored amount of true positives */
    public double getAmountTp() {
        return amountTp;
    }

    /** @return the stored amount of false positives */
    public double getAmountFp() {
        return amountFp;
    }

    /** @return the stored amount of false negatives */
    public double getAmountFn() {
        return amountFn;
    }

    // ---- write access ----

    /** @param amountTp new amount of true positives */
    public void setAmountTp(double amountTp) {
        this.amountTp = amountTp;
    }

    /** @param amountFp new amount of false positives */
    public void setAmountFp(double amountFp) {
        this.amountFp = amountFp;
    }

    /** @param amountFn new amount of false negatives */
    public void setAmountFn(double amountFn) {
        this.amountFn = amountFn;
    }
}
package de.uniwue.ls6.rulelearning.evaluation.eval;
import de.uniwue.ls6.datastructure.ALabelling;
/**
 * Contract for an evaluation that compares the labels produced by a system with
 * the gold labels.
 */
public interface IEvaluation {

    /**
     * Evaluates the system labels against the gold labels and renders the outcome
     * as text. The format of the text is left to the implementation.
     *
     * @param goldLabels   the gold (reference) labellings
     * @param systemLabels the labellings produced by the system under evaluation
     * @return a textual representation of the evaluation result
     */
    String evaluateToString(ALabelling[] goldLabels, ALabelling[] systemLabels);

    /**
     * Evaluates the system labels against the gold labels.
     *
     * @param goldLabels   the gold (reference) labellings
     * @param systemLabels the labellings produced by the system under evaluation
     * @return the evaluation result as an {@link Evaluation}
     */
    Evaluation evaluate(ALabelling[] goldLabels, ALabelling[] systemLabels);
}
package de.uniwue.ls6.rulelearning.evaluation.eval;
import de.uniwue.ls6.datastructure.ALabelling;
/**
 * {@link IEvaluation} implementation named for label accuracy.
 * <p>
 * Both methods are still placeholders: they ignore their arguments and return
 * {@code null}. Callers must be prepared for that until the evaluation is implemented.
 */
public class LabelAccuracyEvaluation implements IEvaluation {

    /**
     * Placeholder; not implemented yet.
     *
     * @param goldLabels   the gold labellings (currently unused)
     * @param systemLabels the system labellings (currently unused)
     * @return always {@code null} for now
     */
    public String evaluateToString(ALabelling[] goldLabels, ALabelling[] systemLabels) {
        // TODO implement the textual report
        return null;
    }

    /**
     * Placeholder; not implemented yet.
     *
     * @param goldLabels   the gold labellings (currently unused)
     * @param systemLabels the system labellings (currently unused)
     * @return always {@code null} for now
     */
    public Evaluation evaluate(ALabelling[] goldLabels, ALabelling[] systemLabels) {
        // TODO implement the evaluation
        return null;
    }
}
package de.uniwue.ls6.rulelearning.evaluation.fold;
import java.util.List;
import de.uniwue.ls6.datastructure.Instance;
/**
 * Base class for a single fold: one list of training instances paired with one
 * list of test instances.
 * <p>
 * The lists are kept by reference; neither the constructor nor the accessors copy them.
 */
public abstract class AFold {

    /** Instances to train on in this fold. */
    protected List<Instance> trainingset;

    /** Instances to test on in this fold. */
    protected List<Instance> testSet;

    /**
     * Creates a fold from the two given lists.
     *
     * @param traininSet the instances to train on
     * @param testSet    the instances to test on
     */
    public AFold(List<Instance> traininSet, List<Instance> testSet) {
        this.trainingset = traininSet;
        this.testSet = testSet;
    }

    /** @return the training instances of this fold */
    public List<Instance> getTrainingset() {
        return trainingset;
    }

    /** @return the test instances of this fold */
    public List<Instance> getTestSet() {
        return testSet;
    }

    /** @param trainingset the new training instances of this fold */
    public void setTrainingset(List<Instance> trainingset) {
        this.trainingset = trainingset;
    }

    /** @param testSet the new test instances of this fold */
    public void setTestSet(List<Instance> testSet) {
        this.testSet = testSet;
    }
}
package de.uniwue.ls6.rulelearning.evaluation.fold;
import java.awt.Point;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import de.uniwue.ls6.datastructure.Instance;
/**
 * Helper for splitting a list of instances into folds for cross validation.
 */
public class FoldUtil {

    /**
     * Partitions {@code instances} into {@code amountFolds} folds. Fold {@code i} uses
     * the i-th contiguous slice of the (optionally shuffled) instances as its test set
     * and all remaining instances, in their order, as its training set.
     * <p>
     * Every instance ends up in the test set of exactly one fold. If the number of
     * instances is not divisible by {@code amountFolds}, the test set sizes differ by
     * at most one; if it is divisible, the slices are {@code size / amountFolds} long.
     * With fewer instances than folds some test sets are empty.
     * <p>
     * The passed list is never modified: shuffling happens on an internal copy.
     *
     * @param instances   the instances to distribute; must not be {@code null}
     * @param random      source of randomness used to shuffle the instances before
     *                    splitting, or {@code null} to keep the given order
     * @param amountFolds the number of folds to create; at least 2
     * @return the folds, in slice order
     * @throws IllegalArgumentException if {@code amountFolds} is smaller than 2
     * @throws NullPointerException     if {@code instances} is {@code null}
     */
    public static List<UnstructuredFold> readInstancesToFold(List<Instance> instances, Random random, int amountFolds) {
        if (amountFolds <= 1) {
            throw new IllegalArgumentException("Needs at least 2 folds!");
        }

        // Work on a private copy so shuffling neither reorders the caller's list
        // nor fails when that list is unmodifiable.
        List<Instance> ordered = new ArrayList<Instance>(instances);
        if (random != null) {
            Collections.shuffle(ordered, random);
        }

        int total = ordered.size();
        List<UnstructuredFold> folds = new ArrayList<UnstructuredFold>(amountFolds);
        for (int i = 0; i < amountFolds; i++) {
            // Proportional boundaries spread the remainder of total / amountFolds over
            // the folds instead of dropping the trailing instances from every test set.
            // The long arithmetic keeps i * total from overflowing.
            int from = (int) ((long) i * total / amountFolds);
            int to = (int) ((long) (i + 1) * total / amountFolds);

            List<Instance> testSet = new ArrayList<Instance>(ordered.subList(from, to));

            List<Instance> trainingSet = new ArrayList<Instance>(total - (to - from));
            trainingSet.addAll(ordered.subList(0, from));
            trainingSet.addAll(ordered.subList(to, total));

            folds.add(new UnstructuredFold(trainingSet, testSet));
        }
        return folds;
    }
}
package de.uniwue.ls6.rulelearning.evaluation.fold;
import java.util.List;