Commit 7376acfe authored by mak28ma's avatar mak28ma
Browse files

added evaluation

parent ec53eadc
......@@ -35,15 +35,19 @@ public class RepresentationRule {
*/
private Integer label;
private int maximumScore;
/**
*
* @param windowSize the amount of columns of features of an instance
* @param instanceArray the incoming features, stored in an array[][]
* @param label the action of this rule
* @param maximumScore
*/
private double precision;
/**
*
* @param windowSize
* the amount of columns of features of an instance
* @param instanceArray
* the incoming features, stored in an array[][]
* @param label
* the action of this rule
* @param maximumScore
*/
public RepresentationRule(int windowSize, int[][] instanceArray, int label, int maximumScore) {
conditionSet = new ArrayList<Set<Integer>>(windowSize);
......@@ -61,17 +65,23 @@ public class RepresentationRule {
}
}
/**
*
* @param windowSize the amount of columns of features of an instance
* @param features the features stored as points, Point.x is the columns and Point.y the feature
* @param label the action of this rule
* @param maximumScore
*/
public RepresentationRule(int windowSize, Collection<Point> features, int label, int maximumScore) {
/**
*
* @param windowSize
* the amount of columns of features of an instance
* @param features
* the features stored as points, Point.x is the columns and
* Point.y the feature
* @param label
* the action of this rule
* @param maximumScore
*/
public RepresentationRule(int windowSize, Collection<Point> features, int label, int maximumScore,double precision) {
this.label = label;
conditionSet = new ArrayList<Set<Integer>>(windowSize);
this.maximumScore = maximumScore;
this.precision = precision;
// get max x
int maxX = 0;
......@@ -104,9 +114,10 @@ public class RepresentationRule {
return toString();
}
/**
* @return gives the String representation of a rule, saved as Feature1 AND Feature2 => Label
*/
/**
* @return gives the String representation of a rule, saved as Feature1 AND
* Feature2 => Label
*/
@Override
public String toString() {
......@@ -133,4 +144,32 @@ public class RepresentationRule {
return pointSet;
}
/**
 * Sums {@code Math.log(LabelAlphabet.getUniquenessForFeature(id))} over
 * every distinct feature id contained in this rule's condition sets.
 *
 * @return the accumulated log-uniqueness; {@code 0} when the rule holds no
 *         feature ids
 */
public double getUniquenessScore() {
    double logSum = 0;
    // asIntSet() de-duplicates ids, so each feature contributes once.
    for (Integer featureId : asIntSet()) {
        logSum = logSum + Math.log(LabelAlphabet.getUniquenessForFeature(featureId));
    }
    return logSum;
}
/**
 * Flattens all entries of {@code conditionSet} into a single set.
 *
 * @return a new {@link HashSet} holding the union of every condition's
 *         feature ids
 */
private Set<Integer> asIntSet() {
    final Set<Integer> merged = new HashSet<Integer>();
    for (Set<Integer> condition : conditionSet) {
        merged.addAll(condition);
    }
    return merged;
}
/**
 * @return the precision value stored on this rule (assigned by the
 *         constructor that accepts a {@code precision} argument)
 */
public double getPrecision() {
    return this.precision;
}
}
......@@ -88,6 +88,14 @@ public class RulePass {
}
return false;
}
/**
 * Looks up the first rule of this pass that is applicable to the instance.
 *
 * @param ins
 *            the instance to match against the rules in {@code ruleSet}
 * @return the first applicable rule in iteration order, or {@code null}
 *         when no rule applies
 */
public RepresentationRule apply(Instance ins) {
    for (RepresentationRule candidate : ruleSet) {
        if (!candidate.isApplicable(ins)) {
            continue;
        }
        return candidate;
    }
    return null;
}
public int getLabel() {
return label;
......
......@@ -72,4 +72,26 @@ public class LabelAlphabet {
return sb.toString();
}
/**
 * Counts the features registered in this label alphabet whose text before
 * the first {@code "="} equals that of the queried feature.
 *
 * @param i
 *            the feature id whose uniqueness is queried
 * @return the number of registered features sharing that prefix, as a
 *         double
 */
public static double getUniquenessForFeature(Integer i) {
    final String queriedPrefix = getFeatureToId(i).split("=")[0];
    double matches = 0;
    for (String candidate : featureToIdMap.keySet()) {
        final String candidatePrefix = candidate.split("=")[0];
        if (candidatePrefix.equals(queriedPrefix)) {
            matches += 1;
        }
    }
    return matches;
}
}
......@@ -5,6 +5,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
import de.uniwue.ls6.util.MatrixPoint;
import de.uniwue.ls6.util.MatrixUtil;
import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
......@@ -108,23 +109,24 @@ public class MatrixMcMatrixFace {
}
public Point getLocationOfMaximum(List<MatrixMapping> mappings) {
public MatrixPoint getLocationOfMaximum(List<MatrixMapping> mappings) {
double maxScore = 0;
Point bestEntry = null;
MatrixPoint bestEntry = null;
Set<Point> mostSimpleRule = null;
for (MatrixEntry entry : tpMatrix) {
double scoreCurrent = entry.get() - fpMatrix.get(entry.row(), entry.column());
double fps = fpMatrix.get(entry.row(), entry.column());
double scoreCurrent = entry.get() - fps;
if (scoreCurrent > maxScore) {
maxScore = scoreCurrent;
bestEntry = new Point(entry.column(), entry.row());
mostSimpleRule = MatrixUtil.determineFeaturesForIndex(bestEntry, mappings,
mappings.size() > 0 ? true : false);
} else if (scoreCurrent == maxScore && scoreCurrent>0) {
bestEntry = new MatrixPoint(entry.column(), entry.row(), scoreCurrent, entry.get(), fps);
mostSimpleRule = MatrixUtil.determineFeaturesForIndex(bestEntry.getLocation(),
mappings, mappings.size() > 0 ? true : false);
} else if (scoreCurrent == maxScore && scoreCurrent > 0) {
// keep the simpler rule
Point loc = new Point(entry.column(), entry.row());
Set<Point> featuresForIndex = MatrixUtil.determineFeaturesForIndex(loc, mappings,
mappings.size() > 0 ? true : false);
MatrixPoint loc = new MatrixPoint(entry.column(), entry.row(), scoreCurrent, entry.get(), fps);
Set<Point> featuresForIndex = MatrixUtil.determineFeaturesForIndex(loc.getLocation(),
mappings, mappings.size() > 0 ? true : false);
if (mostSimpleRule == null) {
bestEntry = loc;
mostSimpleRule = featuresForIndex;
......
package de.uniwue.ls6.util;
import java.awt.Point;
public class MatrixPoint {
private int x;
......@@ -56,6 +58,10 @@ public class MatrixPoint {
/**
 * Replaces the {@code tp} value stored on this point.
 *
 * @param tp
 *            the new value (presumably the true-positive count at this
 *            matrix location; confirm against the callers)
 */
public void setTp(double tp) {
this.tp = tp;
}
/**
 * @return a freshly allocated {@link Point} carrying this object's
 *         {@code x} and {@code y}
 */
public Point getLocation() {
    final Point location = new Point(this.x, this.y);
    return location;
}
......
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
......
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
org.eclipse.jdt.core.compiler.source=1.7
package de.uniwue.ls6.rulelearning.evaluation.eval;
import java.util.HashMap;
import java.util.Map;
import de.uniwue.ls6.datastructure.LabelAlphabet;
public class Evaluation {
private double amountTp;
private double amountFp;
private double amountFn;
public Evaluation(double amountTp, double amountFp, double amountFn) {
private int goldLabel;
private Map<Integer, Integer> confusionMap;
public Evaluation(double amountTp, double amountFp, double amountFn, int label) {
super();
this.amountTp = amountTp;
this.amountFp = amountFp;
this.amountFn = amountFn;
this.goldLabel = label;
confusionMap = new HashMap<>();
}
public double getAmountTp() {
return amountTp;
}
public void setAmountTp(double amountTp) {
this.amountTp = amountTp;
}
public double getAmountFp() {
return amountFp;
}
public void setAmountFp(double amountFp) {
this.amountFp = amountFp;
}
public double getAmountFn() {
return amountFn;
}
public void setAmountFn(double amountFn) {
this.amountFn = amountFn;
}
public void addTp(){
this.amountTp++;
}
public void addFp(){
this.amountFp++;
}
public void addFn(int labelInsteast){
this.amountFn++;
// also update confusion map
if (confusionMap.containsKey(labelInsteast)) {
Integer integer = confusionMap.get(labelInsteast);
integer++;
confusionMap.put(labelInsteast, integer);
} else {
confusionMap.put(labelInsteast, 1);
}
}
public int getGoldLabel() {
return goldLabel;
}
public void setGoldLabel(int goldLabel) {
this.goldLabel = goldLabel;
}
public Map<Integer, Integer> getConfusionMap() {
return confusionMap;
}
public void setConfusionMap(Map<Integer, Integer> confusionMap) {
this.confusionMap = confusionMap;
}
public String getEvaluation() {
StringBuilder sb = new StringBuilder();
sb.append("Evaluation for Label: " + LabelAlphabet.getFeatureToId(goldLabel)).append("\n");
double recall = amountTp / (amountFn + amountTp);
double precision = amountTp / (amountFp + amountTp);
double f1 = 2 * recall * precision / (precision + recall);
sb.append("Amount TP:\t" + amountTp).append("\n");
sb.append("Amount FP:\t" + amountFp).append("\n");
sb.append("Amount FN:\t" + amountFn).append("\n");
sb.append("Precision:\t" + precision).append("\n");
sb.append("Recall: \t" + recall).append("\n");
sb.append("F1: \t" + f1).append("\n");
sb.append("Confusion table\n");
sb.append("Instead of Label " + LabelAlphabet.getFeatureToId(goldLabel) + ": |").append("\t");
for(Integer confusedLabel : confusionMap.keySet()){
sb.append(LabelAlphabet.getFeatureToId(confusedLabel)+":" + confusionMap.get(confusedLabel)+"|\t");
}
sb.append("\n");
return sb.toString();
}
}
package de.uniwue.ls6.rulelearning.evaluation.eval;
import java.util.HashMap;
import java.util.Map;
import de.uniwue.ls6.datastructure.ALabelling;
public class LabelAccuracyEvaluation implements IEvaluation {
/**
 * Compares gold and system labels position by position and renders the
 * overall label accuracy together with one report per encountered label.
 * The overall accuracy line is written both before and after the per-label
 * reports.
 *
 * @param goldLabels
 *            the reference labellings
 * @param systemLabels
 *            the predicted labellings, aligned index by index with
 *            {@code goldLabels}
 * @return the formatted evaluation report
 * @throws IllegalArgumentException
 *             if the two arrays differ in length
 */
public String evaluateToString(ALabelling[] goldLabels, ALabelling[] systemLabels) {
    if (goldLabels.length != systemLabels.length) {
        throw new IllegalArgumentException("Sizes need to be the same!");
    }

    Map<Integer, Evaluation> evalMap = new HashMap<>();
    for (int i = 0; i < goldLabels.length; i++) {
        int goldLabel = goldLabels[i].getLabel();
        int systemLabel = systemLabels[i].getLabel();
        addLabelling(evalMap, goldLabel, systemLabel);
    }

    // total label accuracy = all true positives / number of instances
    double totalTp = 0;
    for (Evaluation eval : evalMap.values()) {
        totalTp += eval.getAmountTp();
    }
    String accuracyLine = "Labelaccuracy: " + totalTp / goldLabels.length;

    StringBuilder sb = new StringBuilder();
    sb.append(accuracyLine).append("\n");
    // per-label breakdown
    for (Evaluation e : evalMap.values()) {
        sb.append(e.getEvaluation()).append("\n");
    }
    sb.append(accuracyLine).append("\n");
    return sb.toString();
}
/**
 * Books a single gold/system label pair into the per-label evaluations.
 * A match counts as a true positive for the gold label; a mismatch counts
 * as a false negative for the gold label and a false positive for the
 * system label.
 *
 * @param evalMap
 *            per-label evaluations, updated in place
 * @param goldLabel
 *            the reference label
 * @param systemLabel
 *            the predicted label
 */
private void addLabelling(Map<Integer, Evaluation> evalMap, int goldLabel, int systemLabel) {
    Evaluation evalGold = getEvaluationForLabel(evalMap, goldLabel);
    if (goldLabel == systemLabel) {
        evalGold.addTp();
    } else {
        Evaluation evalSystem = getEvaluationForLabel(evalMap, systemLabel);
        evalGold.addFn(systemLabel);
        evalSystem.addFp();
        // The lookup may have created a fresh Evaluation that is not in the
        // map yet; store it so the false positive is not silently dropped.
        evalMap.put(systemLabel, evalSystem);
    }
    evalMap.put(goldLabel, evalGold);
}
/**
 * Fetches the evaluation stored for a label, or builds an empty one.
 * A newly built evaluation is NOT inserted into the map by this method.
 *
 * @param evalMap
 *            the per-label evaluations to look in
 * @param goldLabel
 *            the label whose evaluation is wanted
 * @return the mapped evaluation, or a new zero-count evaluation for the
 *         label when the map has no entry for it
 */
private Evaluation getEvaluationForLabel(Map<Integer, Evaluation> evalMap, int goldLabel) {
    if (!evalMap.containsKey(goldLabel)) {
        return new Evaluation(0, 0, 0, goldLabel);
    }
    return evalMap.get(goldLabel);
}
public Evaluation evaluate(ALabelling[] goldLabels, ALabelling[] systemLabels) {
......
......@@ -8,15 +8,16 @@ public class NNFeatureGenerator extends AFeatureGenerator{
private String posTagFeature;
/**
 * Creates a generator registered under the feature identifier
 * {@code "IsPOSTag"}.
 *
 * @param posTagFeature
 *            base name of the token feature that is read in
 *            {@code generateFeatures}
 */
public NNFeatureGenerator(String posTagFeature) {
    // Only one super(...) call is legal; the later identifier is kept.
    super("IsPOSTag");
    this.posTagFeature = posTagFeature;
}
/**
 * Emits a single feature telling whether the token's configured feature
 * value starts with {@code "NE"}.
 *
 * @param token
 *            the annotation whose feature named by {@code posTagFeature} is
 *            inspected
 * @return a one-element array: {@code featureIdentifier + "=NE"} when the
 *         value starts with {@code "NE"}, otherwise
 *         {@code featureIdentifier + "=O"}
 */
@Override
public String[] generateFeatures(AnnotationFS token) {
    String label = "NE";
    String featureValueAsString = token
            .getFeatureValueAsString(token.getType().getFeatureByBaseName(posTagFeature));
    String value = featureValueAsString.startsWith(label) ? label : "O";
    return new String[] { super.featureIdentifier + "=" + value };
}
}
......@@ -21,6 +21,7 @@ import de.uniwue.ls6.datastructure.MatrixMapping;
import de.uniwue.ls6.datastructure.MatrixMcMatrixFace;
import de.uniwue.ls6.datastructure.SimpleLabelling;
import de.uniwue.ls6.rulelearning.algorithm.IRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.util.MatrixPoint;
import de.uniwue.ls6.util.MatrixUtil;
public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentationRuleLearningAlgorithm {
......@@ -189,23 +190,25 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// initial values
int maximumScore = 0;
Point maxEntryLocation = iterationMatrix.getLocationOfMaximum(mappings);
MatrixPoint maxEntryLocation = iterationMatrix.getLocationOfMaximum(mappings);
lookahead = 0;
while (true) {
maximumScore = iterationMatrix.getMaximumScore();
maxEntryLocation = iterationMatrix.getLocationOfMaximum(mappings);
if (maxEntryLocation == null)
break;
// map matrix to dense matrix
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(iterationMatrix, maximumScore,
maxEntryLocation, mappings, beamSize);
maxEntryLocation.getLocation(), mappings, beamSize);
mappings.add(mappingForMaximum);
// assert that the maximum is within bounds
assert (maxEntryLocation == null ? true
: maxEntryLocation.x < iterationMatrix.getTpMatrix().numColumns()
&& maxEntryLocation.y < iterationMatrix.getTpMatrix()
.numRows()) : "Position of maximum out of bounds";
: maxEntryLocation.getX() < iterationMatrix.getTpMatrix().numColumns() && maxEntryLocation
.getY() < iterationMatrix.getTpMatrix().numRows()) : "Position of maximum out of bounds";
// expand in kronecker fashion
long time = System.currentTimeMillis();
......@@ -228,15 +231,15 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// recalculate the rule
List<Point> featuresAtMax = new ArrayList<Point>();
featuresAtMax.addAll(
MatrixUtil.determineFeaturesForIndex(maxEntryLocation, mappings, mappings.size() > 0 ? true : false));
//no default rules, TODO why does this happen??
featuresAtMax.addAll(MatrixUtil.determineFeaturesForIndex(maxEntryLocation.getLocation(), mappings,
mappings.size() > 0 ? true : false));
// no default rules, TODO why does this happen??
for (Point p : featuresAtMax)
if (p.y == 0)
return null;
return new RepresentationRule(iterationMatrix.getTpMatrix().numColumns(), featuresAtMax, goldLabel,
maximumScore);
maximumScore, (maxEntryLocation.getTp() / (maxEntryLocation.getFp() + maxEntryLocation.getTp())));
}
// TODO isnt this too greedy?
......@@ -264,13 +267,18 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
public ALabelling apply(Instance instanceToClassify) {
int predictedLabel = otherLabel;
double score = 0;
for (RulePass pass : passes) {
if (pass.isApplicable(instanceToClassify)) {
RepresentationRule rule = pass.apply(instanceToClassify);
if (rule != null) {
predictedLabel = pass.getLabel();
score+=rule.getUniquenessScore();
//score = rule.getPrecision();
} else {
break;
}
}
// TODO add a score
return new SimpleLabelling(predictedLabel, 0);
return new SimpleLabelling(predictedLabel, score);
}
public int getGoldLabel() {
......
......@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
import de.uniwue.ls6.datastructure.ALabelling;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.SimpleLabelling;
import de.uniwue.ls6.rulelearning.algorithm.IRepresentationRuleLearningAlgorithm;
public class MultiClassRepresentationRuleAlgorithm implements IRepresentationRuleLearningAlgorithm {
......@@ -49,7 +50,7 @@ public class MultiClassRepresentationRuleAlgorithm implements IRepresentationRul
for (BinaryRepresentationRuleLearningAlgorithm binaryClassifier : binaryClassifiers) {
// adapt training instances
adaptTrainingInstancesToBinaryClassifier(instances,binaryClassifier.getGoldLabel());
adaptTrainingInstancesToBinaryClassifier(instances, binaryClassifier.getGoldLabel());
// train
binaryClassifier.learn(instances);
......@@ -61,21 +62,21 @@ public class MultiClassRepresentationRuleAlgorithm implements IRepresentationRul
}
private void revertTrainingLabelOfInstances(Instance[] instances) {
for(Instance i : instances){
for (Instance i : instances) {
i.setLabel(instanceToLabelMapping.get(i));
}