Commit 1ddd5102 authored by mak28ma's avatar mak28ma
Browse files

I think it is working now! Sometimes eats all my ram, investigate with

Daniel!
parent 60a19d81
......@@ -75,9 +75,9 @@ public class RepresentationRule {
StringBuilder sb = new StringBuilder("Score: " + maximumScore + " ");
for (Point p : asPointList()) {
sb.append(p.x + "=" + LabelAlphabet.getFeatureToId(p.y)).append(" AND");
sb.append(p.x + "=" + LabelAlphabet.getFeatureToId(p.y)).append(" AND ");
}
return sb.toString().substring(0, sb.toString().length() - 4) + " => " + LabelAlphabet.getFeatureToId(label);
return sb.toString().substring(0, sb.toString().length() - 5) + " => " + LabelAlphabet.getFeatureToId(label);
}
// TODO this could also be made faster
......
......@@ -5,6 +5,7 @@ import java.util.Arrays;
import java.util.List;
import de.uniwue.ls6.util.MatrixUtil;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
......@@ -17,16 +18,19 @@ public class Instance {
// featureset
private int[][] featureArray;
private int label;
private int id;
/**
 * Creates an unlabeled instance with an empty feature array.
 *
 * @param nrCols size of the first dimension of the feature array
 * @param nrRows size of the second dimension of the feature array
 * @param id     identifier stored on this instance
 */
public Instance(int nrCols, int nrRows, int id) {
    // The label is not set here and keeps its default value.
    this.featureArray = new int[nrCols][nrRows];
    this.id = id;
}
/**
 * Creates a labeled instance with an empty feature array.
 *
 * @param nrCols size of the first dimension of the feature array
 * @param nrRows size of the second dimension of the feature array
 * @param label  label stored on this instance
 * @param id     identifier stored on this instance
 */
public Instance(int nrCols, int nrRows, int label, int id) {
    this.featureArray = new int[nrCols][nrRows];
    this.label = label;
    this.id = id;
}
public int getLabel() {
......@@ -75,7 +79,6 @@ public class Instance {
FlexCompColMatrix denseInstanceMatrix = new FlexCompColMatrix(denseDimension, denseDimension);
for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
List<Point> features = MatrixUtil.determineFeaturesForIndex(denseIndices, mappings);
// check if this instance contains the features
if (containsFeature(features)) {
......@@ -87,9 +90,7 @@ public class Instance {
// perform the kronecker expansion from the non sparse dense elements
int kroneckerDimension = denseDimension * denseDimension;
FlexCompColMatrix expanedMatrix = new FlexCompColMatrix(kroneckerDimension, kroneckerDimension);
for (MatrixEntry e1 : denseInstanceMatrix) {
// iterate over all no sparse elements and expand
for (MatrixEntry e2 : denseInstanceMatrix) {
......@@ -108,17 +109,22 @@ public class Instance {
}
}
// expand the matrix cleverly (use the index of denseIndices and
// expand with the non-sparse elements)
// assert that the entries in the expanded matrices are of size x(x-1)
assert (Matrices.cardinality(denseInstanceMatrix) * (Matrices.cardinality(denseInstanceMatrix) - 1) == Matrices
.cardinality(
expanedMatrix)) : "Amount of non null entries of kronecker matrix after instance expansion not correct";
return expanedMatrix;
}
/**
 * Combines the deep hash of the feature array, the label and the id using
 * the usual multiply-by-31 scheme.
 *
 * NOTE(review): the label contributes to this hash; confirm that equals()
 * compares the label as well, otherwise two equal instances could end up
 * with different hash codes.
 */
@Override
public int hashCode() {
    int hash = 31 + Arrays.deepHashCode(featureArray);
    hash = 31 * hash + label;
    return 31 * hash + id;
}
......@@ -131,9 +137,7 @@ public class Instance {
if (getClass() != obj.getClass())
return false;
Instance other = (Instance) obj;
if (!Arrays.deepEquals(featureArray, other.featureArray))
return false;
if (label != other.label)
if (id != other.id)
return false;
return true;
}
......
......@@ -6,7 +6,7 @@ import java.util.List;
public class MatrixMapping {
//x is col and y is row
// x is col and y is row
HashMap<Point, Point> mappingMap;
HashMap<Point, Point> inverseMappingMap;
//
......@@ -20,7 +20,7 @@ public class MatrixMapping {
* a point is an index
*/
public MatrixMapping(HashMap<Point, Point> mappingMap, HashMap<Point, Point> inverseMap,int kroneckerDimension) {
public MatrixMapping(HashMap<Point, Point> mappingMap, HashMap<Point, Point> inverseMap, int kroneckerDimension) {
super();
this.mappingMap = mappingMap;
this.inverseMappingMap = inverseMap;
......@@ -37,7 +37,6 @@ public class MatrixMapping {
/**
 * Returns the forward mapping held by this object.
 *
 * @return the {@code mappingMap} field itself, not a copy
 */
public HashMap<Point, Point> getMappingMap() {
return mappingMap;
}
public HashMap<Point, Point> getInverseMappingMap() {
return inverseMappingMap;
......@@ -84,24 +83,23 @@ public class MatrixMapping {
return inverseMappingMap.get(feature);
}
/**
 * Returns the value of the {@code kroneckerDimension} field.
 *
 * NOTE(review): an earlier comment here read "should return 0 but 42 is also
 * alright"; its intent is not clear from this file - confirm whether a
 * specific value is expected.
 */
public int getKroneckerMatrixDimension() {
return kroneckerDimension;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Matrixmapping: From point(x,y) to point(x,y)\n");
for(Point p : mappingMap.keySet()){
sb.append("("+p.x +","+p.y+")"+ "\t->" + "("+mappingMap.get(p).x +","+mappingMap.get(p).y+")").append("\n");
sb.append("Matrixmapping: From point(x,y) to point(x,y) KroneckerDimension" + kroneckerDimension
+ " DenseDImension " + getDenseMatrixDimension() + "\n");
for (Point p : mappingMap.keySet()) {
sb.append(
"(" + p.x + "," + p.y + ")" + "\t->" + "(" + mappingMap.get(p).x + "," + mappingMap.get(p).y + ")")
.append("\n");
}
return sb.toString();
}
}
......@@ -55,7 +55,6 @@ public class MatrixUtil {
// expand
FlexCompColMatrix expandedInstance = inst.expand(mappings);
// add to kronecker
if (inst.getLabel() == label) {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
......@@ -76,21 +75,20 @@ public class MatrixUtil {
// revertmapping
revertMapping(currentMapping, reversedfeatures);
if (i != 0) {
revertKroneckerExpansion(mappings.get(i-1), reversedfeatures);
revertKroneckerExpansion(mappings.get(i - 1), reversedfeatures);
}
}
return reversedfeatures;
}
private static void revertKroneckerExpansion(MatrixMapping previousMapping, List<Point> reversedfeatures) {
public static void revertKroneckerExpansion(MatrixMapping previousMapping, List<Point> reversedfeatures) {
List<Point> toRemove = new ArrayList<Point>();
List<Point> toAdd = new ArrayList<Point>();
for (Point p : reversedfeatures) {
// revert the kronecker expansion step this generates 2 points
// TODO is this correct?
int sizeBeforeExpansion = (int) previousMapping.getDenseMatrixDimension();
int xBefore1 = (int) Math.floor(p.x / sizeBeforeExpansion);
......@@ -105,32 +103,48 @@ public class MatrixUtil {
toRemove.add(p);
toAdd.add(secondBack);
toAdd.add(firstBack);
//assert that the recalculated points are contained in the mapping
// assert that the recalculated points are contained in the mapping,
// this also guarantees the dimension is respected
assert (previousMapping.getInverseMappingMap().containsKey(firstBack) && previousMapping
.getInverseMappingMap().containsKey(secondBack)) : "Reverted Points not part of the mapping";
// assure no point is null
assert (secondBack != null && firstBack != null) : "One of the reverted points is null";
}
reversedfeatures.removeAll(toRemove);
reversedfeatures.addAll(toAdd);
// assert the lists contain what they should
assert (toAdd.size() == toRemove.size() * 2
&& reversedfeatures.containsAll(toAdd)) : "Kronecker reversion failed";
}
/**
 * Replaces every point in {@code reversedfeatures} with the point that
 * {@code currentMapping.getBackwardsMappedFeature} returns for it, keeping
 * the list order. The list is modified in place.
 *
 * @param currentMapping   mapping used for the backwards lookup
 * @param reversedfeatures points to map back; must not contain {@code null}
 */
private static void revertMapping(MatrixMapping currentMapping, List<Point> reversedfeatures) {
    assert (!reversedfeatures.contains(null)) : "Tried to backwards map a null value";

    final int sizeBefore = reversedfeatures.size();
    List<Point> mappedBack = new ArrayList<Point>(sizeBefore);
    for (Point p : reversedfeatures) {
        // revert the mapping step, a plain lookup in the mapping
        Point backwardsMappedFeature = currentMapping.getBackwardsMappedFeature(p);
        // assert that the point exists
        assert (backwardsMappedFeature != null) : "Backwardsmapping resulted in null entry! \n"
                + currentMapping.toString() + "At point: " + p;
        mappedBack.add(backwardsMappedFeature);
    }

    // Every original element is replaced, so clear the list rather than
    // removing the elements one by one via removeAll.
    reversedfeatures.clear();
    reversedfeatures.addAll(mappedBack);

    // assure that exactly one mapped point was produced per input point
    assert (reversedfeatures.size() == sizeBefore) : "Dimension of backwards mapped features is wrong";
}
public static String convertPointListToFeatureString(List<Point> determineFeaturesForIndex) {
......@@ -145,14 +159,13 @@ public class MatrixUtil {
public static String prettyMatrixFormat(FlexCompColMatrix matrix) {
StringBuilder sb = new StringBuilder();
for (int col = 0; col < matrix.numRows(); col++) {
for (int row = 0; row < matrix.numColumns(); row++) {
double d = matrix.get(col, row);
if(d==0.0d){
for (int row = 0; row < matrix.numRows(); row++) {
for (int col = 0; col < matrix.numColumns(); col++) {
double d = matrix.get(row, col);
if (d == 0.0d) {
sb.append("<EMPTY>");
}
else{
sb.append(" " + d+" ");
} else {
sb.append(" " + d + " ");
}
}
sb.append("\n");
......
......@@ -56,7 +56,7 @@ public class InstanceTest {
int cols = 5;
// try and set single features
Instance tInstance = new Instance(cols, rows);
Instance tInstance = new Instance(cols, rows,0);
tInstance.setFeatureAt(0, 0, 1);
assertEquals(1, tInstance.getFeatureArray()[0][0]);
assertEquals(0, tInstance.getFeatureArray()[0][1]);
......@@ -73,7 +73,7 @@ public class InstanceTest {
public void testInstanceSetAllFeatures() {
int rows = 10;
int cols = 5;
Instance tInstance = new Instance(cols, rows);
Instance tInstance = new Instance(cols, rows,0);
int[][] test = new int[cols][rows];
// check for whole array feature setting
......@@ -94,7 +94,7 @@ public class InstanceTest {
public void testBoundary1() {
int rows = 10;
int cols = 5;
Instance tInstance = new Instance(cols, rows);
Instance tInstance = new Instance(cols, rows,0);
// setting features at a col which should not exist
exception.expect(ArrayIndexOutOfBoundsException.class);
......@@ -105,7 +105,7 @@ public class InstanceTest {
public void testBoundary2() {
int rows = 10;
int cols = 5;
Instance tInstance = new Instance(cols, rows);
Instance tInstance = new Instance(cols, rows,0);
// setting features at a row which should not exist
exception.expect(ArrayIndexOutOfBoundsException.class);
......@@ -122,7 +122,7 @@ public class InstanceTest {
int rows = 10;
int cols = 5;
Instance tInstance = new Instance(cols, rows);
Instance tInstance = new Instance(cols, rows,0);
// add a point and check if found
tInstance.setFeatureAt(1, 3, 1337);
......
......@@ -3,7 +3,13 @@ package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Base class for feature generators that turn a token annotation into an
 * array of feature strings.
 */
public abstract class AFeatureGenerator {

    /** Identifier that the subclasses in this package put in front of their feature values. */
    protected String featureIdentifier;

    /**
     * @param featureIdentifier identifier stored for use by subclasses
     */
    public AFeatureGenerator(String featureIdentifier) {
        this.featureIdentifier = featureIdentifier;
    }

    /**
     * Generates the features for a single token.
     *
     * @param token annotation to generate features for
     * @return the generated feature strings
     */
    public abstract String[] generateFeatures(AnnotationFS token);
}
......@@ -4,13 +4,18 @@ import org.apache.uima.cas.text.AnnotationFS;
/**
 * Emits a single feature telling whether the covered text of a token starts
 * with an uppercase character, in the form {@code IsUC=Uppercase} or
 * {@code IsUC=Lowercase}.
 */
public class IsUppercaseFeatureGenerator extends AFeatureGenerator {

    public static final String LOWERCASE = "Lowercase";
    public static final String UPPERCASE = "Uppercase";

    public IsUppercaseFeatureGenerator() {
        super("IsUC");
    }

    /**
     * @param token annotation whose covered text is inspected
     * @return a one-element array holding the identifier-prefixed feature
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        String text = token.getCoveredText();
        // A missing or empty covered text has no first character; report it
        // as lowercase instead of failing on charAt(0).
        boolean startsUppercase = text != null && !text.isEmpty() && Character.isUpperCase(text.charAt(0));
        return new String[] { featureIdentifier + "=" + (startsUppercase ? UPPERCASE : LOWERCASE) };
    }
}
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Emits a single feature telling whether the value of a configurable feature
 * of the token starts with "N", in the form {@code IsNN=NN} or {@code IsNN=O}.
 */
public class NNFeatureGenerator extends AFeatureGenerator {

    /** Base name of the token feature whose string value is inspected. */
    private String posTagFeature;

    /**
     * @param posTagFeature base name of the feature to read from each token
     */
    public NNFeatureGenerator(String posTagFeature) {
        super("IsNN");
        this.posTagFeature = posTagFeature;
    }

    /**
     * @param token annotation whose feature value is inspected
     * @return a one-element array holding the identifier-prefixed feature
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        String featureValueAsString = token
                .getFeatureValueAsString(token.getType().getFeatureByBaseName(posTagFeature));
        // An unset feature value is treated as "not a noun" rather than
        // failing with a NullPointerException on startsWith.
        boolean startsWithN = featureValueAsString != null && featureValueAsString.startsWith("N");
        return new String[] { featureIdentifier + (startsWithN ? "=NN" : "=O") };
    }
}
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Emits a single feature holding the last {@code suffixLen} characters of the
 * token's covered text (the whole text when it is shorter), in the form
 * {@code Suffix<suffixLen>=<suffix>}.
 */
public class SuffixNGenerator extends AFeatureGenerator {

    int suffixLen;

    /**
     * @param suffixLen number of trailing characters to keep
     */
    public SuffixNGenerator(int suffixLen) {
        super("Suffix" + suffixLen);
        this.suffixLen = suffixLen;
    }

    /**
     * @param token annotation whose covered text is cut
     * @return a one-element array holding the identifier-prefixed suffix
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        final String covered = token.getCoveredText();
        final int start = Math.max(0, covered.length() - suffixLen);
        final String suffix = covered.substring(start);
        return new String[] { featureIdentifier + "=" + suffix };
    }
}
......@@ -4,9 +4,13 @@ import org.apache.uima.cas.text.AnnotationFS;
/**
 * Emits the covered text of a token as a single feature, in the form
 * {@code Text=<covered text>}.
 */
public class WordFeaturegenerator extends AFeatureGenerator {

    public WordFeaturegenerator() {
        super("Text");
    }

    /**
     * @param token annotation whose covered text is emitted
     * @return a one-element array holding the identifier-prefixed text
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        // Prefix with the identifier, as the other generators in this package do.
        return new String[] { featureIdentifier + "=" + token.getCoveredText() };
    }
}
......@@ -52,7 +52,7 @@ public class InstanceCreationFactory {
// update the queue
if (windowQueue.size() >= windowSize) {
Instance inst = generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex));
Instance inst = generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex),tokenIndex);
instances.add(inst);
windowQueue.poll();
tokenIndex++;
......@@ -63,7 +63,7 @@ public class InstanceCreationFactory {
for (int i = 0; i < rightWindowSize; i++) {
windowQueue.poll();
windowQueue.add(new LinkedList<String>());
Instance inst = generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex));
Instance inst = generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex),tokenIndex);
instances.add(inst);
tokenIndex++;
}
......@@ -82,7 +82,7 @@ public class InstanceCreationFactory {
}
private static Instance generateInstanceFromQueue(Queue<List<String>> windowQueue, int windowSize,
String goldfeature) {
String goldfeature, int tokenIndex) {
int maxNrRows = 0;
for (List<String> features : windowQueue) {
if (features.size() > maxNrRows)
......@@ -90,7 +90,7 @@ public class InstanceCreationFactory {
}
int idToFeature = LabelAlphabet.getIdToFeature(goldfeature);
Instance instance = new Instance(windowSize, maxNrRows, idToFeature);
Instance instance = new Instance(windowSize, maxNrRows, idToFeature,tokenIndex);
//fill the data
int colNr =0;
for(List<String> tokenFeatures : windowQueue){
......
......@@ -42,6 +42,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// TODO include a logger
System.out.println("Train binary classifier: " + "Goldlabel: " + LabelAlphabet.getFeatureToId(goldLabel)
+ "\tvs\t" + LabelAlphabet.getFeatureToId(otherLabel));
System.out.println("Amount distinct features: " + LabelAlphabet.getSize());
while (morePasses(instancesForPass, currentGoldIndex)) {
// update the learning objective
if (passIndex % 2 == 0) {
......@@ -122,20 +123,29 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// add all instances to the initial matrix
bestMatrix.addInstance(instances.toArray(new Instance[0]));
// initial values
int maximumScore = 0;
Point maxEntryLocation = bestMatrix.getLocationOfMaximum();
while (true) {
maximumScore = bestMatrix.getMaximumScore();
maxEntryLocation = bestMatrix.getLocationOfMaximum();
// map matrix to dense matrix
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(bestMatrix, maximumScore);
mappings.add(mappingForMaximum);
// assert that the maximum is within bounds
assert (maxEntryLocation.x < bestMatrix.getTpMatrix().numColumns()
&& maxEntryLocation.y < bestMatrix.getTpMatrix().numRows()) : "Position of maximum out of bounds";
// also assert that the mapping contains our maximum
assert (mappingForMaximum.getMappingMap()
.containsKey(maxEntryLocation)) : "Mapping does not contain maximum point";
// expand in kronecker fashion
bestMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
if (!betterRuleCanBeLearned(maximumScore, bestMatrix)) {
mappings.remove(mappingForMaximum);
break;
......@@ -147,10 +157,23 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// even necessary???
if (maxEntryLocation == null)
return null;
List<Point> featuresAtMax = MatrixUtil.determineFeaturesForIndex(maxEntryLocation, mappings);
List<Point> featuresAtMax =new ArrayList<Point>();
//TODO i have no idea why we need this if !
if (mappings.size() > 0) {
List<Point> reversedfeatures = new ArrayList<Point>();
reversedfeatures.add(maxEntryLocation);
MatrixUtil.revertKroneckerExpansion(mappings.get(mappings.size()-1), reversedfeatures);
featuresAtMax.addAll(MatrixUtil.determineFeaturesForIndex(reversedfeatures.get(0), mappings));
featuresAtMax.addAll(MatrixUtil.determineFeaturesForIndex(reversedfeatures.get(1), mappings));
} else {
featuresAtMax.addAll(MatrixUtil.determineFeaturesForIndex(maxEntryLocation, mappings));
}
return new RepresentationRule(windowSize, featuresAtMax, goldLabel, maximumScore);
}
// TODO isnt this too greedy?
/**
 * Tells whether the given matrix reaches a strictly higher maximum score
 * than the score found so far.
 *
 * @param maximumScore  best score found so far
 * @param matrixInFocus matrix whose maximum score is compared against it
 * @return {@code true} if the matrix' maximum score exceeds {@code maximumScore}
 */
private boolean betterRuleCanBeLearned(int maximumScore, MatrixMcMatrixFace matrixInFocus) {
    final int candidateScore = matrixInFocus.getMaximumScore();
    return candidateScore > maximumScore;
}
......
......@@ -13,6 +13,8 @@ import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator;
import de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory;
......@@ -25,14 +27,16 @@ public class FirstTest {
File typesystem = new File("resources\\MiKalliTypesystem.xml");
BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.LOWERCASE),
LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.UPPERCASE));
TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2, "de.uniwue.kalimachos.coref.type.POS", tsd, new IsUppercaseFeatureGenerator(), new WordFeaturegenerator());
LabelAlphabet.getIdToFeature("IsNN=NN"), LabelAlphabet.getIdToFeature("IsNN=O"));
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2,
"de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new IsUppercaseFeatureGenerator(), new SuffixNGenerator(4));
System.out.println(instances.size());
algorithm.learn(instances.toArray(new Instance[0]));
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment