Commit 756c4039 authored by Markus Krug's avatar Markus Krug
Browse files

viel probiert und nur etwas gewonnen, das lernen bleibt relativ langsam

parent e1a1273c
......@@ -85,7 +85,7 @@ public class Instance {
FlexCompColMatrix denseInstanceMatrix = new FlexCompColMatrix(denseDimension, denseDimension);
for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
List<Point> features = MatrixUtil.determineFeaturesForIndex(denseIndices, mappings);
List<Point> features = lastMapping.getFeaturesForDenseIndex(denseIndices);
// check if this instance contains the features
if (containsFeature(features)) {
// if so then create a sparse matrix and put a 1 into the
......
......@@ -4,11 +4,14 @@ import java.awt.Point;
import java.util.HashMap;
import java.util.List;
import de.uniwue.ls6.util.MatrixUtil;
public class MatrixMapping {
// x is col and y is row
HashMap<Point, Point> mappingMap;
HashMap<Point, Point> inverseMappingMap;
HashMap<Point,List<Point>> denseIndexToFeaturesMapping;
//
private int kroneckerDimension;
......@@ -25,6 +28,7 @@ public class MatrixMapping {
this.mappingMap = mappingMap;
this.inverseMappingMap = inverseMap;
this.kroneckerDimension = kroneckerDimension;
this.denseIndexToFeaturesMapping = new HashMap<>();
}
public MatrixMapping(int kroneckerDimension) {
......@@ -32,6 +36,7 @@ public class MatrixMapping {
this.mappingMap = new HashMap<Point, Point>();
this.inverseMappingMap = new HashMap<Point, Point>();
this.kroneckerDimension = kroneckerDimension;
this.denseIndexToFeaturesMapping = new HashMap<>();
}
public HashMap<Point, Point> getMappingMap() {
......@@ -63,7 +68,7 @@ public class MatrixMapping {
}
// this method generates all values based on the keys
public void inferDenseMapValues() {
public void inferDenseMapValues(List<MatrixMapping> mappings) {
int numCols = (int) Math.ceil(Math.sqrt(mappingMap.keySet().size()));
int index = 0;
for (Point key : mappingMap.keySet()) {
......@@ -72,6 +77,11 @@ public class MatrixMapping {
inverseMappingMap.put(value, key);
index++;
}
//also infer the features
for(Point p : inverseMappingMap.keySet()){
denseIndexToFeaturesMapping.put(p,MatrixUtil.determineFeaturesForIndex(p, mappings));
}
}
public int getDenseMatrixDimension() {
......@@ -87,6 +97,10 @@ public class MatrixMapping {
/**
 * Returns the Kronecker dimension stored in this mapping.
 *
 * @return the value of the {@code kroneckerDimension} field, as assigned by the constructor
 */
public int getKroneckerMatrixDimension() {
    final int dimension = this.kroneckerDimension;
    return dimension;
}
/**
 * Looks up the feature list cached for a dense-matrix index.
 *
 * <p>The cache ({@code denseIndexToFeaturesMapping}) is filled by
 * {@code inferDenseMapValues}; before that call it is empty.
 *
 * @param densePoint index into the dense matrix, used as the lookup key
 * @return the cached feature list, or {@code null} if nothing is stored for {@code densePoint}
 */
public List<Point> getFeaturesForDenseIndex(Point densePoint) {
    final List<Point> cachedFeatures = this.denseIndexToFeaturesMapping.get(densePoint);
    return cachedFeatures;
}
@Override
public String toString() {
......
......@@ -7,11 +7,18 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
......@@ -22,7 +29,7 @@ import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
public class MatrixUtil {
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum) {
public static MatrixMapping getMappingForMaximum(MatrixMcMatrixFace matrixface, int maximum,List<MatrixMapping> mappings) {
// totally unugly code not even necessary which makes it worse
int formerMatrixSize = matrixface.getTpMatrix().numRows() == matrixface.getTpMatrix().numColumns()
......@@ -37,7 +44,7 @@ public class MatrixUtil {
// double sum = entry.get() +
// matrixface.getFpMatrix().get(entry.row(), entry.column());
double sum = entry.get();
if (sum >= maximum) {
if (sum > maximum) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
// save the score
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
......@@ -47,7 +54,8 @@ public class MatrixUtil {
}
}
if (matrixMapping.getDenseMatrixDimension() > 50) {
System.out.println("Densedimension: "+ matrixMapping.getDenseMatrixDimension());
if (matrixMapping.getDenseMatrixDimension() > 20) {
System.out.println("Too many possible features! We restrict to the best 2500");
matrixMapping = new MatrixMapping(formerMatrixSize);
......@@ -68,7 +76,9 @@ public class MatrixUtil {
}
// infer the -> righthandside
matrixMapping.inferDenseMapValues();
ArrayList<MatrixMapping> arrayList = new ArrayList<MatrixMapping>(mappings);
arrayList.add(matrixMapping);
matrixMapping.inferDenseMapValues(arrayList);
return matrixMapping;
}
......@@ -78,28 +88,46 @@ public class MatrixUtil {
MatrixMapping lastMapping = mappings.get(mappings.size() - 1);
int dimension = lastMapping.getDenseMatrixDimension();
MatrixMcMatrixFace expandedMatrixFace = new MatrixMcMatrixFace(dimension * dimension, dimension * dimension,
label);
// populate the matrix with the dataset this is expensive!
instances.parallelStream().forEach((Instance inst) -> {
// expand
Supplier<MatrixMcMatrixFace> matrixConstructor = ()-> new MatrixMcMatrixFace(dimension * dimension, dimension * dimension, label);
BiConsumer<MatrixMcMatrixFace, Instance> accumulator = (MatrixMcMatrixFace expandedMatrix, Instance inst) -> {
FlexCompColMatrix expandedInstance = inst.expand(mappings);
// TODO can we use something faster here... currently this results
// in a speedup of 3...
synchronized (expandedMatrixFace) {
// add to kronecker
if (inst.getLabel() == label) {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
} else {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
}
if (inst.getLabel() == label) {
expandedMatrix.addToMatrix(expandedMatrix.getTpMatrix(), expandedInstance);
} else {
expandedMatrix.addToMatrix(expandedMatrix.getFpMatrix(), expandedInstance);
}
});
return expandedMatrixFace;
};
BinaryOperator<MatrixMcMatrixFace> join = (MatrixMcMatrixFace a, MatrixMcMatrixFace b) -> {
a.getTpMatrix().add(b.getTpMatrix());
a.getFpMatrix().add(b.getFpMatrix());
return a;
};
return instances.parallelStream().collect(Collector.of(
matrixConstructor,
accumulator,
join,Collector.Characteristics.UNORDERED
));
// // populate the matrix with the dataset this is expensive!
// List<FlexCompColMatrix> syncList = Collections.synchronizedList(new LinkedList<>());
// instances.parallelStream().forEach((Instance inst) -> {
// // expand
// FlexCompColMatrix expandedInstance = inst.expand(mappings);
//
// syncList.add(expandedInstance);
// // TODO can we use something faster here... currently this results
// // in a speedup of 3...
// synchronized (expandedMatrixFace) {
// // add to kronecker
// if (inst.getLabel() == label) {
// expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
// } else {
// expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
// }
// }
//
// });
// return expandedMatrixFace;
}
public static MatrixMcMatrixFace performKroneckerExpansionWithIndex(List<MatrixMapping> mappings,
......@@ -218,67 +246,44 @@ public class MatrixUtil {
}
public static void revertKroneckerExpansion(MatrixMapping previousMapping, List<Point> reversedfeatures) {
List<Point> toRemove = new ArrayList<Point>();
List<Point> toAdd = new ArrayList<Point>();
for (Point p : reversedfeatures) {
Iterator<Point> iterator = reversedfeatures.iterator();
while(iterator.hasNext()){
Point p = iterator.next();
iterator.remove();
// revert the kronecker expansion step this generates 2 points
int sizeBeforeExpansion = (int) previousMapping.getDenseMatrixDimension();
int xBefore1 = (int) Math.floor(p.x / sizeBeforeExpansion);
int xBefore2 = p.x % sizeBeforeExpansion;
int yBefore1 = (int) Math.floor(p.y / sizeBeforeExpansion);
int yBefore2 = p.y % sizeBeforeExpansion;
Point firstBack = new Point(xBefore1, yBefore1);
Point secondBack = new Point(xBefore2, yBefore2);
toRemove.add(p);
toAdd.add(secondBack);
toAdd.add(firstBack);
// assert that the recalculated points are contained in the mapping,
// this also guarantees the dimension is respected
assert (previousMapping.getInverseMappingMap().containsKey(firstBack) && previousMapping
.getInverseMappingMap().containsKey(secondBack)) : "Reverted Points not part of the mapping";
// assure no point is null
assert (secondBack != null && firstBack != null) : "One of the reverted points is null";
}
reversedfeatures.removeAll(toRemove);
reversedfeatures.addAll(toAdd);
// assert the lists contain what they should
assert (toAdd.size() == toRemove.size() * 2
&& reversedfeatures.containsAll(toAdd)) : "Kronecker reversion failed";
}
private static void revertMapping(MatrixMapping currentMapping, List<Point> reversedfeatures) {
List<Point> toRemove = new ArrayList<Point>();
List<Point> toAdd = new ArrayList<Point>();
assert (!reversedfeatures.contains(null)) : "Tried to backwards map a null value";
reversedfeatures.replaceAll((Point p) -> currentMapping.getBackwardsMappedFeature(p));
for (Point p : reversedfeatures) {
// revert the mapping step this is easy !
Point backwardsMappedFeature = currentMapping.getBackwardsMappedFeature(p);
toRemove.add(p);
toAdd.add(backwardsMappedFeature);
// assert that the point exists
assert (backwardsMappedFeature != null) : "Backwardsmapping resulted in null entry! \n"
+ currentMapping.toString() + "At point: " + p;
}
reversedfeatures.removeAll(toRemove);
reversedfeatures.addAll(toAdd);
// assure everything went alright
assert (toRemove.size() == toAdd.size() && toAdd.size() == reversedfeatures.size()
&& reversedfeatures.containsAll(reversedfeatures)) : "Dimension of backwards mapped features is wrong";
}
public static String convertPointListToFeatureString(List<Point> determineFeaturesForIndex) {
......
......@@ -16,7 +16,7 @@ public class NNFeatureGenerator extends AFeatureGenerator{
public String[] generateFeatures(AnnotationFS token) {
String featureValueAsString = token.getFeatureValueAsString(token.getType().getFeatureByBaseName(posTagFeature));
return new String[]{featureValueAsString.startsWith("N")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
return new String[]{featureValueAsString.startsWith("NE")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
}
}
......@@ -47,8 +47,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
+ "\tvs\t" + LabelAlphabet.getFeatureToId(otherLabel));
System.out.println("Amount distinct features: " + LabelAlphabet.getSize());
System.out.println("Start indexing for " + instances.length + " instances...");
createIndex(instances);
//System.out.println("Start indexing for " + instances.length + " instances...");
// createIndex(instances);
System.out.println("Finished creating instance!");
while (morePasses(instancesForPass, currentGoldIndex)) {
// update the learning objective
......@@ -172,7 +172,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
maxEntryLocation = bestMatrix.getLocationOfMaximum();
// map matrix to dense matrix
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(bestMatrix, maximumScore);
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(bestMatrix, maximumScore,mappings);
mappings.add(mappingForMaximum);
// assert that the maximum is within bounds
......@@ -185,14 +185,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// expand in kronecker fashion
long time = System.currentTimeMillis();
bestMatrix = MatrixUtil.performKroneckerExpansionWithIndex(mappings, instances, goldLabel,index);
// MatrixMcMatrixFace other = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
// if(!other.equals(bestMatrix)){
// System.out.println("BUG!");
// System.out.println(MatrixUtil.prettyMatrixFormat(bestMatrix.getTpMatrix()));
// System.out.println();
// System.out.println(MatrixUtil.prettyMatrixFormat(other.getTpMatrix()));
// }
bestMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
System.out.println(System.currentTimeMillis()-time);
if (!betterRuleCanBeLearned(maximumScore, bestMatrix)) {
......
......@@ -13,6 +13,7 @@ import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator;
......@@ -35,10 +36,10 @@ public class FirstTest {
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 5, 5,
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(bigDoc, 3, 3,
"de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3), new SuffixNGenerator(2),
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3));
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3), new NGramGenerator());
System.out.println(instances.size());
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment