Commit e1d41e7a authored by mak28ma's avatar mak28ma
Browse files

confused again...

parent 97915b6e
......@@ -9,7 +9,9 @@ import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
/**
* This class represents a single datapoint to be classified by the machine learning algorithm
* This class represents a single datapoint to be classified by the machine
* learning algorithm
*
* @author mkrug, dschloer
*
*/
......@@ -31,11 +33,15 @@ public class Instance {
/**
* Creates an instance with no features
* @param nrCols the windowsize of the task
* @param nrRows usually the amount of features stored in this column
* @param id a unique id
*
* @param nrCols
* the windowsize of the task
* @param nrRows
* usually the amount of features stored in this column
* @param id
* a unique id
*/
public Instance(int nrCols, int nrRows,int id) {
public Instance(int nrCols, int nrRows, int id) {
super();
featureArray = new int[nrCols][nrRows];
this.id = id;
......@@ -43,26 +49,31 @@ public class Instance {
/**
* Creates an instance with no features
* @param nrCols the windowsize of the task
* @param nrRows usually the amount of features stored in this column
* @param id a unique id
* @param label the goldlabel of this instance
*
* @param nrCols
* the windowsize of the task
* @param nrRows
* usually the amount of features stored in this column
* @param id
* a unique id
* @param label
* the goldlabel of this instance
*/
public Instance(int nrCols, int nrRows, int label,int id) {
public Instance(int nrCols, int nrRows, int label, int id) {
super();
featureArray = new int[nrCols][nrRows];
this.label = label;
this.id = id;
}
/**
 * Returns the label currently stored in this instance.
 *
 * @return the value of the {@code label} field
 */
public int getLabel() {
return label;
}
/**
* gets the feature at position (col,row)
* @param col
*
* @param col
* @param row
* @param value
*/
......@@ -77,8 +88,6 @@ public class Instance {
/**
 * Returns the two-dimensional feature array held by this instance.
 * The array itself is returned, not a copy, so changes made by the caller
 * are visible inside this instance.
 *
 * @return the {@code featureArray} field
 */
public int[][] getFeatureArray() {
return featureArray;
}
public int getId() {
return id;
......@@ -89,7 +98,10 @@ public class Instance {
// function)
/**
* Checks whether this instance has a set of features
* @param features the features that are checked if they are contained in this instance
*
* @param features
* the features that are checked if they are contained in this
* instance
* @return whether this instance contains all features of features
*/
public boolean containsFeature(List<Point> features) {
......@@ -109,12 +121,16 @@ public class Instance {
return true;
}
/** Expands this instance clever for the next iteration
/**
* Expands this instance cleverly for the next iteration
*
* @param mappings the Matrixmappings already included in the learning process
* @return the expanded instances in the size of the next kronecker expanded iteration
* @param mappings
* the Matrixmappings already included in the learning process
* @return the expanded instances in the size of the next kronecker expanded
* iteration
*/
// perform kronecker expansion should be executed in parallel for many instances
// perform kronecker expansion should be executed in parallel for many
// instances
public FlexCompColMatrix expand(List<MatrixMapping> mappings) {
MatrixMapping lastMapping = mappings.get(mappings.size() - 1);
......@@ -135,39 +151,35 @@ public class Instance {
// perform the kronecker expansion from the non sparse dense elements
int kroneckerDimension = denseDimension * denseDimension;
FlexCompColMatrix expanedMatrix = new FlexCompColMatrix(kroneckerDimension, kroneckerDimension);
for (MatrixEntry e1 : denseInstanceMatrix) {
outer: for (MatrixEntry e1 : denseInstanceMatrix) {
// iterate over all no sparse elements and expand
for (MatrixEntry e2 : denseInstanceMatrix) {
// // skip diagonal expansion
// if (e1.row() == e2.row() && e1.column() == e2.column())
// continue;
// // skip diagonal expansion TODO ??
// calculate the new indices
int kroneckerCol = e1.column() * denseDimension + e2.column();
int kroneckerRow = e1.row() * denseDimension + e2.row();
// check if this feature is available TODO is this already clear
// since we got both single features???
expanedMatrix.add(kroneckerRow, kroneckerCol, 1);
if (e1.row() == e2.row() && e1.column() == e2.column())
continue outer;
}
}
// assert that the entries in the expanded matrices are of size x(x-1)
assert (Matrices.cardinality(denseInstanceMatrix) * (Matrices.cardinality(denseInstanceMatrix)) == Matrices
.cardinality(
expanedMatrix)) : "Amount of non null entries of kronecker matrix after instance expansion not correct";
// assert that the entries in the expanded matrices are of size x(x-1)/2+x
assert (Matrices.cardinality(denseInstanceMatrix) * ((Matrices.cardinality(denseInstanceMatrix) - 1) * 0.5)
+ Matrices.cardinality(denseInstanceMatrix) == Matrices.cardinality(
expanedMatrix)) : "Amount of non null entries of kronecker matrix after instance expansion not correct "
+ Matrices.cardinality(denseInstanceMatrix) + " vs "
+ Matrices.cardinality(expanedMatrix);
return expanedMatrix;
}
@Override
public int hashCode() {
final int prime = 31;
......@@ -221,7 +233,7 @@ public class Instance {
/**
 * Replaces the id stored in this instance.
 *
 * @param id
 *            the new value for the {@code id} field
 */
public void setId(int id) {
this.id = id;
}
}
package de.uniwue.ls6.util;
/**
 * Mutable value holder that pairs a two-dimensional integer position with a
 * score.
 */
public class MatrixPoint {

	/** First coordinate of the position. */
	private int x;

	/** Second coordinate of the position. */
	private int y;

	/** Score associated with the position. */
	private double score;

	/**
	 * Creates a point at the given position with the given score.
	 *
	 * @param x
	 *            the first coordinate
	 * @param y
	 *            the second coordinate
	 * @param score
	 *            the score attached to this position
	 */
	public MatrixPoint(int x, int y, double score) {
		this.score = score;
		this.y = y;
		this.x = x;
	}

	// ---- accessors ----

	/** @return the first coordinate */
	public int getX() {
		return this.x;
	}

	/** @return the second coordinate */
	public int getY() {
		return this.y;
	}

	/** @return the score attached to this position */
	public double getScore() {
		return this.score;
	}

	// ---- mutators ----

	/**
	 * @param x
	 *            the new first coordinate
	 */
	public void setX(int x) {
		this.x = x;
	}

	/**
	 * @param y
	 *            the new second coordinate
	 */
	public void setY(int y) {
		this.y = y;
	}

	/**
	 * @param score
	 *            the new score
	 */
	public void setScore(double score) {
		this.score = score;
	}
}
......@@ -34,20 +34,18 @@ public class MatrixUtil {
MatrixMapping matrixMapping = new MatrixMapping(formerMatrixSize);
List<MatrixEntry> entries = new ArrayList<>();
List<MatrixPoint> entries = new ArrayList<>();
for (MatrixEntry entry : matrixface.getTpMatrix()) {
// TODO do we need to add the false positives???
// double sum = entry.get() +
// matrixface.getFpMatrix().get(entry.row(), entry.column());
double sum = entry.get();
if (sum > maximum) {
if (sum >= maximum) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
// save the score
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
// is this legit?
entry.set(score);
entries.add(entry);
entries.add(new MatrixPoint(entry.column(), entry.row(), score));
}
}
......@@ -57,18 +55,22 @@ public class MatrixUtil {
matrixMapping = new MatrixMapping(formerMatrixSize);
// sort
Collections.sort(entries, new Comparator<MatrixEntry>() {
Collections.sort(entries, new Comparator<MatrixPoint>() {
@Override
public int compare(MatrixEntry o1, MatrixEntry o2) {
return (int) (o1.get() - o2.get());
public int compare(MatrixPoint arg0, MatrixPoint arg1) {
// Sort in descending score order (arg1 is compared against arg0).
// Double.compare replaces the former cast of the score difference to
// int: that cast truncates toward zero, so any two scores less than 1.0
// apart were reported as equal and the resulting order was only
// approximate.
return Double.compare(arg1.getScore(), arg0.getScore());
}
});
// add the top "beamsize" to the mapping
for (MatrixEntry entry : entries) {
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
int amountEntries=0;
for (MatrixPoint entry : entries) {
if(matrixMapping.getMappingMap().size()>beamSize)break;
matrixMapping.addEntry(new Point(entry.getX(), entry.getY()));
}
}
......@@ -150,6 +152,7 @@ public class MatrixUtil {
// assure no point is null
assert (secondBack != null && firstBack != null) : "One of the reverted points is null";
}
reversedfeatures.addAll(toAdd);
......
......@@ -78,8 +78,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
if (passLearned) {
instancesForPass = keepClassifiableInstances(passes, instancesForPass, currentGoldIndex);
passIndex++;
}
else{
} else {
break;
}
}
......@@ -186,31 +185,36 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
private RepresentationRule learnRule(int goldLabel, Collection<Instance> instances, MatrixMcMatrixFace bestMatrix) {
List<MatrixMapping> mappings = new ArrayList<MatrixMapping>();
MatrixMcMatrixFace iterationMatrix = bestMatrix;
// initial values
int maximumScore = 0;
Point maxEntryLocation = bestMatrix.getLocationOfMaximum();
Point maxEntryLocation = iterationMatrix.getLocationOfMaximum();
lookahead = 0;
while (true) {
maximumScore = bestMatrix.getMaximumScore();
maximumScore = iterationMatrix.getMaximumScore();
maxEntryLocation = bestMatrix.getLocationOfMaximum();
maxEntryLocation = iterationMatrix.getLocationOfMaximum();
// map matrix to dense matrix
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(bestMatrix, maximumScore, mappings,
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(iterationMatrix, maximumScore, mappings,
beamSize);
mappings.add(mappingForMaximum);
// assert that the maximum is within bounds
assert (maxEntryLocation == null ? true
: maxEntryLocation.x < bestMatrix.getTpMatrix().numColumns() && maxEntryLocation.y < bestMatrix
: maxEntryLocation.x < iterationMatrix.getTpMatrix().numColumns() && maxEntryLocation.y < iterationMatrix
.getTpMatrix().numRows()) : "Position of maximum out of bounds";
// expand in kronecker fashion
long time = System.currentTimeMillis();
bestMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
System.out.println(System.currentTimeMillis() - time);
iterationMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
System.out.println(System.currentTimeMillis() - time);
if (!betterRuleCanBeLearned(maximumScore, bestMatrix)) {
//assert that the maximum is growing
assert(maximumScore<= iterationMatrix.getMaximumScore()):"Maximum decreased within iteration!";
if (!betterRuleCanBeLearned(maximumScore, iterationMatrix)) {
mappings.remove(mappingForMaximum);
break;
}
......@@ -234,7 +238,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
} else {
featuresAtMax.addAll(MatrixUtil.determineFeaturesForIndex(maxEntryLocation, mappings));
}
return new RepresentationRule(bestMatrix.getTpMatrix().numColumns(), featuresAtMax, goldLabel, maximumScore);
return new RepresentationRule(iterationMatrix.getTpMatrix().numColumns(), featuresAtMax, goldLabel, maximumScore);
}
// TODO isnt this too greedy?
......@@ -243,8 +247,12 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
lookahead = 0;
return true;
} else {
if (matrixInFocus.getMaximumScore() < maximumScore)
if (matrixInFocus.getMaximumScore() < maximumScore) {
//TODO this should actually be impossible!!
//System.out.println("MaxBefore " + maximumScore + "\t MaxAfter" + matrixInFocus.getMaximumScore());
//System.out.println("WHY!");
return false;
}
if (lookahead < 7) {
lookahead++;
return true;
......
......@@ -33,14 +33,13 @@ public class FirstTest {
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(2500);
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(500);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 0,3, 3,
"de.uniwue.kalimachos.coref.type.POS", tsd, new POSTagFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3), new SuffixNGenerator(2),
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3), new NGramGenerator());
new WordFeaturegenerator(),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3), new NGramGenerator());
System.out.println(instances.size());
......
......@@ -35,7 +35,7 @@ public class FirstTest2 {
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(2500);
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(9);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment