Commit 78f8a06e authored by mak28ma's avatar mak28ma
Browse files

started to improve readability a bit

parent 1bc99e89
......@@ -42,14 +42,20 @@ public class MatrixUtil {
// collect all entries that may remain
List<MatrixPoint> entries = new ArrayList<>();
Set<Set<Point>> uniqueFeatureCombinations = new HashSet<>();
// iterate over each feature combination left
for (MatrixEntry entry : matrixface.getTpMatrix()) {
double sum = entry.get();
if (sum >= maximum) {
// save all that may be kept
if (sum == maximum && matrixface.getFpMatrix().get(entry.row(), entry.column()) == 0) {
double amountTP = entry.get();
// save all that may be kept; by default, any that have more TP may
// be kept, since they have potential
if (amountTP >= maximum) {
// but if they are at maxScore (they can be more than that) and don't
// have any FP left, there is no potential left
if (amountTP == maximum && matrixface.getFpMatrix().get(entry.row(), entry.column()) == 0) {
if (!(new Point(entry.column(), entry.row()).equals(maxEntryLocation))) {
//no potential to improve left!
// no potential to improve left!
continue;
}
}
......@@ -58,9 +64,14 @@ public class MatrixUtil {
Set<Point> featuresOfPoints = determineFeaturesForIndex(new Point(entry.column(), entry.row()),
mappings, mappings.size() > 0 ? true : false);
//only keep each feature combination exactly once!
if (uniqueFeatureCombinations.contains(featuresOfPoints)) {
continue;
}
// furthermore, we can filter all those feature combinations that resemble exactly the same instance set,
// because our algorithm cannot distinguish between those
uniqueFeatureCombinations.add(featuresOfPoints);
double score = entry.get() - matrixface.getFpMatrix().get(entry.row(), entry.column());
......@@ -74,14 +85,10 @@ public class MatrixUtil {
+ " Amount of potentially good features " + entries.size());
matrixMapping = new MatrixMapping(formerMatrixSize);
// sort
Collections.sort(entries, new Comparator<MatrixPoint>() {
// sort by score //TODO can this be done better??
Collections.sort(entries, (MatrixPoint arg0, MatrixPoint arg1) -> {
@Override
public int compare(MatrixPoint arg0, MatrixPoint arg1) {
// TODO Auto-generated method stub
return (int) (arg1.getScore() - arg0.getScore());
}
return (int) (arg1.getScore() - arg0.getScore());
});
}
......@@ -102,13 +109,16 @@ public class MatrixUtil {
// System.out.println(entries.size()+"==");
// debug
// for (MatrixPoint entry : entries) {
// Set<Point> featuresForDenseIndex = determineFeaturesForIndex(new Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6, featuresForDenseIndex, 1, 10);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " + entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
// }
// for (MatrixPoint entry : entries) {
// Set<Point> featuresForDenseIndex = determineFeaturesForIndex(new
// Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6,
// featuresForDenseIndex, 1, 10);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " +
// entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
// }
return matrixMapping;
}
......
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Feature generator that turns the value of one configurable feature of a
 * token annotation into a single string feature.
 * <p>
 * The generator is registered with its superclass under the name
 * {@code "Lemma"}; the emitted feature string has the form
 * {@code <featureIdentifier>=<value>}.
 */
public class LemmaFeatureGenerator extends AFeatureGenerator {

    /** Base name of the feature that is looked up on the token's type. */
    private final String lemmaTagFeature;

    /**
     * Creates a generator that reads the given feature from each token.
     *
     * @param lemmaFeature
     *            base name of the feature on the token's type whose value is
     *            emitted (presumably the feature holding the lemma)
     */
    public LemmaFeatureGenerator(String lemmaFeature) {
        super("Lemma");
        this.lemmaTagFeature = lemmaFeature;
    }

    /**
     * Reads the configured feature from the token and wraps it into a
     * one-element feature array.
     *
     * @param token
     *            the annotation to read the feature value from
     * @return an array with exactly one entry of the form
     *         {@code <featureIdentifier>=<value>}; if the feature value is
     *         {@code null}, string concatenation yields the text
     *         {@code "null"} after the equals sign
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        // Resolve the feature by its base name on the token's own type, then
        // read its value in string form.
        String featureValueAsString = token.getFeatureValueAsString(
                token.getType().getFeatureByBaseName(lemmaTagFeature));
        return new String[] { super.featureIdentifier + "=" + featureValueAsString };
    }
}
......@@ -28,18 +28,28 @@ public class WordCategorization extends AFeatureGenerator {
ngrams.add(super.featureIdentifier + "=" + ngram);
}
}
//also add a simplified version of text
String simplifiedVersion = simplifyString(text);
ngrams.add(super.featureIdentifier+"_simplified="+simplifiedVersion);
return ngrams.toArray(new String[0]);
}
/**
 * Collapses runs of repeated marker characters into a single character.
 * Every sequence of consecutive {@code 'c'}, of consecutive {@code 'C'} and
 * of consecutive {@code 'D'} characters is reduced to one occurrence; all
 * other characters are left untouched.
 *
 * @param text
 *            the string to simplify; must not be {@code null}
 * @return the string with each run of {@code c}, {@code C} or {@code D}
 *         shortened to a single character
 */
private String simplifyString(String text) {
    // Each step only shortens runs of one specific character, so the three
    // replacements are applied one after another on the intermediate result.
    String singleLowerC = text.replaceAll("c+", "c");
    String singleUpperC = singleLowerC.replaceAll("C+", "C");
    String singleD = singleUpperC.replaceAll("D+", "D");
    return singleD;
}
private String unifyString(String coveredText) {
String refinedString = "";
for (Character c : coveredText.toCharArray()) {
if (c.toString().matches("[a-z]")) {
refinedString += "x";
refinedString += "c";
} else if (c.toString().matches("[A-Z]")) {
refinedString += "X";
refinedString += "C";
} else if (c.toString().matches("[0-9]")) {
refinedString += "D";
} else {
......
......@@ -20,6 +20,7 @@ import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerato
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.POSTagFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordCategorization;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator;
import de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory;
......@@ -34,14 +35,14 @@ public class FirstTest {
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(100);
MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(250);
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 0,0, 0,
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 0,2, 2,
"de.uniwue.kalimachos.coref.type.POS", tsd, new POSTagFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3),
new NGramGenerator());
new NGramGenerator(),new WordCategorization());
// create 5 folds
List<UnstructuredFold> folds = FoldUtil.readInstancesToFold(instances, new Random(13374211), 10);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment