Commit 32a4579c authored by mak28ma
Browse files

ngramfeatureGen

parent 9e18f8a9
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Feature generator that emits one feature for every contiguous character
 * substring of a token's covered text.
 *
 * <p>Each feature has the form {@code featureIdentifier + "=" + substring}.
 * A substring that does not start at the first character is prefixed with
 * {@code "*"}; one that does not end at the last character is suffixed with
 * {@code "*"}. For the text {@code "ab"} the substrings are emitted in the
 * order {@code a*}, {@code ab}, {@code *b}.
 */
public class NGramGenerator extends AFeatureGenerator {

    /** Creates the generator, passing {@code "NGram"} to the superclass constructor. */
    public NGramGenerator() {
        super("NGram");
    }

    /**
     * Builds the substring features for the given token.
     *
     * @param token annotation whose covered text is decomposed
     * @return one feature string per (start, end) substring, ordered by start
     *         offset and then by end offset; an empty array for empty text
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        final String covered = token.getCoveredText();
        final int length = covered.length();
        final List<String> features = new ArrayList<String>();

        int start = 0;
        while (start < length) {
            // The left marker depends only on the start offset, so pick it once per row.
            final String leftMarker = (start == 0) ? "" : "*";
            int stop = start + 1;
            while (stop <= length) {
                final String rightMarker = (stop == length) ? "" : "*";
                final String piece = leftMarker + covered.substring(start, stop) + rightMarker;
                features.add(super.featureIdentifier + "=" + piece);
                stop++;
            }
            start++;
        }

        return features.toArray(new String[0]);
    }
}
......@@ -16,7 +16,7 @@ public class NNFeatureGenerator extends AFeatureGenerator{
// Produces exactly one feature string for the token: featureIdentifier + "=NN"
// when the token's tag value starts with the tested prefix, otherwise
// featureIdentifier + "=O".
public String[] generateFeatures(AnnotationFS token) {
// Resolve the feature named by posTagFeature on the token's type and read its value as a string.
// NOTE(review): if that value can be null (feature unset), startsWith below would throw a
// NullPointerException - confirm against the UIMA contract and the calling pipeline.
String featureValueAsString = token.getFeatureValueAsString(token.getType().getFeatureByBaseName(posTagFeature));
// NOTE(review): the next two return statements look like the before/after lines of a diff hunk
// shown without +/- markers. Presumably the "VVFIN" line is the removed version and the "N" line
// is the one introduced by the commit - confirm against the repository before relying on either.
return new String[]{featureValueAsString.startsWith("VVFIN")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
return new String[]{featureValueAsString.startsWith("N")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
}
}
......@@ -14,6 +14,7 @@ import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator;
......@@ -43,7 +44,7 @@ public class FirstTest2 {
"de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3),
new SuffixNGenerator(2), new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),
new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3)));
new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3), new NGramGenerator()));
System.out.println("instances: " + instances.size());
//if(instances.size()>20000)break;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment