Commit 9e18f8a9 authored by mak28ma's avatar mak28ma
Browse files

added a little parallelism

parent e44099ec
......@@ -69,7 +69,7 @@ public class Instance {
return true;
}
// perform kronecker expansion
// perform Kronecker expansion; should be executed in parallel for many instances
public FlexCompColMatrix expand(List<MatrixMapping> mappings) {
MatrixMapping lastMapping = mappings.get(mappings.size() - 1);
......@@ -120,11 +120,14 @@ public class Instance {
@Override
public int hashCode() {
    // Combine the feature array contents, the id and the label using the
    // usual multiply-by-31 scheme (seed 1), relying on int wrap-around.
    int hash = 31 + Arrays.deepHashCode(featureArray);
    hash = 31 * hash + id;
    return 31 * hash + label;
}
......@@ -137,8 +140,12 @@ public class Instance {
if (getClass() != obj.getClass())
return false;
Instance other = (Instance) obj;
if (!Arrays.deepEquals(featureArray, other.featureArray))
return false;
if (id != other.id)
return false;
if (label != other.label)
return false;
return true;
}
......
......@@ -81,18 +81,22 @@ public class MatrixUtil {
label);
// populate the matrix with the dataset; this is expensive!
for (Instance inst : instances) {
instances.parallelStream().forEach((Instance inst)->{
// expand
FlexCompColMatrix expandedInstance = inst.expand(mappings);
//TODO can we use something faster here... currently this results in a speedup of 3...
synchronized (expandedMatrixFace) {
// add to kronecker
if (inst.getLabel() == label) {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
} else {
expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
}
}
}
});
return expandedMatrixFace;
}
......
......@@ -16,7 +16,7 @@ public class NNFeatureGenerator extends AFeatureGenerator{
public String[] generateFeatures(AnnotationFS token) {
// Produces a one-element array: featureIdentifier + "=NN" when the token's
// feature value starts with the checked prefix, featureIdentifier + "=O" otherwise.
// Look up the feature named by posTagFeature on the token's type and read it as a string.
String featureValueAsString = token.getFeatureValueAsString(token.getType().getFeatureByBaseName(posTagFeature));
// NOTE(review): two consecutive return statements follow (prefix "N" vs. "VVFIN").
// This looks like the old and the new side of a diff shown together - confirm which
// one is current; as written the second return is unreachable.
return new String[]{featureValueAsString.startsWith("N")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
return new String[]{featureValueAsString.startsWith("VVFIN")?super.featureIdentifier+"=NN":super.featureIdentifier+"=O"};
}
}
......@@ -53,6 +53,15 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
}
System.out.println("Perform pass for Label: " + LabelAlphabet.getFeatureToId(currentGoldIndex));
System.out.println("Remaining instances for pass: " + instancesForPass.size());
double amountGoldInstances = 0;
for (Instance i : instancesForPass) {
if (i.getLabel() == currentGoldIndex)
amountGoldInstances++;
}
System.out.println("Labeldistribution (Other;Gold) (" + (instancesForPass.size() - amountGoldInstances) + ";"
+ amountGoldInstances + ") " + "; " + amountGoldInstances / instancesForPass.size() * 100 + "%");
// create a new pass
learnRulePass(currentGoldIndex, instancesForPass);
......
......@@ -26,16 +26,18 @@ public class FirstTest {
File document = new File("resources\\Aston,-Louise__Lydia.xmi.xmi.xmi");
File doc2 = new File("resources\\Ahlefeld,-Charlotte-von_Erna1421[Lukas].xmi.xmi");
File typesystem = new File("resources\\MiKalliTypesystem.xml");
File korpusFOlder = new File("X:\\Neuer Ordner\\output+speech");
BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
LabelAlphabet.getIdToFeature("IsNN=NN"), LabelAlphabet.getIdToFeature("IsNN=O"));
TypeSystemDescription tsd = TypeSystemDescriptionFactory
.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 5, 5,
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2,
"de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3), new SuffixNGenerator(2),
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2), new PrefixNGenerator(3));
new SuffixNGenerator(1), new PrefixNGenerator(1), new PrefixNGenerator(2),new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3));
System.out.println(instances.size());
......
package test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.xml.sax.SAXException;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator;
import de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory;
/**
 * Manual driver: builds windowed instances from every {@code .xmi} file in a
 * corpus folder and hands them to a
 * {@link BinaryRepresentationRuleLearningAlgorithm}.
 */
public class FirstTest2 {

    /** Corpus folder used when no command-line argument is given. */
    private static final String DEFAULT_CORPUS_FOLDER = "X:\\Neuer Ordner\\output+speech";

    /**
     * Loads all {@code .xmi} documents of the corpus folder, creates instances
     * with a window of 2 tokens to each side and runs the learner on them.
     *
     * @param args optional; {@code args[0]} overrides the corpus folder, otherwise
     *             {@link #DEFAULT_CORPUS_FOLDER} is used
     * @throws IOException if the corpus folder cannot be listed
     * @throws Exception   anything raised while loading the type system, creating
     *                     instances or learning
     */
    public static void main(String[] args) throws Exception {
        // Build the path from its parts so it does not depend on the Windows separator.
        File typesystem = new File("resources", "MiKalliTypesystem.xml");
        File korpusFolder = new File(
                args != null && args.length > 0 ? args[0] : DEFAULT_CORPUS_FOLDER);

        // listFiles() returns null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a clear message instead of an NPE.
        File[] corpusFiles = korpusFolder.listFiles();
        if (corpusFiles == null) {
            throw new IOException(
                    "Corpus folder is not a readable directory: " + korpusFolder.getAbsolutePath());
        }

        BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
                LabelAlphabet.getIdToFeature("IsNN=NN"), LabelAlphabet.getIdToFeature("IsNN=O"));

        // File.toURL() is deprecated because it does not escape illegal URL
        // characters; going through toURI() does.
        TypeSystemDescription tsd = TypeSystemDescriptionFactory
                .createTypeSystemDescriptionFromPath(typesystem.toURI().toURL().toString());

        List<Instance> instances = new ArrayList<Instance>();
        for (File f : corpusFiles) {
            if (!f.getName().endsWith(".xmi")) {
                continue;
            }
            // Fresh feature generators are created per document, as before.
            instances.addAll(InstanceCreationFactory.createWindowedInstancesFromUIMA(f, 2, 2,
                    "de.uniwue.kalimachos.coref.type.POS", tsd, new NNFeatureGenerator("POSTag"),
                    new WordFeaturegenerator(), new SuffixNGenerator(4), new SuffixNGenerator(3),
                    new SuffixNGenerator(2), new SuffixNGenerator(1), new PrefixNGenerator(1),
                    new PrefixNGenerator(2), new IsUppercaseFeatureGenerator(), new PrefixNGenerator(3)));
            System.out.println("instances: " + instances.size());
        }

        System.out.println(instances.size());
        algorithm.learn(instances.toArray(new Instance[0]));
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment