Commit a658cbfd authored by Markus Krug's avatar Markus Krug
Browse files

* Full implementation added; it does not work yet.

* Debugging should start next.
parent 8e4542fe
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
......
......@@ -20,12 +20,12 @@ public class Instance {
/**
 * Creates an instance with a freshly allocated feature array and without
 * assigning a label.
 * <p>
 * The array is indexed as {@code featureArray[column][row]}, i.e. the first
 * dimension has {@code nrCols} entries and the second {@code nrRows}.
 *
 * @param nrRows size of the second array dimension (rows per column)
 * @param nrCols size of the first array dimension (number of columns)
 */
public Instance(int nrRows, int nrCols) {
    super();
    // Only the column-major allocation is kept; the former [nrRows][nrCols]
    // allocation was overwritten immediately and therefore dead.
    featureArray = new int[nrCols][nrRows];
}
/**
 * Creates an instance with a freshly allocated feature array and the given
 * label.
 * <p>
 * The array is indexed as {@code featureArray[column][row]}, i.e. the first
 * dimension has {@code nrCols} entries and the second {@code nrRows}.
 *
 * @param nrRows size of the second array dimension (rows per column)
 * @param nrCols size of the first array dimension (number of columns)
 * @param label  label stored for this instance
 */
public Instance(int nrRows, int nrCols, int label) {
    super();
    // Only the column-major allocation is kept; the former [nrRows][nrCols]
    // allocation was overwritten immediately and therefore dead.
    featureArray = new int[nrCols][nrRows];
    this.label = label;
}
......@@ -33,6 +33,9 @@ public class Instance {
return label;
}
/**
 * Stores a value at the given position of the feature array.
 *
 * @param col   first index into the feature array (column)
 * @param row   second index into the feature array (row)
 * @param value value to store at {@code [col][row]}
 */
public void setFeatureAt(int col,int row, int value){
featureArray[col][row] = value;
}
/**
 * Replaces the label of this instance.
 *
 * @param label the new label
 */
public void setLabel(int label) {
this.label = label;
}
......
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>InstanceLoading</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>de.uniwue.ls6.rulelearning</groupId>
<relativePath>../</relativePath>
<artifactId>ruleLearningParent</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<groupId>de.uniwue.ls6.rulelearning</groupId>
<artifactId>InstanceLoading</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>InstanceLoading</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>de.uniwue.ls6.rulelearning</groupId>
<artifactId>DataStructure</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-core</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimafit-core</artifactId>
<version>2.2.0</version>
</dependency>
</dependencies>
</project>
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Base class for generators that derive string features from a single
 * annotation.
 */
public abstract class AFeatureGenerator {
/**
 * Generates the features for the given annotation.
 *
 * @param token the annotation to generate features for
 * @return the generated features as strings
 */
public abstract String[] generateFeatures(AnnotationFS token);
}
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Feature generator that emits exactly one feature stating whether the text
 * covered by an annotation starts with an upper-case character.
 */
public class IsUppercaseFeatureGenerator extends AFeatureGenerator {

    /** Feature emitted when the covered text does not start with an upper-case character. */
    public static final String LOWERCASE = "Lowercase";

    /** Feature emitted when the covered text starts with an upper-case character. */
    public static final String UPPERCASE = "Uppercase";

    /**
     * Classifies the first character of the covered text.
     *
     * @param token the annotation whose covered text is inspected
     * @return a one-element array holding {@link #UPPERCASE} or {@link #LOWERCASE};
     *         a missing or empty covered text yields {@link #LOWERCASE}
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        String text = token.getCoveredText();
        // Guard against annotations without any text: charAt(0) would throw on
        // an empty string and a null text would cause a NullPointerException.
        boolean startsUppercase = text != null && text.length() > 0 && Character.isUpperCase(text.charAt(0));
        return new String[] { startsUppercase ? UPPERCASE : LOWERCASE };
    }
}
package de.uniwue.ls6.rulelearning.instanceloading.featuregenerator;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Feature generator that uses the text covered by an annotation as its only
 * feature.
 */
public class WordFeaturegenerator extends AFeatureGenerator {

    /**
     * Returns the covered text of the annotation as a single feature.
     *
     * @param token the annotation whose covered text is used
     * @return a one-element array containing the covered text
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        final String coveredText = token.getCoveredText();
        final String[] features = new String[1];
        features[0] = coveredText;
        return features;
    }
}
package de.uniwue.ls6.rulelearning.instanceloading.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.xml.sax.SAXException;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.AFeatureGenerator;
/**
 * Factory that turns the token annotations of an XMI-serialized UIMA document
 * into windowed {@link Instance}s.
 */
public class InstanceCreationFactory {

    /**
     * Reads an XMI document and creates one instance per token annotation.
     * <p>
     * Every instance covers a window of
     * {@code leftWindowsize + 1 + rightWindowSize} columns: the features of the
     * token itself plus those of its left and right neighbours. Window positions
     * that lie before the first or after the last token are filled with empty
     * feature lists. The label of an instance is the id that
     * {@code LabelAlphabet.getIdToFeature} returns for the first feature the
     * {@code goldGenerator} produces for the token in the centre of the window.
     *
     * @param fileToDocument  XMI file to deserialize
     * @param leftWindowsize  number of tokens to the left of the centre token, must not be negative
     * @param rightWindowSize number of tokens to the right of the centre token, must not be negative
     * @param tokentypeS      fully qualified name of the annotation type to iterate over
     * @param typesystem      type system used to create the CAS
     * @param goldGenerator   generator whose first feature is used as the label
     * @param generators      generators whose features fill the columns of the window
     * @return the instances in the order of the annotation index, one per token
     * @throws IllegalArgumentException        if a window size is negative or the token type is
     *                                         not part of the type system
     * @throws ResourceInitializationException if the CAS cannot be created
     * @throws SAXException                    if the document cannot be parsed
     * @throws IOException                     if the document cannot be read
     */
    public static List<Instance> createWindowedInstancesFromUIMA(File fileToDocument, int leftWindowsize,
            int rightWindowSize, String tokentypeS, TypeSystemDescription typesystem, AFeatureGenerator goldGenerator,
            AFeatureGenerator... generators) throws ResourceInitializationException, SAXException, IOException {
        if (leftWindowsize < 0 || rightWindowSize < 0) {
            throw new IllegalArgumentException("Window sizes must not be negative: left=" + leftWindowsize
                    + ", right=" + rightWindowSize);
        }

        // deserialize; the stream is closed even if deserialization fails
        CAS cas = CasCreationUtils.createCas(typesystem, null, null);
        FileInputStream fis = new FileInputStream(fileToDocument);
        try {
            XmiCasDeserializer.deserialize(fis, cas);
        } finally {
            fis.close();
        }
        Type tokentype = cas.getTypeSystem().getType(tokentypeS);
        if (tokentype == null) {
            throw new IllegalArgumentException("Unknown token type: " + tokentypeS);
        }

        List<Instance> instances = new ArrayList<Instance>();
        int windowSize = leftWindowsize + 1 + rightWindowSize;
        Queue<List<String>> windowQueue = new LinkedList<List<String>>();
        List<String> labelList = new ArrayList<String>();

        // left padding: the first token has no left neighbours
        for (int i = 0; i < leftWindowsize; i++) {
            windowQueue.add(new LinkedList<String>());
        }

        // index of the token that sits in the centre of the next complete window
        int tokenIndex = 0;
        for (AnnotationFS token : cas.getAnnotationIndex(tokentype)) {
            labelList.add(goldGenerator.generateFeatures(token)[0]);
            windowQueue.add(generateFeatureForToken(token, generators));

            // Emit as soon as the window is complete, then slide it by one token.
            if (windowQueue.size() == windowSize) {
                instances.add(generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex)));
                windowQueue.poll();
                tokenIndex++;
            }
        }

        // Flush the tokens that never got a complete right context: pad the
        // window on the right, emit, slide. Emitting before polling keeps the
        // window centre aligned with labelList.get(tokenIndex).
        while (tokenIndex < labelList.size()) {
            while (windowQueue.size() < windowSize) {
                windowQueue.add(new LinkedList<String>());
            }
            instances.add(generateInstanceFromQueue(windowQueue, windowSize, labelList.get(tokenIndex)));
            windowQueue.poll();
            tokenIndex++;
        }
        return instances;
    }

    /**
     * Builds one instance from the current content of the window.
     * Each queue entry becomes one column; the number of rows is the size of
     * the longest feature list in the window.
     */
    private static Instance generateInstanceFromQueue(Queue<List<String>> windowQueue, int windowSize,
            String goldfeature) {
        int maxNrRows = 0;
        for (List<String> features : windowQueue) {
            maxNrRows = Math.max(maxNrRows, features.size());
        }

        Instance instance = new Instance(maxNrRows, windowSize, LabelAlphabet.getIdToFeature(goldfeature));

        // fill the data column by column; shorter columns keep the array default
        int colNr = 0;
        for (List<String> tokenFeatures : windowQueue) {
            int rowNr = 0;
            for (String feature : tokenFeatures) {
                instance.setFeatureAt(colNr, rowNr, LabelAlphabet.getIdToFeature(feature));
                rowNr++;
            }
            colNr++;
        }
        return instance;
    }

    /**
     * Concatenates the features of all generators for a single token, in
     * generator order.
     */
    private static List<String> generateFeatureForToken(AnnotationFS token, AFeatureGenerator[] generators) {
        List<String> features = new ArrayList<String>();
        for (AFeatureGenerator generator : generators) {
            features.addAll(Arrays.asList(generator.generateFeatures(token)));
        }
        return features;
    }
}
......@@ -9,6 +9,11 @@
<artifactId>DataStructure</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>de.uniwue.ls6.rulelearning</groupId>
<artifactId>InstanceLoading</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
</dependencies>
<parent>
<groupId>de.uniwue.ls6.rulelearning</groupId>
......
package test;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.xml.sax.SAXException;
import de.uniwue.ls6.datastructure.Instance;
import de.uniwue.ls6.datastructure.LabelAlphabet;
import de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator;
import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator;
import de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory;
/**
 * Manual smoke test: loads windowed instances from an XMI document and feeds
 * them to the binary rule learning algorithm.
 */
public class FirstTest {

    /** Document used when no path is passed on the command line. */
    private static final String DEFAULT_DOCUMENT = "C:\\Users\\mkrug\\annoTest\\TestProject\\input\\Aston,-Louise__Lydia.xmi.xmi.xmi";

    /** Type system descriptor used when no path is passed on the command line. */
    private static final String DEFAULT_TYPESYSTEM = "C:\\Users\\mkrug\\annoTest\\TestProject\\descriptor\\MiKalliTypesystem.xml";

    /**
     * Runs the smoke test.
     *
     * @param args optional: {@code args[0]} path to the XMI document,
     *             {@code args[1]} path to the type system descriptor; the
     *             built-in default paths are used for missing arguments
     * @throws Exception if loading the document or learning fails
     */
    public static void main(String[] args) throws Exception {
        File document = new File(args.length > 0 ? args[0] : DEFAULT_DOCUMENT);
        File typesystem = new File(args.length > 1 ? args[1] : DEFAULT_TYPESYSTEM);

        BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
                LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.LOWERCASE),
                LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.UPPERCASE));

        // File.toURL() is deprecated because it does not escape characters that
        // are illegal in URLs; going through toURI() does.
        TypeSystemDescription tsd = TypeSystemDescriptionFactory
                .createTypeSystemDescriptionFromPath(typesystem.toURI().toURL().toString());

        List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2,
                "de.uniwue.kalimachos.coref.type.POS", tsd, new IsUppercaseFeatureGenerator(),
                new WordFeaturegenerator());
        System.out.println(instances.size());
        algorithm.learn(instances.toArray(new Instance[0]));
    }
}
......@@ -8,5 +8,6 @@
<module>DataStructure</module>
<module>Evaluation</module>
<module>RuleLearning</module>
<module>InstanceLoading</module>
</modules>
</project>
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment