Commit 2bec1b9d authored by Markus Krug's avatar Markus Krug
Browse files

Fixed several issues in the rule learning

* Column and row indices were sometimes swapped
* The RepresentationRule constructor forgot to initialize its lists
* Added some debug output (via toString)
* Please replace the System.out.println calls with logging messages!
parent d8403933
......@@ -18,10 +18,13 @@ public class RepresentationRule {
// label of the rule
private Integer label;
public RepresentationRule(int windowSize, int[][] instanceArray, int label) {
private int maximumScore;
public RepresentationRule(int windowSize, int[][] instanceArray, int label, int maximumScore) {
conditionSet = new ArrayList<Set<Integer>>(windowSize);
this.label = label;
this.maximumScore = maximumScore;
for (int col = 0; col < instanceArray[0].length; col++) {
Set<Integer> set = conditionSet.get(col);
if (set == null) {
......@@ -35,10 +38,23 @@ public class RepresentationRule {
}
public RepresentationRule(int windowSize, List<Point> features, int label) {
public RepresentationRule(int windowSize, List<Point> features, int label, int maximumScore) {
this.label = label;
conditionSet = new ArrayList<Set<Integer>>(windowSize);
this.maximumScore = maximumScore;
// get max x
int maxX = 0;
for (Point p : features) {
if (p.x > maxX)
maxX = p.x;
}
// init all the sets as empty sets
for (int i = 0; i <= maxX; i++) {
Set<Integer> set = new HashSet<>();
conditionSet.add(i, set);
}
for (Point p : features) {
Set<Integer> set = conditionSet.get(p.x);
if (set == null) {
......@@ -56,13 +72,12 @@ public class RepresentationRule {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
// TODO
for (Set<Integer> set : conditionSet) {
StringBuilder sb = new StringBuilder("Score: " + maximumScore + " ");
for (Point p : asPointList()) {
sb.append(p.x + "=" + LabelAlphabet.getFeatureToId(p.y)).append(" AND");
}
return "";
return sb.toString().substring(0, sb.toString().length() - 4) + " => " + LabelAlphabet.getFeatureToId(label);
}
// TODO this could also be made faster
......
package de.uniwue.ls6.algorithm.datastructure;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
......@@ -14,6 +15,7 @@ public class RulePass {
/**
 * Creates a rule pass that starts out with an empty, mutable rule list.
 */
public RulePass() {
    // The superclass constructor is invoked implicitly; only the rule list
    // has to be set up here.
    this.ruleSet = new ArrayList<>();
}
public RulePass(List<RepresentationRule> ruleSet) {
......
......@@ -33,9 +33,10 @@ public class Instance {
return label;
}
public void setFeatureAt(int col,int row, int value){
public void setFeatureAt(int col, int row, int value) {
featureArray[col][row] = value;
}
public void setLabel(int label) {
this.label = label;
}
......@@ -51,7 +52,7 @@ public class Instance {
outer: for (Point p : features) {
for (int row = 0; row < featureArray[0].length; row++) {
for (int row = 0; row < getNrRows(); row++) {
if (featureArray[p.x][row] == p.y) {
continue outer;
......@@ -72,7 +73,7 @@ public class Instance {
// create dense matrix for instance
int denseDimension = lastMapping.getDenseMatrixDimension();
FlexCompColMatrix denseInstanceMatrix = new FlexCompColMatrix(denseDimension, denseDimension);
for (Point denseIndices : lastMapping.getMappingMap().keySet()) {
for (Point denseIndices : lastMapping.getInverseMappingMap().keySet()) {
List<Point> features = MatrixUtil.determineFeaturesForIndex(denseIndices, mappings);
......@@ -136,7 +137,34 @@ public class Instance {
return false;
return true;
}
/**
 * Renders this instance for debugging: the gold label on the first line,
 * followed by the feature matrix with one text line per row and one
 * pipe-delimited cell per column.
 *
 * @return a multi-line, human-readable dump of this instance
 */
@Override
public String toString() {
    StringBuilder builder = new StringBuilder();
    builder.append("Instancegoldlabel: ").append(LabelAlphabet.getFeatureToId(label)).append("\n");
    // An instance without columns has no rows to print; return the header
    // only instead of failing on featureArray[0].
    if (featureArray.length == 0) {
        return builder.toString();
    }
    // featureArray is indexed [col][row]; iterate rows in the outer loop so
    // that each output line shows one row across all columns.
    for (int row = 0; row < featureArray[0].length; row++) {
        builder.append("|");
        for (int col = 0; col < featureArray.length; col++) {
            builder.append(LabelAlphabet.getFeatureToId(featureArray[col][row])).append("\t|");
        }
        builder.append("\n");
    }
    return builder.toString();
}
/**
 * Returns the number of columns of the feature matrix.
 *
 * @return the length of the outer dimension of the feature array
 */
public int getNrCols() {
    final int columnCount = featureArray.length;
    return columnCount;
}
/**
 * Returns the number of rows of the feature matrix, read from the first
 * column of the feature array.
 *
 * @return the length of the first column of the feature array
 */
public int getNrRows() {
    final int[] firstColumn = featureArray[0];
    return firstColumn.length;
}
/**
 * Looks up a single feature value.
 *
 * @param col column index (outer dimension of the feature array)
 * @param row row index (inner dimension of the feature array)
 * @return the value stored at the given column and row
 */
public int getValueAt(int col, int row) {
    final int[] column = featureArray[col];
    return column[row];
}
}
......@@ -12,6 +12,10 @@ public class LabelAlphabet {
private static Map<String, Integer> featureToIdMap = new ConcurrentHashMap<String, Integer>();
private static Map<Integer, String> idToFeatureMap = new ConcurrentHashMap<Integer, String>();
static {
addToMaps("DEFAULT_UNKNOWN", 0);
}
private LabelAlphabet() {
}
......@@ -49,4 +53,15 @@ public class LabelAlphabet {
return featureToIdMap.size();
}
/**
 * Builds a debug listing of the alphabet: a header line with the number of
 * entries, followed by one line per entry of the feature-to-id map holding
 * the feature, a tab, and its id.
 *
 * @return the multi-line textual dump of the feature-to-id map
 */
public static String asString() {
    StringBuilder sb = new StringBuilder();
    sb.append("Alphabetsize: ").append(featureToIdMap.size()).append("\n");
    // Iterate over entries rather than keys: each line is built from one
    // key/value pair and no second lookup per key is needed.
    for (Map.Entry<String, Integer> entry : featureToIdMap.entrySet()) {
        sb.append(entry.getKey()).append("\t").append(entry.getValue()).append("\n");
    }
    return sb.toString();
}
}
......@@ -6,6 +6,7 @@ import java.util.Set;
public class MatrixMapping {
//x is col and y is row
HashMap<Point, Point> mappingMap;
HashMap<Point, Point> inverseMappingMap;
//
......@@ -36,6 +37,15 @@ public class MatrixMapping {
public HashMap<Point, Point> getMappingMap() {
return mappingMap;
}
/**
 * Returns the inverse mapping map held by this mapping.
 *
 * @return the map stored in {@code inverseMappingMap} (the same instance, not a copy)
 */
public HashMap<Point, Point> getInverseMappingMap() {
return inverseMappingMap;
}
/**
 * Replaces the inverse mapping map held by this mapping.
 *
 * @param inverseMappingMap the map to store; it is kept by reference, not copied
 */
public void setInverseMappingMap(HashMap<Point, Point> inverseMappingMap) {
this.inverseMappingMap = inverseMappingMap;
}
public void setMappingMap(HashMap<Point, Point> mappingMap) {
this.mappingMap = mappingMap;
......@@ -76,8 +86,20 @@ public class MatrixMapping {
//should return 0 but 42 is also alright
public int getKroneckerMatrixDimension() {
// TODO Auto-generated method stub
return 0;
return kroneckerDimension;
}
/**
 * Lists every entry of the mapping map below a header line, one entry per
 * line, showing the key point and the point it is mapped to.
 *
 * @return the multi-line textual dump of the mapping map
 */
@Override
public String toString() {
    StringBuilder out = new StringBuilder();
    out.append("Matrixmapping: From point(x,y) to point(x,y)\n");
    for (Point source : mappingMap.keySet()) {
        Point target = mappingMap.get(source);
        out.append("(").append(source.x).append(",").append(source.y).append(")")
                .append("\t->")
                .append("(").append(target.x).append(",").append(target.y).append(")")
                .append("\n");
    }
    return out.toString();
}
}
package de.uniwue.ls6.datastructure;
import java.awt.Point;
import java.util.Iterator;
import no.uib.cipr.matrix.MatrixEntry;
import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
......@@ -24,19 +25,18 @@ public class MatrixMcMatrixFace {
public void addInstance(Instance... instances) {
for (Instance i : instances) {
int[][] featureArray = i.getFeatureArray();
// for each column, #columns == windowsize
// l2r t2B
for (int col = 0; col < featureArray[0].length; col++) {
for (int row = 0; row < featureArray.length; row++) {
for (int col = 0; col < i.getNrCols(); col++) {
for (int row = 0; row < i.getNrRows(); row++) {
if (goldLabel == i.getLabel()) {
// add to TP matrix
addToMatrix(featureArray[row][col], col, tpMatrix);
addToMatrix(i.getValueAt(col,row), col, tpMatrix);
} else {
// add to FP matrix
addToMatrix(featureArray[row][col], col, fpMatrix);
addToMatrix(i.getValueAt(col,row), col, fpMatrix);
}
}
}
......@@ -81,23 +81,41 @@ public class MatrixMcMatrixFace {
public Point getLocationOfMaximum() {
double maxScore = 0;
MatrixEntry bestEntry = null;
Point bestEntry = null;
for (MatrixEntry entry : tpMatrix) {
double scoreCurrent = entry.get() - fpMatrix.get(entry.row(), entry.column());
if (scoreCurrent > maxScore) {
maxScore = scoreCurrent;
bestEntry = entry;
bestEntry = new Point(entry.column(),entry.row());
}
}
return bestEntry == null ? null : new Point(bestEntry.column(), bestEntry.row());
return bestEntry == null ? null :bestEntry;
}
/*
* 1. Compute the difference matrix => the best score is max(matrix). 2. Set
* (check!) values to sparse if the sum of TP and FP <=? maxScore. 3. Expand
* the matrix (grouping of n+1 window elements) (iterate over all
* instances).
*/
/**
 * Produces a debug dump of this matrix pair: a header line naming the gold
 * label, then the entries of the TP matrix and of the FP matrix, each under
 * its own heading.
 *
 * @return the multi-line textual dump
 */
@Override
public String toString() {
    String header = "Matrix, Goldlabel=" + LabelAlphabet.getFeatureToId(goldLabel) + "\n";
    String truePositives = "TP-Matrix:\n" + matrixToString(tpMatrix);
    String falsePositives = "FP-Matrix:\n" + matrixToString(fpMatrix);
    return header + truePositives + falsePositives;
}
/**
 * Formats every entry yielded by the matrix iterator as one line holding
 * the column index, the row index resolved through {@link LabelAlphabet}
 * (with the raw row index in parentheses), and the stored value.
 *
 * @param matrix the matrix whose entries are listed
 * @return one line per entry, in iteration order
 */
private String matrixToString(FlexCompColMatrix matrix) {
    StringBuilder dump = new StringBuilder();
    for (MatrixEntry cell : matrix) {
        int column = cell.column();
        int row = cell.row();
        dump.append("Col: ").append(column)
                .append(" Row: ").append(LabelAlphabet.getFeatureToId(row))
                .append("(").append(row).append(")")
                // two blanks before "Value:" are part of the established output format
                .append("  Value: ").append(cell.get())
                .append("\n");
    }
    return dump.toString();
}
}
......@@ -29,9 +29,11 @@ public class MatrixUtil {
// double sum = entry.get() + matrixface.getFpMatrix().get(entry.row(), entry.column());
double sum = entry.get();
if (sum >= maximum) {
matrixMapping.addEntry(new Point(entry.row(), entry.column()));
matrixMapping.addEntry(new Point(entry.column(), entry.row()));
}
}
//infer the -> righthandside
matrixMapping.inferDenseMapValues();
return matrixMapping;
}
......
......@@ -2,6 +2,7 @@ package de.uniwue.ls6.rulelearning.instanceloading.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
......@@ -28,16 +29,13 @@ public class InstanceCreationFactory {
int rightWindowSize, String tokentypeS, TypeSystemDescription typesystem, AFeatureGenerator goldGenerator,
AFeatureGenerator... generators) throws ResourceInitializationException, SAXException, IOException {
//get the windowsize of the experiment
int windowSize = leftWindowsize + 1 + rightWindowSize;
// deserialize
CAS cas = CasCreationUtils.createCas(typesystem, null, null);
FileInputStream fis = new FileInputStream(fileToDocument);
XmiCasDeserializer.deserialize(fis, cas);
CAS cas = deserializeCAS(fileToDocument, typesystem);
Type tokentype = cas.getTypeSystem().getType(tokentypeS);
fis.close();
//
List<Instance> instances = new ArrayList<Instance>();
int windowSize = leftWindowsize + 1 + rightWindowSize;
Queue<List<String>> windowQueue = new LinkedList<List<String>>();
List<String> labelList = new ArrayList<String>();
// init leftWindowsize empty lists
......@@ -74,6 +72,15 @@ public class InstanceCreationFactory {
}
/**
 * Creates a fresh CAS for the given type system and fills it from the
 * XMI-serialized document stored in the given file.
 *
 * @param fileToDocument the XMI file to read
 * @param typesystem the type system description used to create the CAS
 * @return the CAS populated from the file
 * @throws ResourceInitializationException propagated from CAS creation
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws SAXException propagated from the XMI deserialization
 * @throws IOException if reading or closing the file fails
 */
private static CAS deserializeCAS(File fileToDocument, TypeSystemDescription typesystem)
        throws ResourceInitializationException, FileNotFoundException, SAXException, IOException {
    CAS cas = CasCreationUtils.createCas(typesystem, null, null);
    // try-with-resources closes the stream even when deserialization throws,
    // so the file handle is never leaked.
    try (FileInputStream fis = new FileInputStream(fileToDocument)) {
        XmiCasDeserializer.deserialize(fis, cas);
    }
    return cas;
}
private static Instance generateInstanceFromQueue(Queue<List<String>> windowQueue, int windowSize,
String goldfeature) {
int maxNrRows = 0;
......@@ -83,7 +90,7 @@ public class InstanceCreationFactory {
}
int idToFeature = LabelAlphabet.getIdToFeature(goldfeature);
Instance instance = new Instance(maxNrRows, windowSize, idToFeature);
Instance instance = new Instance(windowSize, maxNrRows, idToFeature);
//fill the data
int colNr =0;
for(List<String> tokenFeatures : windowQueue){
......
<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:salsa="http:///de/salsa.ecore" xmlns:type3="http:///de/uniwue/kallimachos/coref/type.ecore" xmlns:type="http:///de/uniwue/kalimachos/coref/type.ecore" xmlns:sl="http:///de/uniwue/sl.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmlns:type4="http:///org/apache/uima/ruta/type.ecore" xmlns:medIE="http:///de/uniwue/medIE.ecore" xmlns:noNamespace="http:///uima/noNamespace.ecore" xmlns:tueba="http:///de/uniwue/tueba.ecore" xmlns:kallimachos="http:///de/uniwue/mk/kallimachos.ecore" xmlns:NounPhraseDetection="http:///NounPhraseDetection.ecore" xmlns:test="http:///de/test.ecore" xmlns:type2="http:///de/uniwue/kalimachos/rutahelper/type.ecore" xmi:version="2.0"><cas:NULL xmi:id="0"/><tcas:DocumentAnnotation xmi:id="1" sofa="1504" begin="0" end="121" language="x-unspecified"/><type:Sentence xmi:id="6" sofa="1504" begin="0" end="121" ConstituencyParse="0.568291193582922"/><type:POS xmi:id="12" sofa="1504" begin="0" end="4" POSTag="NE" Lemma="Erna" CoveringSentence="6"/><type:POS xmi:id="26" sofa="1504" begin="5" end="11" POSTag="VVFIN" Lemma="kennen" CoveringSentence="6"/><type:POS xmi:id="40" sofa="1504" begin="12" end="15" POSTag="PPER" Lemma="sie" CoveringSentence="6"/><type:POS xmi:id="54" sofa="1504" begin="16" end="20" POSTag="ADV" Lemma="noch" CoveringSentence="6"/><type:POS xmi:id="68" sofa="1504" begin="21" end="26" POSTag="PTKNEG" Lemma="nicht" CoveringSentence="6"/><type:POS xmi:id="82" sofa="1504" begin="26" end="27" POSTag="$," Lemma="," CoveringSentence="6"/><type:POS xmi:id="96" sofa="1504" begin="28" end="31" POSTag="KON" Lemma="und" CoveringSentence="6"/><type:POS xmi:id="110" sofa="1504" begin="32" end="36" POSTag="PPOSAT" Lemma="sein" CoveringSentence="6"/><type:POS xmi:id="124" sofa="1504" begin="37" end="46" POSTag="NN" Lemma="Vorschlag" CoveringSentence="6"/><type:POS xmi:id="138" sofa="1504" begin="46" end="47" POSTag="$," Lemma="," 
CoveringSentence="6"/><type:POS xmi:id="152" sofa="1504" begin="48" end="51" POSTag="PPER" Lemma="sie" CoveringSentence="6"/><type:POS xmi:id="166" sofa="1504" begin="52" end="55" POSTag="PPER" Lemma="ihr" CoveringSentence="6"/><type:POS xmi:id="180" sofa="1504" begin="56" end="58" POSTag="PTKZU" Lemma="zu" CoveringSentence="6"/><type:POS xmi:id="194" sofa="1504" begin="59" end="66" POSTag="VVINF" Lemma="bringen" CoveringSentence="6"/><type:POS xmi:id="208" sofa="1504" begin="67" end="70" POSTag="KON" Lemma="und" CoveringSentence="6"/><type:POS xmi:id="222" sofa="1504" begin="71" end="81" POSTag="VVIZU" Lemma="vorlesen" CoveringSentence="6"/><type:POS xmi:id="236" sofa="1504" begin="81" end="82" POSTag="$," Lemma="," CoveringSentence="6"/><type:POS xmi:id="250" sofa="1504" begin="83" end="88" POSTag="VAFIN" Lemma="werden" CoveringSentence="6"/><type:POS xmi:id="264" sofa="1504" begin="89" end="93" POSTag="APPR" Lemma="ohne" CoveringSentence="6"/><type:POS xmi:id="278" sofa="1504" begin="94" end="101" POSTag="NN" Lemma="Weigern" CoveringSentence="6"/><type:POS xmi:id="292" sofa="1504" begin="102" end="105" POSTag="APPR" Lemma="von" CoveringSentence="6"/><type:POS xmi:id="306" sofa="1504" begin="106" end="109" POSTag="PPER" Lemma="ihr" CoveringSentence="6"/><type:POS xmi:id="320" sofa="1504" begin="110" end="120" POSTag="VVPP" Lemma="annehmen" CoveringSentence="6"/><type:POS xmi:id="334" sofa="1504" begin="120" end="121" POSTag="$." Lemma="." 
CoveringSentence="6"/><type:RFTagType xmi:id="348" sofa="1504" begin="0" end="4" Tag="N.Name.Nom.Sg.Fem"/><type:RFTagType xmi:id="353" sofa="1504" begin="5" end="11" Tag="VFIN.Full.3.Sg.Past.Ind"/><type:RFTagType xmi:id="358" sofa="1504" begin="12" end="15" Tag="PRO.Pers.Subst.3.Acc.Sg.Fem"/><type:RFTagType xmi:id="363" sofa="1504" begin="16" end="20" Tag="ADV"/><type:RFTagType xmi:id="368" sofa="1504" begin="21" end="26" Tag="PART.Neg"/><type:RFTagType xmi:id="373" sofa="1504" begin="26" end="27" Tag="SYM.Pun.Comma"/><type:RFTagType xmi:id="378" sofa="1504" begin="28" end="31" Tag="CONJ.Coord.-"/><type:RFTagType xmi:id="383" sofa="1504" begin="32" end="36" Tag="PRO.Poss.Attr.-.Nom.Sg.Masc"/><type:RFTagType xmi:id="388" sofa="1504" begin="37" end="46" Tag="N.Reg.Nom.Sg.Masc"/><type:RFTagType xmi:id="393" sofa="1504" begin="46" end="47" Tag="SYM.Pun.Comma"/><type:RFTagType xmi:id="398" sofa="1504" begin="48" end="51" Tag="PRO.Pers.Subst.3.Acc.Pl.*"/><type:RFTagType xmi:id="403" sofa="1504" begin="52" end="55" Tag="PRO.Pers.Subst.3.Dat.Sg.Fem"/><type:RFTagType xmi:id="408" sofa="1504" begin="56" end="58" Tag="PART.Zu"/><type:RFTagType xmi:id="413" sofa="1504" begin="59" end="66" Tag="VINF.Full.-"/><type:RFTagType xmi:id="418" sofa="1504" begin="67" end="70" Tag="CONJ.Coord.-"/><type:RFTagType xmi:id="423" sofa="1504" begin="71" end="81" Tag="VINF.Full.zu"/><type:RFTagType xmi:id="428" sofa="1504" begin="81" end="82" Tag="SYM.Pun.Comma"/><type:RFTagType xmi:id="433" sofa="1504" begin="83" end="88" Tag="VFIN.Aux.3.Sg.Past.Ind"/><type:RFTagType xmi:id="438" sofa="1504" begin="89" end="93" Tag="APPR.Acc"/><type:RFTagType xmi:id="443" sofa="1504" begin="94" end="101" Tag="N.Reg.Acc.Sg.Neut"/><type:RFTagType xmi:id="448" sofa="1504" begin="102" end="105" Tag="APPR.Dat"/><type:RFTagType xmi:id="453" sofa="1504" begin="106" end="109" Tag="PRO.Pers.Subst.3.Dat.Sg.Fem"/><type:RFTagType xmi:id="458" sofa="1504" begin="110" end="120" Tag="VPP.Full.Psp"/><type:RFTagType 
xmi:id="463" sofa="1504" begin="120" end="121" Tag="SYM.Pun.Sent"/><type:Morphology xmi:id="468" sofa="1504" begin="0" end="4" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="477" sofa="1504" begin="5" end="11" Gender="" Kasus="" Number="sg" Komparation="" Person="3"/><type:Morphology xmi:id="486" sofa="1504" begin="12" end="15" Gender="fem" Kasus="nom" Number="sg" Komparation="" Person="3"/><type:Morphology xmi:id="495" sofa="1504" begin="16" end="20" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="504" sofa="1504" begin="21" end="26" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="513" sofa="1504" begin="26" end="27" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="522" sofa="1504" begin="28" end="31" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="531" sofa="1504" begin="32" end="36" Gender="masc" Kasus="nom" Number="sg" Komparation="" Person=""/><type:Morphology xmi:id="540" sofa="1504" begin="37" end="46" Gender="masc" Kasus="nom" Number="sg" Komparation="" Person=""/><type:Morphology xmi:id="549" sofa="1504" begin="46" end="47" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="558" sofa="1504" begin="48" end="51" Gender="fem" Kasus="nom" Number="sg" Komparation="" Person="3"/><type:Morphology xmi:id="567" sofa="1504" begin="52" end="55" Gender="neut" Kasus="acc" Number="sg" Komparation="" Person=""/><type:Morphology xmi:id="576" sofa="1504" begin="56" end="58" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="585" sofa="1504" begin="59" end="66" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="594" sofa="1504" begin="67" end="70" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="603" sofa="1504" begin="71" end="81" Gender="" Kasus="" Number="" Komparation="" 
Person=""/><type:Morphology xmi:id="612" sofa="1504" begin="81" end="82" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="621" sofa="1504" begin="83" end="88" Gender="" Kasus="" Number="sg" Komparation="" Person="3"/><type:Morphology xmi:id="630" sofa="1504" begin="89" end="93" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="639" sofa="1504" begin="94" end="101" Gender="fem" Kasus="acc" Number="pl" Komparation="" Person=""/><type:Morphology xmi:id="648" sofa="1504" begin="102" end="105" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="657" sofa="1504" begin="106" end="109" Gender="fem" Kasus="dat" Number="sg" Komparation="" Person="3"/><type:Morphology xmi:id="666" sofa="1504" begin="110" end="120" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:Morphology xmi:id="675" sofa="1504" begin="120" end="121" Gender="" Kasus="" Number="" Komparation="" Person=""/><type:SelfMorph xmi:id="684" sofa="1504" begin="0" end="4" Numerus="Sg" Gender="Neut"/><type:SelfMorph xmi:id="691" sofa="1504" begin="5" end="11" Numerus="Sg" Gender="--"/><type:SelfMorph xmi:id="698" sofa="1504" begin="12" end="15" Numerus="Sg" Gender="Fem"/><type:SelfMorph xmi:id="705" sofa="1504" begin="16" end="20" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="712" sofa="1504" begin="21" end="26" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="719" sofa="1504" begin="26" end="27" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="726" sofa="1504" begin="28" end="31" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="733" sofa="1504" begin="32" end="36" Numerus="Sg" Gender="Masc"/><type:SelfMorph xmi:id="740" sofa="1504" begin="37" end="46" Numerus="Sg" Gender="Masc"/><type:SelfMorph xmi:id="747" sofa="1504" begin="46" end="47" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="754" sofa="1504" begin="48" end="51" Numerus="Sg" Gender="Fem"/><type:SelfMorph xmi:id="761" sofa="1504" begin="52" end="55" 
Numerus="--" Gender="Fem"/><type:SelfMorph xmi:id="768" sofa="1504" begin="56" end="58" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="775" sofa="1504" begin="59" end="66" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="782" sofa="1504" begin="67" end="70" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="789" sofa="1504" begin="71" end="81" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="796" sofa="1504" begin="81" end="82" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="803" sofa="1504" begin="83" end="88" Numerus="Sg" Gender="--"/><type:SelfMorph xmi:id="810" sofa="1504" begin="89" end="93" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="817" sofa="1504" begin="94" end="101" Numerus="Pl" Gender="Masc"/><type:SelfMorph xmi:id="824" sofa="1504" begin="102" end="105" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="831" sofa="1504" begin="106" end="109" Numerus="Sg" Gender="Fem"/><type:SelfMorph xmi:id="838" sofa="1504" begin="110" end="120" Numerus="--" Gender="--"/><type:SelfMorph xmi:id="845" sofa="1504" begin="120" end="121" Numerus="--" Gender="--"/><type:Chunk xmi:id="852" sofa="1504" begin="0" end="4" ChunkType="NC"/><type:Chunk xmi:id="857" sofa="1504" begin="5" end="11" ChunkType="VC"/><type:Chunk xmi:id="862" sofa="1504" begin="12" end="15" ChunkType="NC"/><type:Chunk xmi:id="867" sofa="1504" begin="32" end="46" ChunkType="NC"/><type:Chunk xmi:id="872" sofa="1504" begin="48" end="51" ChunkType="NC"/><type:Chunk xmi:id="877" sofa="1504" begin="52" end="55" ChunkType="NC"/><type:Chunk xmi:id="882" sofa="1504" begin="56" end="66" ChunkType="VC"/><type:Chunk xmi:id="887" sofa="1504" begin="71" end="81" ChunkType="VC"/><type:Chunk xmi:id="892" sofa="1504" begin="83" end="88" ChunkType="VC"/><type:Chunk xmi:id="897" sofa="1504" begin="89" end="101" ChunkType="PC"/><type:Chunk xmi:id="902" sofa="1504" begin="102" end="109" ChunkType="PC"/><type:Chunk xmi:id="907" sofa="1504" begin="110" end="120" ChunkType="VC"/><type:DependencyParse xmi:id="912" 
sofa="1504" begin="0" end="4" Headname="kannte" WordNumber="2" DependencyRelation="MO"/><type:DependencyParse xmi:id="920" sofa="1504" begin="5" end="11" Headname="ROOT" WordNumber="0" DependencyRelation="--"/><type:DependencyParse xmi:id="928" sofa="1504" begin="12" end="15" Headname="kannte" WordNumber="2" DependencyRelation="OA"/><type:DependencyParse xmi:id="936" sofa="1504" begin="16" end="20" Headname="nicht" WordNumber="5" DependencyRelation="MO"/><type:DependencyParse xmi:id="944" sofa="1504" begin="21" end="26" Headname="kannte" WordNumber="2" DependencyRelation="NG"/><type:DependencyParse xmi:id="952" sofa="1504" begin="26" end="27" Headname="nicht" WordNumber="5" DependencyRelation="--"/><type:DependencyParse xmi:id="960" sofa="1504" begin="28" end="31" Headname="kannte" WordNumber="2" DependencyRelation="CD"/><type:DependencyParse xmi:id="968" sofa="1504" begin="32" end="36" Headname="Vorschlag" WordNumber="9" DependencyRelation="NK"/><type:DependencyParse xmi:id="976" sofa="1504" begin="37" end="46" Headname="wurde" WordNumber="18" DependencyRelation="SB"/><type:DependencyParse xmi:id="984" sofa="1504" begin="46" end="47" Headname="Vorschlag" WordNumber="9" DependencyRelation="--"/><type:DependencyParse xmi:id="992" sofa="1504" begin="48" end="51" Headname="bringen" WordNumber="14" DependencyRelation="OA"/><type:DependencyParse xmi:id="1000" sofa="1504" begin="52" end="55" Headname="vorzulesen" WordNumber="16" DependencyRelation="DA"/><type:DependencyParse xmi:id="1008" sofa="1504" begin="56" end="58" Headname="bringen" WordNumber="14" DependencyRelation="PM"/><type:DependencyParse xmi:id="1016" sofa="1504" begin="59" end="66" Headname="Vorschlag" WordNumber="9" DependencyRelation="OC"/><type:DependencyParse xmi:id="1024" sofa="1504" begin="67" end="70" Headname="bringen" WordNumber="14" DependencyRelation="CD"/><type:DependencyParse xmi:id="1032" sofa="1504" begin="71" end="81" Headname="und" WordNumber="15" 
DependencyRelation="CJ"/><type:DependencyParse xmi:id="1040" sofa="1504" begin="81" end="82" Headname="vorzulesen" WordNumber="16" DependencyRelation="--"/><type:DependencyParse xmi:id="1048" sofa="1504" begin="83" end="88" Headname="und" WordNumber="7" DependencyRelation="CJ"/><type:DependencyParse xmi:id="1056" sofa="1504" begin="89" end="93" Headname="angenommen" WordNumber="23" DependencyRelation="MO"/><type:DependencyParse xmi:id="1064" sofa="1504" begin="94" end="101" Headname="ohne" WordNumber="19" DependencyRelation="NK"/><type:DependencyParse xmi:id="1072" sofa="1504" begin="102" end="105" Headname="Weigern" WordNumber="20" DependencyRelation="PG"/><type:DependencyParse xmi:id="1080" sofa="1504" begin="106" end="109" Headname="von" WordNumber="21" DependencyRelation="NK"/><type:DependencyParse xmi:id="1088" sofa="1504" begin="110" end="120" Headname="wurde" WordNumber="18" DependencyRelation="OC"/><type:DependencyParse xmi:id="1096" sofa="1504" begin="120" end="121" Headname="angenommen" WordNumber="23" DependencyRelation="--"/><type:NamedEntity xmi:id="1104" sofa="1504" begin="0" end="4" NEType="CORE" Construction="VORNAME|"/><type:NamedEntity xmi:id="1120" sofa="1504" begin="12" end="15" NEType="PRON" Construction="PRONOUN"/><type:NamedEntity xmi:id="1136" sofa="1504" begin="32" end="36" NEType="PRON" Construction="PRONOUN"/><type:NamedEntity xmi:id="1152" sofa="1504" begin="48" end="51" NEType="PRON" Construction="PRONOUN"/><type:NamedEntity xmi:id="1168" sofa="1504" begin="52" end="55" NEType="PRON" Construction="PRONOUN"/><type:NamedEntity xmi:id="1184" sofa="1504" begin="106" end="109" NEType="PRON" Construction="PRONOUN"/><type:StanfordParse xmi:id="1200" sofa="1504" begin="0" end="121" PhraseType="ROOT"/><type:StanfordParse xmi:id="1208" sofa="1504" begin="0" end="121" PhraseType="S" Parent="1200"/><type:StanfordParse xmi:id="1216" sofa="1504" begin="0" end="4" PhraseType="NP" Parent="1208"/><type:StanfordParse xmi:id="1224" sofa="1504" begin="0" 
end="4" PhraseType="NE" Parent="1216"/><type:StanfordParse xmi:id="1232" sofa="1504" begin="5" end="11" PhraseType="VVFIN" Parent="1208"/><type:StanfordParse xmi:id="1240" sofa="1504" begin="12" end="15" PhraseType="PPER"/><type:StanfordParse xmi:id="1248" sofa="1504" begin="16" end="20" PhraseType="ADV" Parent="1208"/><type:StanfordParse xmi:id="1256" sofa="1504" begin="21" end="26" PhraseType="PTKNEG" Parent="1208"/><type:StanfordParse xmi:id="1264" sofa="1504" begin="26" end="27" PhraseType="$,"/><type:StanfordParse xmi:id="1272" sofa="1504" begin="28" end="120" PhraseType="S" Parent="1208"/><type:StanfordParse xmi:id="1280" sofa="1504" begin="28" end="31" PhraseType="KON" Parent="1272"/><type:StanfordParse xmi:id="1288" sofa="1504" begin="32" end="46" PhraseType="NP" Parent="1272"/><type:StanfordParse xmi:id="1296" sofa="1504" begin="32" end="36" PhraseType="PPOSAT" Parent="1288"/><type:StanfordParse xmi:id="1304" sofa="1504" begin="37" end="46" PhraseType="NN" Parent="1288"/><type:StanfordParse xmi:id="1312" sofa="1504" begin="46" end="47" PhraseType="$," Parent="1272"/><type:StanfordParse xmi:id="1320" sofa="1504" begin="48" end="81" PhraseType="CS" Parent="1272"/><type:StanfordParse xmi:id="1328" sofa="1504" begin="48" end="66" PhraseType="S" Parent="1320"/><type:StanfordParse xmi:id="1336" sofa="1504" begin="48" end="51" PhraseType="NP" Parent="1328"/><type:StanfordParse xmi:id="1344" sofa="1504" begin="48" end="51" PhraseType="PPER" Parent="1336"/><type:StanfordParse xmi:id="1352" sofa="1504" begin="52" end="66" PhraseType="VP" Parent="1328"/><type:StanfordParse xmi:id="1360" sofa="1504" begin="52" end="55" PhraseType="NP" Parent="1352"/><type:StanfordParse xmi:id="1368" sofa="1504" begin="52" end="55" PhraseType="PPER" Parent="1464"/><type:StanfordParse xmi:id="1376" sofa="1504" begin="56" end="66" PhraseType="VZ" Parent="1352"/><type:StanfordParse xmi:id="1384" sofa="1504" begin="56" end="58" PhraseType="PTKZU" Parent="1376"/><type:StanfordParse 
xmi:id="1392" sofa="1504" begin="59" end="66" PhraseType="VVINF" Parent="1376"/><type:StanfordParse xmi:id="1400" sofa="1504" begin="67" end="70" PhraseType="KON" Parent="1320"/><type:StanfordParse xmi:id="1408" sofa="1504" begin="71" end="81" PhraseType="VVINF" Parent="1320"/><type:StanfordParse xmi:id="1416" sofa="1504" begin="81" end="82" PhraseType="$," Parent="1272"/><type:StanfordParse xmi:id="1424" sofa="1504" begin="83" end="88" PhraseType="VAFIN" Parent="1272"/><type:StanfordParse xmi:id="1432" sofa="1504" begin="89" end="120" PhraseType="VP" Parent="1272"/><type:StanfordParse xmi:id="1440" sofa="1504" begin="89" end="101" PhraseType="PP" Parent="1432"/><type:StanfordParse xmi:id="1448" sofa="1504" begin="89" end="93" PhraseType="APPR" Parent="1440"/><type:StanfordParse xmi:id="1456" sofa="1504" begin="94" end="101" PhraseType="NN" Parent="1440"/><type:StanfordParse xmi:id="1464" sofa="1504" begin="102" end="109" PhraseType="PP" Parent="1432"/><type:StanfordParse xmi:id="1472" sofa="1504" begin="102" end="105" PhraseType="APPR" Parent="1464"/><type:StanfordParse xmi:id="1480" sofa="1504" begin="106" end="109" PhraseType="PPER" Parent="1464"/><type:StanfordParse xmi:id="1488" sofa="1504" begin="110" end="120" PhraseType="VVPP" Parent="1432"/><type:StanfordParse xmi:id="1496" sofa="1504" begin="120" end="121" PhraseType="$." 
Parent="1208"/><cas:Sofa xmi:id="1504" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Erna kannte sie noch nicht, und sein Vorschlag, sie ihr zu bringen und vorzulesen, wurde ohne Weigern von ihr angenommen."/><cas:View sofa="1504" members="1 6 12 26 40 54 68 82 96 110 124 138 152 166 180 194 208 222 236 250 264 278 292 306 320 334 348 353 358 363 368 373 378 383 388 393 398 403 408 413 418 423 428 433 438 443 448 453 458 463 468 477 486 495 504 513 522 531 540 549 558 567 576 585 594 603 612 621 630 639 648 657 666 675 684 691 698 705 712 719 726 733 740 747 754 761 768 775 782 789 796 803 810 817 824 831 838 845 852 857 862 867 872 877 882 887 892 897 902 907 912 920 928 936 944 952 960 968 976 984 992 1000 1008 1016 1024 1032 1040 1048 1056 1064 1072 1080 1088 1096 1104 1120 1136 1152 1168 1184 1200 1208 1216 1224 1232 1240 1248 1256 1264 1272 1280 1288 1296 1304 1312 1320 1328 1336 1344 1352 1360 1368 1376 1384 1392 1400 1408 1416 1424 1432 1440 1448 1456 1464 1472 1480 1488 1496"/></xmi:XMI>
\ No newline at end of file
......@@ -39,14 +39,18 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
Set<Instance> instancesForPass = new HashSet<Instance>(Arrays.asList(instances));
int passIndex = 0;
int currentGoldIndex = goldLabel;
// TODO include a logger
System.out.println("Train binary classifier: " + "Goldlabel: " + LabelAlphabet.getFeatureToId(goldLabel)
+ "\tvs\t" + LabelAlphabet.getFeatureToId(otherLabel));
while (morePasses(instancesForPass, currentGoldIndex)) {
// update the learning objective
if (passIndex % 2 == 0) {
currentGoldIndex = goldLabel;
} else {
currentGoldIndex = otherLabel;
}
System.out.println("Perform pass for Label: " + LabelAlphabet.getFeatureToId(currentGoldIndex));
System.out.println("Remaining instances for pass: " + instancesForPass.size());
// create a new pass
learnRulePass(currentGoldIndex, instancesForPass);
......@@ -72,18 +76,22 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
Set<Instance> temporaryCopy = new HashSet<Instance>(instancesForPass);
RulePass pass = new RulePass();
while (true) {
RepresentationRule learnedRule = learnRule(goldIndex, temporaryCopy);
if (learnedRule != null) {
pass.addRule(learnedRule);
System.out.println("Learned rule: " + learnedRule);
// modify instances so that already classified instances are
// removed
// from the trainingdata
temporaryCopy = removeAlreadyClassifiable(temporaryCopy, pass);
RepresentationRule learnedRule = learnRule(goldIndex, temporaryCopy);
if (learnedRule != null) {
pass.addRule(learnedRule);
// modify instances so that already classified instances are removed
// from the trainingdata
temporaryCopy = removeAlreadyClassifiable(temporaryCopy, pass);
} else {
if (!pass.getRuleset().isEmpty()) {
passes.add(pass);
} else {
if (!pass.getRuleset().isEmpty()) {
passes.add(pass);
}
return;
}
return;
}
}
......@@ -109,23 +117,38 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
private RepresentationRule learnRule(int goldLabel, Collection<Instance> instances) {
List<MatrixMapping> mappings = new ArrayList<MatrixMapping>();
// perform the training
int windowSize = instances.iterator().next().getFeatureArray()[0].length;
MatrixMcMatrixFace matrixInFocus = new MatrixMcMatrixFace(LabelAlphabet.getSize(),
windowSize, goldLabel);
int windowSize = instances.iterator().next().getFeatureArray().length;
MatrixMcMatrixFace bestMatrix = new MatrixMcMatrixFace(LabelAlphabet.getSize(), windowSize, goldLabel);
// add all instances to the initial matrix
bestMatrix.addInstance(instances.toArray(new Instance[0]));
int maximumScore = 0;
while (betterRuleCanBeLearned(maximumScore, matrixInFocus)) {
maximumScore = matrixInFocus.getMaximumScore();
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(matrixInFocus, maximumScore);
Point maxEntryLocation = bestMatrix.getLocationOfMaximum();
while (true) {
maximumScore = bestMatrix.getMaximumScore();
maxEntryLocation = bestMatrix.getLocationOfMaximum();
// map matrix to dense matrix
MatrixMapping mappingForMaximum = MatrixUtil.getMappingForMaximum(bestMatrix, maximumScore);
mappings.add(mappingForMaximum);
matrixInFocus = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
// expand in kronecker fashion
bestMatrix = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
if (!betterRuleCanBeLearned(maximumScore, bestMatrix)) {
mappings.remove(mappingForMaximum);
break;
}
}
// determine the best rule that has not been used before! TODO is this even necessary???
Point maxEntryLocation = matrixInFocus.getLocationOfMaximum();
// determine the best rule that has not been used before! TODO is this
// even necessary???
if (maxEntryLocation == null)
return null;
List<Point> featuresAtMax = MatrixUtil.determineFeaturesForIndex(maxEntryLocation, mappings);
return new RepresentationRule(windowSize, featuresAtMax, goldLabel);
return new RepresentationRule(windowSize, featuresAtMax, goldLabel,maximumScore);
}
private boolean betterRuleCanBeLearned(int maximumScore, MatrixMcMatrixFace matrixInFocus) {
......
......@@ -20,8 +20,9 @@ public class FirstTest {
public static void main(String[] args) throws Exception {
File document = new File("C:\\Users\\mkrug\\annoTest\\TestProject\\input\\Aston,-Louise__Lydia.xmi.xmi.xmi");
File typesystem = new File("C:\\Users\\mkrug\\annoTest\\TestProject\\descriptor\\MiKalliTypesystem.xml");
File document = new File("resources\\Aston,-Louise__Lydia.xmi.xmi.xmi");
File doc2 = new File("resources\\Ahlefeld,-Charlotte-von_Erna1421[Lukas].xmi.xmi");
File typesystem = new File("resources\\MiKalliTypesystem.xml");
BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.LOWERCASE),
......@@ -31,6 +32,7 @@ public class FirstTest {
List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2, "de.uniwue.kalimachos.coref.type.POS", tsd, new IsUppercaseFeatureGenerator(), new WordFeaturegenerator());
System.out.println(instances.size());
algorithm.learn(instances.toArray(new Instance[0]));
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment