Commit e1f3f634 authored by Markus Krug's avatar Markus Krug
Browse files

updated the algo

parent 727133ce
......@@ -9,40 +9,39 @@ import de.uniwue.mk.kall.coref.datastructures.Mention;
import de.uniwue.mk.kall.coref.datastructures.MentionPair;
import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
public class CorefRestartAlgo {
public static List<Mention> resolve(Mention act, List<MentionPair> alreadyResolved, CAS cas,
Util_impl util, List<Mention> mentions) throws Exception {
List<Mention> list = new ArrayList<Mention>();
Mention resolved = null;
if (act.isPronoun()) {
resolved = CorefRestartPronounAlgo.resolve(act, alreadyResolved, cas, util, mentions);
public static List<Mention> resolve(Mention act, List<MentionPair> alreadyResolved, CAS cas, Util_impl util,
List<Mention> mentions) throws Exception {
}
List<Mention> list = new ArrayList<Mention>();
Mention resolved = null;
if (act.isPronoun()) {
else {
resolved = CorefRestartNounsAlgo.resolve(act, alreadyResolved, cas, util, mentions);
}
resolved = CorefRestartPronounAlgo.resolve(act, alreadyResolved, cas, util, mentions);
if (resolved != null) {
list.add(resolved);
}
}
else {
resolved = CorefRestartNounsAlgo.resolve(act, alreadyResolved, cas, util, mentions);
}
if (resolved != null) {
list.add(resolved);
}
// resolve more with NN in sentence lexico syntactic constraints TODO those make mist !!
// List<Mention> resolve = NNInSentenceResolver.resolve(act.getMarkable(), cas, util,
// alreadyResolved, mentions);
//
// for (Mention l : resolve) {
// list.add(l);
// }
// resolve more with NN in sentence lexico syntactic constraints TODO
// those make mist !!
// List<Mention> resolve =
// NNInSentenceResolver.resolve(act.getMarkable(), cas, util,
// alreadyResolved, mentions);
//
// for (Mention l : resolve) {
// list.add(l);
// }
return list;
return list;
}
}
}
......@@ -2,7 +2,6 @@ package de.uniwue.mk.kall.coref5.app;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
......@@ -12,7 +11,6 @@ import java.util.Set;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.cas.text.AnnotationFS;
import de.uniwue.mk.kall.coref.datastructures.ClusterCreator;
......@@ -57,7 +55,7 @@ public class MainAppAlgo5 {
public static void main(String[] args) throws Exception {
File in = new File(
"C:\\Users\\mkrug\\owncloud_neu\\projekt romangeschichte\\named entity recognition\\Komplettannotationen");
"C:\\Users\\mkrug\\owncloud_neu\\kallimachos\\named entity recognition\\Goldstandard-Rescaled\\output+speech");
// File in = new File("C:\\owncloud\\outputRescaled");
// File in = new File(
......@@ -103,6 +101,8 @@ public class MainAppAlgo5 {
totalNesThatAreResolved = 0;
for (File f : in.listFiles()) {
if (!f.getName().endsWith(".xmi"))
continue;
System.out.println(f);
XmiCasDeserializer.deserialize(new FileInputStream(f), cas);
......@@ -205,7 +205,8 @@ public class MainAppAlgo5 {
// evaluate vertical resolution
annotate(systemCluster, cas, util);
XmiCasSerializer.serialize(cas, new FileOutputStream(out.getAbsolutePath() + "\\" + f.getName()));
// XmiCasSerializer.serialize(cas, new
// FileOutputStream(out.getAbsolutePath() + "\\" + f.getName()));
}
......@@ -536,6 +537,10 @@ public class MainAppAlgo5 {
for (Mention m : mc.getClusterList()) {
// the id has to be separate
m.getMarkable().setFeatureValueFromString(util.getNEFeatureSystemId(), mc.getId());
// also paste to gold if that one is null
if (m.getMarkable().getFeatureValueAsString(util.getNEId()) == null) {
m.getMarkable().setFeatureValueFromString(util.getNEId(), mc.getId());
}
if (m.getResolvedMention() != null) {
m.getMarkable().setFeatureValue(util.getNEFeatureResolvedReference(),
......
......@@ -11,77 +11,78 @@ import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
public class SichReflexivePronounResolver {
// this rule resolves "sich"
// this rule resolves "sich"
public static AnnotationFS resolve(AnnotationFS ne, CAS cas, Util_impl util,
List<MentionPair> resolvedPairs) {
public static AnnotationFS resolve(AnnotationFS ne, CAS cas, Util_impl util, List<MentionPair> resolvedPairs) {
AnnotationFS resolved = null;
AnnotationFS resolved = null;
// if the next token is a NE we use this !!
// if the next token is a NE we use this !!
AnnotationFS nextTok = CorefRuleUtil.getNextToken(ne, cas, util);
AnnotationFS prevTok = CorefRuleUtil.getPreviousToken(ne, cas, util);
AnnotationFS nextTok = CorefRuleUtil.getNextToken(ne, cas, util);
AnnotationFS prevTok = CorefRuleUtil.getPreviousToken(ne, cas, util);
if ((nextTok != null && util.isNamedEntity(nextTok))) {
if ((nextTok != null && util.isNamedEntity(nextTok))) {
if (prevTok != null && util.isNamedEntity(prevTok)) {
return util.getPreviousAnnotation(ne);
}
if (prevTok != null && util.isNamedEntity(prevTok)) {
return util.getPreviousAnnotation(ne);
}
return util.getNextAnnotation(ne);
}
return util.getNextAnnotation(ne);
}
List<AnnotationFS> allPreviousInSentence = CorefRuleUtil
.getAllPreviousInSentence(ne, cas, util);
List<AnnotationFS> allPreviousInSentence = CorefRuleUtil.getAllPreviousInSentence(ne, cas, util);
for (AnnotationFS prev : allPreviousInSentence) {
for (AnnotationFS prev : allPreviousInSentence) {
AnnotationFS covChunk = util.getCoveringChunk(prev);
AnnotationFS covChunk = util.getCoveringChunk(prev);
String chunkType = "";
if (covChunk != null) {
String chunkType = "";
if (covChunk != null) {
chunkType = covChunk.getFeatureValueAsString(util.getChunkFeature());
}
if (util.isPronoun(prev)) {
chunkType = covChunk.getFeatureValueAsString(util.getChunkFeature());
}
if (util.isPronoun(prev)) {
if (prev.getCoveredText().matches("mich|mir|mein.*|ich|uns|wir"))
continue;
// should not be followed by a prep !!
if (prev.getCoveredText().matches("mich|mir|mein.*|ich|uns|wir"))
continue;
// should not be followed by a prep !!
AnnotationFS previousToken = CorefRuleUtil.getPreviousToken(prev, cas, util);
AnnotationFS previousToken = CorefRuleUtil.getPreviousToken(prev, cas, util);
if (previousToken != null && util.getPosTag(previousToken).matches("(APP.*)"))
continue;
// TODO this is not working properly!!
String posTag = util.getPosTag(previousToken);
if (previousToken != null && posTag != null && posTag.matches("(APP.*)"))
continue;
if (chunkType.contains("PC"))
continue;
if (chunkType.contains("PC"))
continue;
return prev;
}
return prev;
}
else {
else {
if (chunkType.contains("PC"))
continue;
if (chunkType.contains("PC"))
continue;
// vorherige token von prev ist genitiv => skip this is the same as with pc but ...
// vorherige token von prev ist genitiv => skip this is the same
// as with pc but ...
AnnotationFS prevTokPrev = CorefRuleUtil.getPreviousToken(prev, cas, util);
AnnotationFS prevTokPrev = CorefRuleUtil.getPreviousToken(prev, cas, util);
if (prevTokPrev != null
&& util.getCovered(prevTokPrev, util.getRFType()).get(0)
.getFeatureValueAsString(util.getRFTagFeature()).contains("Gen"))
continue;
if (prevTokPrev != null
&& util.getCovered(prevTokPrev, util.getRFType()).get(0)
.getFeatureValueAsString(util.getRFTagFeature()).contains("Gen"))
continue;
//
//
return prev;
}
}
return prev;
}
}
return resolved;
}
return resolved;
}
}
......@@ -9,61 +9,57 @@ import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
public class SameHeadAlsEin extends ANounCoreferenceRule {
// NE Verbs als ein lustiger NE
// NE Verbs als ein lustiger NE
@Override
public boolean apply(AnnotationFS act, AnnotationFS cand, CAS cas, Util_impl util) {
@Override
public boolean apply(AnnotationFS act, AnnotationFS cand, CAS cas, Util_impl util) {
// cand may not be a reflexiv pronoun !!
// cand may not be a reflexiv pronoun !!
List<AnnotationFS> covered = util.getCovered(cand, util.getPOSType());
List<AnnotationFS> covered = util.getCovered(cand, util.getPOSType());
if (covered.size() > 0
&& covered.get(0).getFeatureValueAsString(util.getPOSTagFeature()).equals("PRF"))
return false;
if (covered.isEmpty())
return false;
String posTag = covered.get(0).getFeatureValueAsString(util.getPOSTagFeature());
if (covered.size() > 0 && posTag != null && posTag.equals("PRF"))
return false;
if (cand.getCoveredText().matches("mir"))
return false;
if (cand.getCoveredText().matches("mir"))
return false;
AnnotationFS father = util.getFather(act);
AnnotationFS father = util.getFather(act);
AnnotationFS father2 = util.getFather(cand);
if (father == null || father2 == null)
return false;
AnnotationFS father2 = util.getFather(cand);
if (father == null || father2 == null)
return false;
if (father.getBegin() == father2.getBegin()) {
// System.out.println(father.getCoveredText());
if (father.getBegin() == father2.getBegin()) {
// System.out.println(father.getCoveredText());
if (!util.isVerb(father))
return false;
if (!util.isVerb(father))
return false;
// get Substring between the annos
// get Substring between the annos
String substring = cas.getDocumentText().substring(cand.getEnd(), act.getBegin());
String substring = cas.getDocumentText().substring(cand.getEnd(), act.getBegin());
// must contain als ein and there may nor be an intermediate NE !!
if (substring.contains("als ein") || substring.contains("wie ein")) {
//must contain als ein and there may nor be an intermediate NE !!
if (substring.contains("als ein") || substring.contains("wie ein")) {
for(AnnotationFS neCov : util.getCovered(util.getCoveringSentence(act), util.getNamedEntityType())){
if (neCov.getBegin() > cand.getBegin() && neCov.getEnd() < act.getBegin())
return false;
}
for (AnnotationFS neCov : util.getCovered(util.getCoveringSentence(act), util.getNamedEntityType())) {
if (substring.contains("noch") || substring.contains("nicht")
|| substring.contains("weder")
|| substring.contains(",")
|| substring.contains("gerade"))
return false;
if (neCov.getBegin() > cand.getBegin() && neCov.getEnd() < act.getBegin())
return false;
}
return true;
}
}
if (substring.contains("noch") || substring.contains("nicht") || substring.contains("weder")
|| substring.contains(",") || substring.contains("gerade"))
return false;
return false;
}
return true;
}
}
return false;
}
}
package de.uniwue.mk.kall.mainApp;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.impl.XmiCasSerializer;
import de.uniwue.mk.kall.coU.ds.ADirectSpeechResolvingAlgorithm;
import de.uniwue.mk.kall.coU.ds.ASpeakerDetectionRule;
import de.uniwue.mk.kall.coU.ds.DsPropagationResolvingRule;
import de.uniwue.mk.kall.coU.ds.ExplicitlySpeakerResolvingRule;
import de.uniwue.mk.kall.mentioncreator.MorphologyHandler;
import de.uniwue.mkrug.kall.typesystemutil.Util_impl;

/**
 * Batch application that runs speaker detection for direct speech (DS) on a
 * directory of XMI-serialized CAS files and writes the annotated results to an
 * output directory under the same file names.
 */
public class MainAnnotateDS {

  // Hard-coded I/O locations (machine-specific; matches the other Main* apps in this project).
  static File in = new File("C:\\Users\\mkrug\\annoTest\\corefOut\\output");
  static File out = new File("C:\\Users\\mkrug\\annoTest\\corefOut\\outputAuto");
  private static File resources = new File("resources");

  /**
   * Deserializes each XMI file in {@code in}, runs speaker detection, and
   * serializes the annotated CAS to {@code out}.
   *
   * @param args ignored
   * @throws Exception on CAS creation, (de)serialization, or I/O failure
   */
  public static void main(String[] args) throws Exception {
    CAS cas = Util_impl.createCas();
    MorphologyHandler handler = new MorphologyHandler(resources);
    File[] files = in.listFiles();
    // listFiles() returns null if the directory is missing/unreadable; fail with a clear message
    // instead of an NPE in the enhanced-for loop.
    if (files == null) {
      throw new IllegalStateException("Input directory not readable: " + in.getAbsolutePath());
    }
    for (File f : files) {
      // Skip non-XMI entries, consistent with the .xmi filter used by the other batch apps.
      if (!f.getName().endsWith(".xmi")) {
        continue;
      }
      // try-with-resources: the original leaked both the input and output streams.
      try (FileInputStream fis = new FileInputStream(f)) {
        // NOTE(review): the same CAS instance is reused across files without an explicit
        // reset here — presumably deserialize replaces its contents; confirm against the
        // XmiCasDeserializer contract.
        XmiCasDeserializer.deserialize(fis, cas);
      }
      Util_impl util = new Util_impl(cas);
      speakerDetection(cas, f, util, handler);
      try (FileOutputStream fos =
          new FileOutputStream(out.getAbsolutePath() + "/" + f.getName())) {
        XmiCasSerializer.serialize(cas, fos);
      }
    }
  }

  /**
   * Wires up the speaker-detection rules (explicit-speaker, then DS propagation)
   * and executes the direct-speech resolving algorithm on the given CAS.
   *
   * @param cas     the CAS to annotate (mutated in place)
   * @param f       source file, used only for its name (logging/reporting inside the algo)
   * @param util    type-system utility bound to {@code cas}
   * @param handler shared morphology resources
   */
  private static void speakerDetection(CAS cas, File f, Util_impl util, MorphologyHandler handler) {
    ASpeakerDetectionRule explicitRule = new ExplicitlySpeakerResolvingRule(cas);
    ASpeakerDetectionRule propagationRule = new DsPropagationResolvingRule(cas);
    ADirectSpeechResolvingAlgorithm dsAlgo =
        new ADirectSpeechResolvingAlgorithm(resources, explicitRule, propagationRule);
    dsAlgo.execute(cas, util, f.getName(), handler);
  }
}
......@@ -2,6 +2,7 @@ package de.uniwue.mk.kall.mainApp;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
......@@ -9,6 +10,7 @@ import java.util.List;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.cas.text.AnnotationFS;
import org.xml.sax.SAXException;
......@@ -73,7 +75,7 @@ public class MainCorefUpdateAlgoApplication {
// File("\\\\hastur\\scratch\\kallimachos\\kalimachos Doks\\korpusNew\\weimer_binding2");
in = new File(
"C:\\Users\\mkrug\\owncloud_neu\\projekt romangeschichte\\named entity recognition\\Goldstandard-Rescaled\\output+speech");
"C:\\Users\\mkrug\\owncloud_neu\\kallimachos\\named entity recognition\\Goldstandard-Rescaled\\output+speech");
// File in = new File(
// "\\\\hastur\\scratch\\kallimachos\\kalimachos Doks\\korpusNew\\newKorpusBinding");
......@@ -149,8 +151,7 @@ public class MainCorefUpdateAlgoApplication {
annotateDebugAndNE(cas, f, util, cluster);
// step 6 serialize
// XmiCasSerializer.serialize(cas, new FileOutputStream(new
// File(out.getAbsolutePath() + "/" + f.getName())));
XmiCasSerializer.serialize(cas, new FileOutputStream(new File(out.getAbsolutePath() + "/" + f.getName())));
}
evaluator.printFullEvalScore(in.listFiles().length);
}
......
......@@ -716,7 +716,7 @@ public class MainPreprocessingByIni {
if (f.getName().endsWith(".xmi")) {
XmiCasDeserializer.deserialize(new FileInputStream(f), cas);
XmiCasDeserializer.deserialize(new FileInputStream(f), cas, true);
}
else if (f.getName().endsWith(".txt")) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment