Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
EfficientRuleLearning
Commits
1bc99e89
Commit
1bc99e89
authored
Nov 18, 2016
by
Markus Krug
Browse files
alles kaput!!
parent
c3f06cc7
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/EACL_UIMA_FeatureGen.java
0 → 100644
View file @
1bc99e89
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.Type
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that tags a token according to how it lines up with the
 * annotations of a configurable UIMA type.
 *
 * <p>The tag name is the string value of one feature (looked up by base name)
 * of the matching annotation. The produced feature has the form
 * {@code <featureIdentifier>=B-<value>}, {@code <featureIdentifier>=I-<value>}
 * or {@code <featureIdentifier>=O}.
 */
public class EACL_UIMA_FeatureGen extends AFeatureGenerator {

    // Neither read nor written inside this class; left in place because it is
    // package-visible and may be referenced from elsewhere in the package.
    int prefixLen;

    /** Fully qualified name of the annotation type to look up in the type system. */
    String typeS;

    /** Base name of the feature whose string value is used as the tag name. */
    private String feature;

    /**
     * @param typeS   name of the annotation type to inspect
     * @param feature base name of the feature that supplies the tag name
     * @param labelId identifier handed to the {@code AFeatureGenerator} constructor
     */
    public EACL_UIMA_FeatureGen(String typeS, String feature, String labelId) {
        super(labelId);
        this.typeS = typeS;
        this.feature = feature;
    }

    /**
     * Produces exactly one feature string for the given token.
     *
     * <p>The annotations of the configured type are visited in index order and
     * the first one that satisfies either rule decides the result:
     * <ul>
     * <li>same begin and same end as the token: {@code B-<value>}</li>
     * <li>token begins strictly after the annotation begins and ends no later
     * than the annotation ends: {@code I-<value>}</li>
     * </ul>
     * If no annotation qualifies the result is {@code O}.
     *
     * @param token the token to describe
     * @return a one-element array holding the generated feature
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        Type type = token.getCAS().getTypeSystem().getType(typeS);
        int tokenBegin = token.getBegin();
        int tokenEnd = token.getEnd();

        for (AnnotationFS anno : token.getCAS().getAnnotationIndex(type)) {
            int annoBegin = anno.getBegin();
            int annoEnd = anno.getEnd();

            if (annoBegin == tokenBegin && annoEnd == tokenEnd) {
                return new String[] { super.featureIdentifier + "=" + "B-" + tagOf(type, anno) };
            }
            // NOTE(review): a token that starts exactly at the annotation begin
            // but ends before the annotation end satisfies neither rule and so
            // ends up as "O" unless another annotation matches - confirm that
            // this is the intended scheme.
            if (tokenBegin > annoBegin && tokenEnd <= annoEnd) {
                return new String[] { super.featureIdentifier + "=" + "I-" + tagOf(type, anno) };
            }
        }
        return new String[] { super.featureIdentifier + "=" + "O" };
    }

    /**
     * Reads the configured feature of the annotation as a string. Called only
     * for the annotation that decides the result, so the lookup is not repeated
     * for annotations that do not match.
     */
    private String tagOf(Type type, AnnotationFS anno) {
        return anno.getFeatureValueAsString(type.getFeatureByBaseName(feature));
    }
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/EACL_UIMA_FeatureGenB_E.java
0 → 100644
View file @
1bc99e89
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.Type
;
import
org.apache.uima.cas.text.AnnotationFS
;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
/**
 * Feature generator that tags a token with a begin/end marker derived from the
 * annotations of a configurable UIMA type.
 *
 * <p>The tag name is the string value of one feature (looked up by base name)
 * of the matching annotation. The produced feature has the form
 * {@code <featureIdentifier>=B-<value>} or {@code <featureIdentifier>=E-<value>};
 * when nothing matches, the string returned by
 * {@code LabelAlphabet.getFeatureToId(0)} is used instead.
 */
public class EACL_UIMA_FeatureGenB_E extends AFeatureGenerator {

    // Neither read nor written inside this class; left in place because it is
    // package-visible and may be referenced from elsewhere in the package.
    int prefixLen;

    /** Fully qualified name of the annotation type to look up in the type system. */
    String typeS;

    /** Base name of the feature whose string value is used as the tag name. */
    private String feature;

    /**
     * @param typeS   name of the annotation type to inspect
     * @param feature base name of the feature that supplies the tag name
     * @param labelId identifier handed to the {@code AFeatureGenerator} constructor
     */
    public EACL_UIMA_FeatureGenB_E(String typeS, String feature, String labelId) {
        super(labelId);
        this.typeS = typeS;
        this.feature = feature;
    }

    /**
     * Produces exactly one feature string for the given token.
     *
     * <p>The annotations of the configured type are visited in index order and
     * the first one that satisfies either rule decides the result:
     * <ul>
     * <li>same begin and same end as the token: {@code B-<value>}</li>
     * <li>same end as the token (begin differs): {@code E-<value>}</li>
     * </ul>
     *
     * @param token the token to describe
     * @return a one-element array holding the generated feature, or the entry
     *         {@code LabelAlphabet.getFeatureToId(0)} when no annotation qualifies
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        Type type = token.getCAS().getTypeSystem().getType(typeS);
        int tokenBegin = token.getBegin();
        int tokenEnd = token.getEnd();

        for (AnnotationFS anno : token.getCAS().getAnnotationIndex(type)) {
            // Both rules require the ends to coincide, so anything else can be
            // skipped without resolving the feature value.
            if (anno.getEnd() != tokenEnd) {
                continue;
            }
            String marker = (anno.getBegin() == tokenBegin) ? "B-" : "E-";
            return new String[] { super.featureIdentifier + "=" + marker + tagOf(type, anno) };
        }
        // NOTE(review): id 0 is presumably the default label of the alphabet -
        // confirm against LabelAlphabet.
        return new String[] { LabelAlphabet.getFeatureToId(0) };
    }

    /**
     * Reads the configured feature of the annotation as a string. Called only
     * for the annotation that decides the result, so the lookup is not repeated
     * for annotations that do not match.
     */
    private String tagOf(Type type, AnnotationFS anno) {
        return anno.getFeatureValueAsString(type.getFeatureByBaseName(feature));
    }
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/WordCategorization.java
0 → 100644
View file @
1bc99e89
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.List
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that describes the character-class "shape" of a token.
 *
 * <p>The covered text is first normalised ({@code a-z} becomes {@code x},
 * {@code A-Z} becomes {@code X}, {@code 0-9} becomes {@code D}, every other
 * character is kept). Every contiguous substring of the normalised text is then
 * emitted as one feature.
 */
public class WordCategorization extends AFeatureGenerator {

    /** Registers this generator under the feature identifier {@code "WordBuilding"}. */
    public WordCategorization() {
        super("WordBuilding");
    }

    /**
     * Emits one feature per contiguous substring of the normalised token text.
     *
     * <p>A substring that does not start at the first character is prefixed with
     * {@code *}; one that does not reach the last character is suffixed with
     * {@code *}. Each feature has the form {@code <featureIdentifier>=<substring>}.
     *
     * @param token the token whose covered text is analysed
     * @return all generated features; empty when the covered text is empty
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        String text = unifyString(token.getCoveredText());
        int len = text.length();
        List<String> ngrams = new ArrayList<String>();

        for (int beg = 0; beg < len; beg++) {
            for (int end = beg + 1; end <= len; end++) {
                String ngram = text.substring(beg, end);
                if (beg > 0) {
                    ngram = "*" + ngram;
                }
                if (end < len) {
                    ngram += "*";
                }
                ngrams.add(super.featureIdentifier + "=" + ngram);
            }
        }
        return ngrams.toArray(new String[0]);
    }

    /**
     * Maps every character of the text to its class symbol.
     *
     * <p>Only the ASCII ranges are translated, which is exactly what the
     * character classes {@code [a-z]}, {@code [A-Z]} and {@code [0-9]} match;
     * plain range comparisons avoid compiling a regular expression per character.
     *
     * @param coveredText the raw token text
     * @return the text with letters and digits replaced by {@code x}, {@code X}
     *         and {@code D}
     */
    private String unifyString(String coveredText) {
        StringBuilder refined = new StringBuilder(coveredText.length());
        for (int i = 0; i < coveredText.length(); i++) {
            char c = coveredText.charAt(i);
            if (c >= 'a' && c <= 'z') {
                refined.append('x');
            } else if (c >= 'A' && c <= 'Z') {
                refined.append('X');
            } else if (c >= '0' && c <= '9') {
                refined.append('D');
            } else {
                refined.append(c);
            }
        }
        return refined.toString();
    }
}
de.uniwue.ls6.rulelearning/RuleLearning/resources/tmp.txt
0 → 100644
View file @
1bc99e89
This diff is collapsed.
Click to expand it.
de.uniwue.ls6.rulelearning/RuleLearning/src/de/uniwue/ls6/rulelearning/algorithm/impl/BinaryRepresentationRuleLearningAlgorithm.java
View file @
1bc99e89
...
...
@@ -217,7 +217,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// assert that the maximum is growing
assert
(
maximumScore
<=
iterationMatrix
.
getMaximumScore
())
:
"Maximum decreased within iteration!"
;
if
(!
betterRuleCanBeLearned
(
ma
ppings
.
get
(
mappings
.
size
()
-
2
),
mappings
.
get
(
mappings
.
size
()
-
1
)
))
{
if
(!
betterRuleCanBeLearned
(
ma
ximumScore
,
iterationMatrix
))
{
mappings
.
remove
(
mappingForMaximum
);
break
;
}
...
...
@@ -242,7 +242,13 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
maximumScore
,
(
maxEntryLocation
.
getTp
()
/
(
maxEntryLocation
.
getFp
()
+
maxEntryLocation
.
getTp
())));
}
/**
 * Decides whether learning should continue, based on the two most recent
 * mappings in the list.
 *
 * @param mappings the mappings collected so far
 * @return {@code true} when fewer than two mappings exist; otherwise the
 *         result of comparing the second-to-last with the last mapping
 */
private boolean betterRuleCanBeLearned(List<MatrixMapping> mappings) {
    final int count = mappings.size();
    if (count >= 2) {
        MatrixMapping previous = mappings.get(count - 2);
        MatrixMapping latest = mappings.get(count - 1);
        return betterRuleCanBeLearned(previous, latest);
    }
    // With fewer than two mappings there is nothing to compare yet.
    return true;
}
private
boolean
betterRuleCanBeLearned
(
MatrixMapping
lastMapping
,
MatrixMapping
newMapping
)
{
Set
<
Set
<
Point
>>
lastFeatures
=
new
HashSet
<
Set
<
Point
>>(
lastMapping
.
getDenseIndexToFeaturesMapping
().
values
());
Set
<
Set
<
Point
>>
newFeatures
=
new
HashSet
<
Set
<
Point
>>(
newMapping
.
getDenseIndexToFeaturesMapping
().
values
());
...
...
@@ -251,8 +257,9 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
return
false
;
}
return
true
;
}
private
boolean
betterRuleCanBeLearned
(
int
maximumScore
,
MatrixMcMatrixFace
matrixInFocus
)
{
if
(
matrixInFocus
.
getMaximumScore
()
>
maximumScore
)
{
...
...
@@ -282,8 +289,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
RepresentationRule
rule
=
pass
.
apply
(
instanceToClassify
);
if
(
rule
!=
null
)
{
predictedLabel
=
pass
.
getLabel
();
score
+=
rule
.
getUniquenessScore
();
//
score = rule.getPrecision();
//
score
+=
rule.getUniquenessScore();
score
=
rule
.
getPrecision
();
}
else
{
break
;
}
...
...
de.uniwue.ls6.rulelearning/RuleLearning/src/de/uniwue/ls6/rulelearning/algorithm/impl/MultiClassRepresentationRuleAlgorithm.java
View file @
1bc99e89
...
...
@@ -84,7 +84,10 @@ public class MultiClassRepresentationRuleAlgorithm implements IRepresentationRul
Set
<
Integer
>
labelSet
=
new
HashSet
<>();
for
(
Instance
i
:
instances
)
{
labelSet
.
add
(
i
.
getLabel
());
// never create a classifier for the DEFAULT label
if
(
i
.
getLabel
()
!=
0
)
{
labelSet
.
add
(
i
.
getLabel
());
}
instanceToLabelMapping
.
put
(
i
,
i
.
getLabel
());
}
return
labelSet
;
...
...
@@ -105,7 +108,7 @@ public class MultiClassRepresentationRuleAlgorithm implements IRepresentationRul
double
maxScore
=
0
;
int
label
=
0
;
for
(
ALabelling
labelling
:
binaryLabellings
)
{
//we sum only non default classifications!
//
we sum only non default classifications!
if
(
labelling
.
getLabel
()
==
0
)
continue
;
fullScore
+=
labelling
.
getScore
();
...
...
de.uniwue.ls6.rulelearning/RuleLearning/src/test/FirstTestEACL.java
0 → 100644
View file @
1bc99e89
package
test
;
import
java.io.File
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Random
;
import
org.apache.uima.fit.factory.TypeSystemDescriptionFactory
;
import
org.apache.uima.resource.metadata.TypeSystemDescription
;
import
de.uniwue.ls6.datastructure.ALabelling
;
import
de.uniwue.ls6.datastructure.Instance
;
import
de.uniwue.ls6.datastructure.SimpleLabelling
;
import
de.uniwue.ls6.rulelearning.algorithm.impl.MultiClassRepresentationRuleAlgorithm
;
import
de.uniwue.ls6.rulelearning.evaluation.eval.LabelAccuracyEvaluation
;
import
de.uniwue.ls6.rulelearning.evaluation.fold.FoldUtil
;
import
de.uniwue.ls6.rulelearning.evaluation.fold.UnstructuredFold
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.BIO_UIMA_FeatureGen
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.EACL_UIMA_FeatureGen
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.EACL_UIMA_FeatureGenB_E
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.POSTagFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordCategorization
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory
;
/**
 * Manual experiment driver: loads every {@code .xmi} file of a corpus folder
 * into windowed instances, splits them into folds, trains a
 * {@link MultiClassRepresentationRuleAlgorithm} and prints the label accuracy
 * evaluation.
 */
public class FirstTestEACL {

    /** Folder that holds the preprocessed gold-standard {@code .xmi} files. */
    private static final String CORPUS_FOLDER =
            "C:\\Users\\mkrug\\owncloud_neu\\paper_RegelLernen\\xmi_gold_preprocessed";

    /** Type system descriptor used to read the corpus files. */
    private static final String TYPE_SYSTEM_FILE =
            "C:\\marian_eclipse\\workspace_mars\\dkprocoreExample\\TypeSystemDK.xml";

    /**
     * Runs the experiment.
     *
     * @param args ignored
     * @throws IllegalStateException if the corpus folder cannot be listed
     * @throws Exception             anything raised while loading, training or evaluating
     */
    public static void main(String[] args) throws Exception {
        File korpusFOlder = new File(CORPUS_FOLDER);
        String uriDkPro = new File(TYPE_SYSTEM_FILE).toPath().toUri().toString();

        TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(uriDkPro);

        MultiClassRepresentationRuleAlgorithm algorithm = new MultiClassRepresentationRuleAlgorithm(10);

        // File.listFiles() yields null when the path is not a directory or an
        // I/O error occurs; fail with a clear message instead of a bare
        // NullPointerException in the loop below.
        File[] corpusFiles = korpusFOlder.listFiles();
        if (corpusFiles == null) {
            throw new IllegalStateException("Corpus folder is not a readable directory: " + korpusFOlder);
        }

        List<Instance> instances = new ArrayList<Instance>();
        for (File f : corpusFiles) {
            if (!f.getName().endsWith(".xmi")) {
                continue;
            }
            instances.addAll(InstanceCreationFactory.createWindowedInstancesFromUIMA(f, instances.size(), 2, 2,
                    "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", tsd,
                    new EACL_UIMA_FeatureGenB_E("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.Morpheme",
                            "morphTag", "Entity"),
                    new WordFeaturegenerator(),
                    new SuffixNGenerator(4),
                    new SuffixNGenerator(3),
                    new SuffixNGenerator(2),
                    new SuffixNGenerator(1),
                    new PrefixNGenerator(1),
                    new IsUppercaseFeatureGenerator(),
                    new NGramGenerator(),
                    new WordCategorization(),
                    new BIO_UIMA_FeatureGen("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk", "Chunk"),
                    new EACL_UIMA_FeatureGen("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity", "value",
                            "NE"),
                    new POSTagFeatureGenerator("PosValue")));
            System.out.println("instances: " + instances.size());
        }

        // Give every instance a consecutive id in load order.
        int id = 0;
        for (Instance i : instances) {
            i.setId(id);
            id++;
        }

        // create 5 folds; the fixed seed is passed to the fold split
        List<UnstructuredFold> folds = FoldUtil.readInstancesToFold(instances, new Random(13374211), 5);

        for (UnstructuredFold fold : folds) {
            algorithm.learn(fold.getTrainingset().toArray(new Instance[0]));

            // collect gold and predicted labels for the held-out instances
            List<ALabelling> goldLabels = new ArrayList<>();
            List<ALabelling> systemLabels = new ArrayList<>();
            for (Instance i : fold.getTestSet()) {
                goldLabels.add(new SimpleLabelling(i.getLabel(), 0));
                systemLabels.add(algorithm.apply(i));
            }

            // evaluate this fold
            String evaluateToString = new LabelAccuracyEvaluation().evaluateToString(
                    goldLabels.toArray(new ALabelling[0]), systemLabels.toArray(new ALabelling[0]));
            System.out.println(evaluateToString);

            // NOTE(review): this break stops after the first fold, so the other
            // four folds are never trained or evaluated - confirm intended.
            break;
        }
    }
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment