Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
EfficientRuleLearning
Commits
32a4579c
Commit
32a4579c
authored
Oct 30, 2016
by
mak28ma
Browse files
ngramfeatureGen
parent
9e18f8a9
Changes
3
Hide whitespace changes
Inline
Side-by-side
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/NGramGenerator.java
0 → 100644
View file @
32a4579c
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that emits one feature for every contiguous substring
 * (character n-gram) of a token's covered text.
 *
 * <p>Each feature has the form {@code featureIdentifier + "=" + ngram}. An
 * n-gram that does not start at the first character of the text is prefixed
 * with {@code "*"}; one that does not end at the last character is suffixed
 * with {@code "*"}. The n-gram covering the whole text carries no marker.
 */
public class NGramGenerator extends AFeatureGenerator {

    /** Marker attached to the side of an n-gram that does not touch the text boundary. */
    private static final String WILDCARD = "*";

    /** Creates the generator, passing the name {@code "NGram"} to the superclass constructor. */
    public NGramGenerator() {
        super("NGram");
    }

    /**
     * Builds the n-gram features for the given token.
     *
     * <p>Features are ordered by start offset first and, for equal start
     * offsets, by increasing n-gram length. Repeated substrings are not
     * de-duplicated.
     *
     * @param token annotation whose covered text is split into n-grams
     * @return all n-gram features of the covered text; an empty array if the
     *         covered text is empty
     */
    @Override
    public String[] generateFeatures(AnnotationFS token) {
        final String covered = token.getCoveredText();
        final int length = covered.length();
        // Every feature shares the same "<identifier>=" head, so build it once.
        final String head = super.featureIdentifier + "=";
        final List<String> features = new ArrayList<>();

        for (int start = 0; start < length; start++) {
            // Left marker depends only on the start offset.
            final String lead = start > 0 ? WILDCARD : "";
            for (int stop = start + 1; stop <= length; stop++) {
                final String tail = stop < length ? WILDCARD : "";
                features.add(head + lead + covered.substring(start, stop) + tail);
            }
        }
        return features.toArray(new String[features.size()]);
    }
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/NNFeatureGenerator.java
View file @
32a4579c
...
...
@@ -16,7 +16,7 @@ public class NNFeatureGenerator extends AFeatureGenerator{
/**
 * Produces a single feature from the string value of one feature of the token.
 *
 * <p>The value is read from the feature that the token's type exposes under
 * the base name held in {@code posTagFeature} (declared outside this view).
 * The result is a one-element array containing
 * {@code featureIdentifier + "=NN"} when the value starts with the tested
 * prefix and {@code featureIdentifier + "=O"} otherwise.
 *
 * <p>NOTE(review): this text comes from a rendered diff, so the two
 * consecutive {@code return} statements below are presumably the removed and
 * the added version of the same line (prefix {@code "VVFIN"} vs. {@code "N"});
 * the first literal is split across lines by the diff highlighting. Confirm
 * against the repository which one is current.
 *
 * @param token annotation whose feature value is inspected
 * @return a one-element array holding the generated feature
 */
public
String
[]
generateFeatures
(
AnnotationFS
token
)
{
String
featureValueAsString
=
token
.
getFeatureValueAsString
(
token
.
getType
().
getFeatureByBaseName
(
posTagFeature
));
return
new
String
[]{
featureValueAsString
.
startsWith
(
"
VVFI
N"
)?
super
.
featureIdentifier
+
"=NN"
:
super
.
featureIdentifier
+
"=O"
};
return
new
String
[]{
featureValueAsString
.
startsWith
(
"N"
)?
super
.
featureIdentifier
+
"=NN"
:
super
.
featureIdentifier
+
"=O"
};
}
}
de.uniwue.ls6.rulelearning/RuleLearning/src/test/FirstTest2.java
View file @
32a4579c
...
...
@@ -14,6 +14,7 @@ import de.uniwue.ls6.datastructure.Instance;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
import
de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator
;
...
...
@@ -43,7 +44,7 @@ public class FirstTest2 {
"de.uniwue.kalimachos.coref.type.POS"
,
tsd
,
new
NNFeatureGenerator
(
"POSTag"
),
new
WordFeaturegenerator
(),
new
SuffixNGenerator
(
4
),
new
SuffixNGenerator
(
3
),
new
SuffixNGenerator
(
2
),
new
SuffixNGenerator
(
1
),
new
PrefixNGenerator
(
1
),
new
PrefixNGenerator
(
2
),
new
IsUppercaseFeatureGenerator
(),
new
PrefixNGenerator
(
3
)));
new
IsUppercaseFeatureGenerator
(),
new
PrefixNGenerator
(
3
)
,
new
NGramGenerator
()
));
System
.
out
.
println
(
"instances: "
+
instances
.
size
());
//if(instances.size()>20000)break;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment