Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
EfficientRuleLearning
Commits
78f8a06e
Commit
78f8a06e
authored
Nov 19, 2016
by
mak28ma
Browse files
started to improve readability a bit
parent
1bc99e89
Changes
4
Hide whitespace changes
Inline
Side-by-side
de.uniwue.ls6.rulelearning/DataStructure/src/de/uniwue/ls6/util/MatrixUtil.java
View file @
78f8a06e
...
...
@@ -42,14 +42,20 @@ public class MatrixUtil {
// collect all entries that may remain
List
<
MatrixPoint
>
entries
=
new
ArrayList
<>();
Set
<
Set
<
Point
>>
uniqueFeatureCombinations
=
new
HashSet
<>();
// iterate over each feature combination left
for
(
MatrixEntry
entry
:
matrixface
.
getTpMatrix
())
{
double
sum
=
entry
.
get
();
if
(
sum
>=
maximum
)
{
// save all that may be kept
if
(
sum
==
maximum
&&
matrixface
.
getFpMatrix
().
get
(
entry
.
row
(),
entry
.
column
())
==
0
)
{
double
amountTP
=
entry
.
get
();
// save all that may be kept: by default, any entries that have more TP may
// be kept since they have potential
if
(
amountTP
>=
maximum
)
{
// but if they are at maxScore (can be more than that) and don't
// have any FP left, there is no potential left
if
(
amountTP
==
maximum
&&
matrixface
.
getFpMatrix
().
get
(
entry
.
row
(),
entry
.
column
())
==
0
)
{
if
(!(
new
Point
(
entry
.
column
(),
entry
.
row
()).
equals
(
maxEntryLocation
)))
{
//no potential to improve left!
//
no potential to improve left!
continue
;
}
}
...
...
@@ -58,9 +64,14 @@ public class MatrixUtil {
Set
<
Point
>
featuresOfPoints
=
determineFeaturesForIndex
(
new
Point
(
entry
.
column
(),
entry
.
row
()),
mappings
,
mappings
.
size
()
>
0
?
true
:
false
);
//only keep each feature combination exactly once!
if
(
uniqueFeatureCombinations
.
contains
(
featuresOfPoints
))
{
continue
;
}
// furthermore we can filter all those feature combinations that resemble exactly the same instance set
// because our algorithm cannot distinguish between those
uniqueFeatureCombinations
.
add
(
featuresOfPoints
);
double
score
=
entry
.
get
()
-
matrixface
.
getFpMatrix
().
get
(
entry
.
row
(),
entry
.
column
());
...
...
@@ -74,14 +85,10 @@ public class MatrixUtil {
+
" Amount of potentially good features "
+
entries
.
size
());
matrixMapping
=
new
MatrixMapping
(
formerMatrixSize
);
// sort
Collections
.
sort
(
entries
,
new
Comparator
<
MatrixPoint
>()
{
// sort
by score //TODO can this be done better??
Collections
.
sort
(
entries
,
(
MatrixPoint
arg0
,
MatrixPoint
arg1
)
->
{
@Override
public
int
compare
(
MatrixPoint
arg0
,
MatrixPoint
arg1
)
{
// TODO Auto-generated method stub
return
(
int
)
(
arg1
.
getScore
()
-
arg0
.
getScore
());
}
return
(
int
)
(
arg1
.
getScore
()
-
arg0
.
getScore
());
});
}
...
...
@@ -102,13 +109,16 @@ public class MatrixUtil {
// System.out.println(entries.size()+"==");
// debug
// for (MatrixPoint entry : entries) {
// Set<Point> featuresForDenseIndex = determineFeaturesForIndex(new Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6, featuresForDenseIndex, 1, 10);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " + entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
// }
// for (MatrixPoint entry : entries) {
// Set<Point> featuresForDenseIndex = determineFeaturesForIndex(new
// Point(entry.getX(), entry.getY()),
// mappings, mappings.size() > 0 ? true : false);
// RepresentationRule representationRule = new RepresentationRule(6,
// featuresForDenseIndex, 1, 10);
// System.out.println(entry.getScore() + "\t" + maximum + "\tTP " +
// entry.getTp() + "\tFP" + entry.getFp()
// + "\t" + representationRule.toString());
// }
return
matrixMapping
;
}
...
...
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/LemmaFeatureGenerator.java
0 → 100644
View file @
78f8a06e
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that emits the lemma of a token as a single feature string.
 * <p>
 * The lemma is read from a feature of the token annotation whose base name is supplied at
 * construction time. The result has the form {@code <featureIdentifier>=<lemma>}, where
 * {@code featureIdentifier} is inherited from {@link AFeatureGenerator} (presumably the
 * {@code "Lemma"} passed to the superclass constructor -- confirm in AFeatureGenerator).
 */
public class LemmaFeatureGenerator extends AFeatureGenerator {

	/** Base name of the annotation feature that holds the lemma; fixed after construction. */
	private final String lemmaTagFeature;

	/**
	 * Creates a generator that reads the lemma from the given annotation feature.
	 *
	 * @param lemmaTagFeature base name of the feature on the token's type that stores the lemma
	 */
	public LemmaFeatureGenerator(String lemmaTagFeature) {
		super("Lemma");
		this.lemmaTagFeature = lemmaTagFeature;
	}

	/**
	 * Builds the lemma feature for one token.
	 *
	 * @param token the annotation to read the lemma from
	 * @return a one-element array containing {@code featureIdentifier + "=" + lemma}
	 */
	@Override
	public String[] generateFeatures(AnnotationFS token) {
		// NOTE(review): if the token's type has no feature with this base name the lookup
		// presumably yields null -- verify how getFeatureValueAsString reacts to that.
		String lemma = token.getFeatureValueAsString(token.getType().getFeatureByBaseName(lemmaTagFeature));
		return new String[] { super.featureIdentifier + "=" + lemma };
	}
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/WordCategorization.java
View file @
78f8a06e
...
...
@@ -28,18 +28,28 @@ public class WordCategorization extends AFeatureGenerator {
ngrams
.
add
(
super
.
featureIdentifier
+
"="
+
ngram
);
}
}
//also add a simplified version of text
String
simplifiedVersion
=
simplifyString
(
text
);
ngrams
.
add
(
super
.
featureIdentifier
+
"_simplified="
+
simplifiedVersion
);
return
ngrams
.
toArray
(
new
String
[
0
]);
}
/**
 * Collapses every run of repeated category symbols into a single symbol.
 * <p>
 * Runs of {@code c}, then runs of {@code C}, then runs of {@code D} are each reduced to one
 * character; all other characters are left untouched.
 *
 * @param text the string to simplify
 * @return the string with consecutive {@code c}, {@code C} and {@code D} characters collapsed
 */
private String simplifyString(String text) {
	String lowerCollapsed = text.replaceAll("c+", "c");
	String upperCollapsed = lowerCollapsed.replaceAll("C+", "C");
	String digitCollapsed = upperCollapsed.replaceAll("D+", "D");
	return digitCollapsed;
}
private
String
unifyString
(
String
coveredText
)
{
String
refinedString
=
""
;
for
(
Character
c
:
coveredText
.
toCharArray
())
{
if
(
c
.
toString
().
matches
(
"[a-z]"
))
{
refinedString
+=
"
x
"
;
refinedString
+=
"
c
"
;
}
else
if
(
c
.
toString
().
matches
(
"[A-Z]"
))
{
refinedString
+=
"
X
"
;
refinedString
+=
"
C
"
;
}
else
if
(
c
.
toString
().
matches
(
"[0-9]"
))
{
refinedString
+=
"D"
;
}
else
{
...
...
de.uniwue.ls6.rulelearning/RuleLearning/src/test/FirstTest.java
View file @
78f8a06e
...
...
@@ -20,6 +20,7 @@ import de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerato
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.POSTagFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordCategorization
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory
;
...
...
@@ -34,14 +35,14 @@ public class FirstTest {
File
korpusFOlder
=
new
File
(
"X:\\Neuer Ordner\\output+speech"
);
MultiClassRepresentationRuleAlgorithm
algorithm
=
new
MultiClassRepresentationRuleAlgorithm
(
10
0
);
MultiClassRepresentationRuleAlgorithm
algorithm
=
new
MultiClassRepresentationRuleAlgorithm
(
25
0
);
TypeSystemDescription
tsd
=
TypeSystemDescriptionFactory
.
createTypeSystemDescriptionFromPath
(
typesystem
.
toURL
().
toString
());
List
<
Instance
>
instances
=
InstanceCreationFactory
.
createWindowedInstancesFromUIMA
(
document
,
0
,
0
,
0
,
List
<
Instance
>
instances
=
InstanceCreationFactory
.
createWindowedInstancesFromUIMA
(
document
,
0
,
2
,
2
,
"de.uniwue.kalimachos.coref.type.POS"
,
tsd
,
new
POSTagFeatureGenerator
(
"POSTag"
),
new
WordFeaturegenerator
(),
new
IsUppercaseFeatureGenerator
(),
new
PrefixNGenerator
(
3
),
new
NGramGenerator
());
new
NGramGenerator
()
,
new
WordCategorization
()
);
// create 5 folds
List
<
UnstructuredFold
>
folds
=
FoldUtil
.
readInstancesToFold
(
instances
,
new
Random
(
13374211
),
10
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment