Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
EfficientRuleLearning
Commits
756c4039
Commit
756c4039
authored
Oct 31, 2016
by
Markus Krug
Browse files
viel probiert und nur etwas gewonnen, das lernen bleibt relativ langsam
parent
e1a1273c
Changes
6
Hide whitespace changes
Inline
Side-by-side
de.uniwue.ls6.rulelearning/DataStructure/src/de/uniwue/ls6/datastructure/Instance.java
View file @
756c4039
...
...
@@ -85,7 +85,7 @@ public class Instance {
FlexCompColMatrix
denseInstanceMatrix
=
new
FlexCompColMatrix
(
denseDimension
,
denseDimension
);
for
(
Point
denseIndices
:
lastMapping
.
getInverseMappingMap
().
keySet
())
{
List
<
Point
>
features
=
MatrixUtil
.
determine
FeaturesForIndex
(
denseIndices
,
mappings
);
List
<
Point
>
features
=
lastMapping
.
get
FeaturesFor
Dense
Index
(
denseIndices
);
// check if this instance contains the features
if
(
containsFeature
(
features
))
{
// if so then create a sparse matrix and put a 1 into the
...
...
de.uniwue.ls6.rulelearning/DataStructure/src/de/uniwue/ls6/datastructure/MatrixMapping.java
View file @
756c4039
...
...
@@ -4,11 +4,14 @@ import java.awt.Point;
import
java.util.HashMap
;
import
java.util.List
;
import
de.uniwue.ls6.util.MatrixUtil
;
public
class
MatrixMapping
{
// x is col and y is row
HashMap
<
Point
,
Point
>
mappingMap
;
HashMap
<
Point
,
Point
>
inverseMappingMap
;
HashMap
<
Point
,
List
<
Point
>>
denseIndexToFeaturesMapping
;
//
private
int
kroneckerDimension
;
...
...
@@ -25,6 +28,7 @@ public class MatrixMapping {
this
.
mappingMap
=
mappingMap
;
this
.
inverseMappingMap
=
inverseMap
;
this
.
kroneckerDimension
=
kroneckerDimension
;
this
.
denseIndexToFeaturesMapping
=
new
HashMap
<>();
}
public
MatrixMapping
(
int
kroneckerDimension
)
{
...
...
@@ -32,6 +36,7 @@ public class MatrixMapping {
this
.
mappingMap
=
new
HashMap
<
Point
,
Point
>();
this
.
inverseMappingMap
=
new
HashMap
<
Point
,
Point
>();
this
.
kroneckerDimension
=
kroneckerDimension
;
this
.
denseIndexToFeaturesMapping
=
new
HashMap
<>();
}
public
HashMap
<
Point
,
Point
>
getMappingMap
()
{
...
...
@@ -63,7 +68,7 @@ public class MatrixMapping {
}
// this method generates all values based on the keys
public
void
inferDenseMapValues
()
{
public
void
inferDenseMapValues
(
List
<
MatrixMapping
>
mappings
)
{
int
numCols
=
(
int
)
Math
.
ceil
(
Math
.
sqrt
(
mappingMap
.
keySet
().
size
()));
int
index
=
0
;
for
(
Point
key
:
mappingMap
.
keySet
())
{
...
...
@@ -72,6 +77,11 @@ public class MatrixMapping {
inverseMappingMap
.
put
(
value
,
key
);
index
++;
}
//also infer the features
for
(
Point
p
:
inverseMappingMap
.
keySet
()){
denseIndexToFeaturesMapping
.
put
(
p
,
MatrixUtil
.
determineFeaturesForIndex
(
p
,
mappings
));
}
}
public
int
getDenseMatrixDimension
()
{
...
...
@@ -87,6 +97,10 @@ public class MatrixMapping {
/**
 * Returns the value held in this mapping's {@code kroneckerDimension} field.
 *
 * @return the stored Kronecker dimension
 */
public int getKroneckerMatrixDimension() {
    return this.kroneckerDimension;
}
/**
 * Looks up the feature points cached for a dense-matrix index.
 *
 * <p>The lookup is a plain {@code HashMap.get} on
 * {@code denseIndexToFeaturesMapping}; entries are put into that map by
 * {@code inferDenseMapValues}.
 *
 * @param densePoint key into {@code denseIndexToFeaturesMapping}
 * @return the list stored under {@code densePoint}, or {@code null} when the
 *         map holds no entry for it
 */
public List<Point> getFeaturesForDenseIndex(Point densePoint) {
    final List<Point> cachedFeatures = this.denseIndexToFeaturesMapping.get(densePoint);
    return cachedFeatures;
}
@Override
public
String
toString
()
{
...
...
de.uniwue.ls6.rulelearning/DataStructure/src/de/uniwue/ls6/util/MatrixUtil.java
View file @
756c4039
...
...
@@ -7,11 +7,18 @@ import java.util.Collection;
import
java.util.Collections
;
import
java.util.Comparator
;
import
java.util.HashSet
;
import
java.util.Iterator
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Set
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.Executors
;
import
java.util.function.BiConsumer
;
import
java.util.function.BinaryOperator
;
import
java.util.function.Supplier
;
import
java.util.stream.Collector
;
import
java.util.stream.Collectors
;
import
de.uniwue.ls6.datastructure.Instance
;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
...
...
@@ -22,7 +29,7 @@ import no.uib.cipr.matrix.sparse.FlexCompColMatrix;
public
class
MatrixUtil
{
public
static
MatrixMapping
getMappingForMaximum
(
MatrixMcMatrixFace
matrixface
,
int
maximum
)
{
public
static
MatrixMapping
getMappingForMaximum
(
MatrixMcMatrixFace
matrixface
,
int
maximum
,
List
<
MatrixMapping
>
mappings
)
{
// totally unugly code not even necessary which makes it worse
int
formerMatrixSize
=
matrixface
.
getTpMatrix
().
numRows
()
==
matrixface
.
getTpMatrix
().
numColumns
()
...
...
@@ -37,7 +44,7 @@ public class MatrixUtil {
// double sum = entry.get() +
// matrixface.getFpMatrix().get(entry.row(), entry.column());
double
sum
=
entry
.
get
();
if
(
sum
>
=
maximum
)
{
if
(
sum
>
maximum
)
{
matrixMapping
.
addEntry
(
new
Point
(
entry
.
column
(),
entry
.
row
()));
// save the score
double
score
=
entry
.
get
()
-
matrixface
.
getFpMatrix
().
get
(
entry
.
row
(),
entry
.
column
());
...
...
@@ -47,7 +54,8 @@ public class MatrixUtil {
}
}
if
(
matrixMapping
.
getDenseMatrixDimension
()
>
50
)
{
System
.
out
.
println
(
"Densedimension: "
+
matrixMapping
.
getDenseMatrixDimension
());
if
(
matrixMapping
.
getDenseMatrixDimension
()
>
20
)
{
System
.
out
.
println
(
"Too many possible features! We restrict to the best 2500"
);
matrixMapping
=
new
MatrixMapping
(
formerMatrixSize
);
...
...
@@ -68,7 +76,9 @@ public class MatrixUtil {
}
// infer the -> righthandside
matrixMapping
.
inferDenseMapValues
();
ArrayList
<
MatrixMapping
>
arrayList
=
new
ArrayList
<
MatrixMapping
>(
mappings
);
arrayList
.
add
(
matrixMapping
);
matrixMapping
.
inferDenseMapValues
(
arrayList
);
return
matrixMapping
;
}
...
...
@@ -78,28 +88,46 @@ public class MatrixUtil {
MatrixMapping
lastMapping
=
mappings
.
get
(
mappings
.
size
()
-
1
);
int
dimension
=
lastMapping
.
getDenseMatrixDimension
();
MatrixMcMatrixFace
expandedMatrixFace
=
new
MatrixMcMatrixFace
(
dimension
*
dimension
,
dimension
*
dimension
,
label
);
// populate the matrix with the dataset this is expensive!
instances
.
parallelStream
().
forEach
((
Instance
inst
)
->
{
// expand
Supplier
<
MatrixMcMatrixFace
>
matrixConstructor
=
()->
new
MatrixMcMatrixFace
(
dimension
*
dimension
,
dimension
*
dimension
,
label
);
BiConsumer
<
MatrixMcMatrixFace
,
Instance
>
accumulator
=
(
MatrixMcMatrixFace
expandedMatrix
,
Instance
inst
)
->
{
FlexCompColMatrix
expandedInstance
=
inst
.
expand
(
mappings
);
// TODO can we use something faster here... currently this results
// in a speedup of 3...
synchronized
(
expandedMatrixFace
)
{
// add to kronecker
if
(
inst
.
getLabel
()
==
label
)
{
expandedMatrixFace
.
addToMatrix
(
expandedMatrixFace
.
getTpMatrix
(),
expandedInstance
);
}
else
{
expandedMatrixFace
.
addToMatrix
(
expandedMatrixFace
.
getFpMatrix
(),
expandedInstance
);
}
if
(
inst
.
getLabel
()
==
label
)
{
expandedMatrix
.
addToMatrix
(
expandedMatrix
.
getTpMatrix
(),
expandedInstance
);
}
else
{
expandedMatrix
.
addToMatrix
(
expandedMatrix
.
getFpMatrix
(),
expandedInstance
);
}
});
return
expandedMatrixFace
;
};
BinaryOperator
<
MatrixMcMatrixFace
>
join
=
(
MatrixMcMatrixFace
a
,
MatrixMcMatrixFace
b
)
->
{
a
.
getTpMatrix
().
add
(
b
.
getTpMatrix
());
a
.
getFpMatrix
().
add
(
b
.
getFpMatrix
());
return
a
;
};
return
instances
.
parallelStream
().
collect
(
Collector
.
of
(
matrixConstructor
,
accumulator
,
join
,
Collector
.
Characteristics
.
UNORDERED
));
// // populate the matrix with the dataset this is expensive!
// List<FlexCompColMatrix> syncList = Collections.synchronizedList(new LinkedList<>());
// instances.parallelStream().forEach((Instance inst) -> {
// // expand
// FlexCompColMatrix expandedInstance = inst.expand(mappings);
//
// syncList.add(expandedInstance);
// // TODO can we use something faster here... currently this results
// // in a speedup of 3...
// synchronized (expandedMatrixFace) {
// // add to kronecker
// if (inst.getLabel() == label) {
// expandedMatrixFace.addToMatrix(expandedMatrixFace.getTpMatrix(), expandedInstance);
// } else {
// expandedMatrixFace.addToMatrix(expandedMatrixFace.getFpMatrix(), expandedInstance);
// }
// }
//
// });
// return expandedMatrixFace;
}
public
static
MatrixMcMatrixFace
performKroneckerExpansionWithIndex
(
List
<
MatrixMapping
>
mappings
,
...
...
@@ -218,67 +246,44 @@ public class MatrixUtil {
}
public
static
void
revertKroneckerExpansion
(
MatrixMapping
previousMapping
,
List
<
Point
>
reversedfeatures
)
{
List
<
Point
>
toRemove
=
new
ArrayList
<
Point
>();
List
<
Point
>
toAdd
=
new
ArrayList
<
Point
>();
for
(
Point
p
:
reversedfeatures
)
{
Iterator
<
Point
>
iterator
=
reversedfeatures
.
iterator
();
while
(
iterator
.
hasNext
()){
Point
p
=
iterator
.
next
();
iterator
.
remove
();
// revert the kronecker expansion step this generates 2 points
int
sizeBeforeExpansion
=
(
int
)
previousMapping
.
getDenseMatrixDimension
();
int
xBefore1
=
(
int
)
Math
.
floor
(
p
.
x
/
sizeBeforeExpansion
);
int
xBefore2
=
p
.
x
%
sizeBeforeExpansion
;
int
yBefore1
=
(
int
)
Math
.
floor
(
p
.
y
/
sizeBeforeExpansion
);
int
yBefore2
=
p
.
y
%
sizeBeforeExpansion
;
Point
firstBack
=
new
Point
(
xBefore1
,
yBefore1
);
Point
secondBack
=
new
Point
(
xBefore2
,
yBefore2
);
toRemove
.
add
(
p
);
toAdd
.
add
(
secondBack
);
toAdd
.
add
(
firstBack
);
// assert that the recalculated points are contained in the mapping,
// this also guarantees the dimension is respected
assert
(
previousMapping
.
getInverseMappingMap
().
containsKey
(
firstBack
)
&&
previousMapping
.
getInverseMappingMap
().
containsKey
(
secondBack
))
:
"Reverted Points not part of the mapping"
;
// assure no point is null
assert
(
secondBack
!=
null
&&
firstBack
!=
null
)
:
"One of the reverted points is null"
;
}
reversedfeatures
.
removeAll
(
toRemove
);
reversedfeatures
.
addAll
(
toAdd
);
// assert the lists contain what they should
assert
(
toAdd
.
size
()
==
toRemove
.
size
()
*
2
&&
reversedfeatures
.
containsAll
(
toAdd
))
:
"Kronecker reversion failed"
;
}
private
static
void
revertMapping
(
MatrixMapping
currentMapping
,
List
<
Point
>
reversedfeatures
)
{
List
<
Point
>
toRemove
=
new
ArrayList
<
Point
>();
List
<
Point
>
toAdd
=
new
ArrayList
<
Point
>();
assert
(!
reversedfeatures
.
contains
(
null
))
:
"Tried to backwards map a null value"
;
reversedfeatures
.
replaceAll
((
Point
p
)
->
currentMapping
.
getBackwardsMappedFeature
(
p
));
for
(
Point
p
:
reversedfeatures
)
{
// revert the mapping step this is easy !
Point
backwardsMappedFeature
=
currentMapping
.
getBackwardsMappedFeature
(
p
);
toRemove
.
add
(
p
);
toAdd
.
add
(
backwardsMappedFeature
);
// assert that the point exists
assert
(
backwardsMappedFeature
!=
null
)
:
"Backwardsmapping resulted in null entry! \n"
+
currentMapping
.
toString
()
+
"At point: "
+
p
;
}
reversedfeatures
.
removeAll
(
toRemove
);
reversedfeatures
.
addAll
(
toAdd
);
// ensure everything went all right
assert
(
toRemove
.
size
()
==
toAdd
.
size
()
&&
toAdd
.
size
()
==
reversedfeatures
.
size
()
&&
reversedfeatures
.
containsAll
(
reversedfeatures
))
:
"Dimension of backwards mapped features is wrong"
;
}
public
static
String
convertPointListToFeatureString
(
List
<
Point
>
determineFeaturesForIndex
)
{
...
...
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/NNFeatureGenerator.java
View file @
756c4039
...
...
@@ -16,7 +16,7 @@ public class NNFeatureGenerator extends AFeatureGenerator{
public
String
[]
generateFeatures
(
AnnotationFS
token
)
{
String
featureValueAsString
=
token
.
getFeatureValueAsString
(
token
.
getType
().
getFeatureByBaseName
(
posTagFeature
));
return
new
String
[]{
featureValueAsString
.
startsWith
(
"N"
)?
super
.
featureIdentifier
+
"=NN"
:
super
.
featureIdentifier
+
"=O"
};
return
new
String
[]{
featureValueAsString
.
startsWith
(
"N
E
"
)?
super
.
featureIdentifier
+
"=NN"
:
super
.
featureIdentifier
+
"=O"
};
}
}
de.uniwue.ls6.rulelearning/RuleLearning/src/de/uniwue/ls6/rulelearning/algorithm/impl/BinaryRepresentationRuleLearningAlgorithm.java
View file @
756c4039
...
...
@@ -47,8 +47,8 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
+
"\tvs\t"
+
LabelAlphabet
.
getFeatureToId
(
otherLabel
));
System
.
out
.
println
(
"Amount distinct features: "
+
LabelAlphabet
.
getSize
());
System
.
out
.
println
(
"Start indexing for "
+
instances
.
length
+
" instances..."
);
createIndex
(
instances
);
//
System.out.println("Start indexing for " + instances.length + " instances...");
//
createIndex(instances);
System
.
out
.
println
(
"Finished creating instance!"
);
while
(
morePasses
(
instancesForPass
,
currentGoldIndex
))
{
// update the learning objective
...
...
@@ -172,7 +172,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
maxEntryLocation
=
bestMatrix
.
getLocationOfMaximum
();
// map matrix to dense matrix
MatrixMapping
mappingForMaximum
=
MatrixUtil
.
getMappingForMaximum
(
bestMatrix
,
maximumScore
);
MatrixMapping
mappingForMaximum
=
MatrixUtil
.
getMappingForMaximum
(
bestMatrix
,
maximumScore
,
mappings
);
mappings
.
add
(
mappingForMaximum
);
// assert that the maximum is within bounds
...
...
@@ -185,14 +185,7 @@ public class BinaryRepresentationRuleLearningAlgorithm implements IRepresentatio
// expand in kronecker fashion
long
time
=
System
.
currentTimeMillis
();
bestMatrix
=
MatrixUtil
.
performKroneckerExpansionWithIndex
(
mappings
,
instances
,
goldLabel
,
index
);
// MatrixMcMatrixFace other = MatrixUtil.performKroneckerExpansion(mappings, instances, goldLabel);
// if(!other.equals(bestMatrix)){
// System.out.println("BUG!");
// System.out.println(MatrixUtil.prettyMatrixFormat(bestMatrix.getTpMatrix()));
// System.out.println();
// System.out.println(MatrixUtil.prettyMatrixFormat(other.getTpMatrix()));
// }
bestMatrix
=
MatrixUtil
.
performKroneckerExpansion
(
mappings
,
instances
,
goldLabel
);
System
.
out
.
println
(
System
.
currentTimeMillis
()-
time
);
if
(!
betterRuleCanBeLearned
(
maximumScore
,
bestMatrix
))
{
...
...
de.uniwue.ls6.rulelearning/RuleLearning/src/test/FirstTest.java
View file @
756c4039
...
...
@@ -13,6 +13,7 @@ import de.uniwue.ls6.datastructure.Instance;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
import
de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NGramGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.NNFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.PrefixNGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.SuffixNGenerator
;
...
...
@@ -35,10 +36,10 @@ public class FirstTest {
TypeSystemDescription
tsd
=
TypeSystemDescriptionFactory
.
createTypeSystemDescriptionFromPath
(
typesystem
.
toURL
().
toString
());
List
<
Instance
>
instances
=
InstanceCreationFactory
.
createWindowedInstancesFromUIMA
(
document
,
5
,
5
,
List
<
Instance
>
instances
=
InstanceCreationFactory
.
createWindowedInstancesFromUIMA
(
bigDoc
,
3
,
3
,
"de.uniwue.kalimachos.coref.type.POS"
,
tsd
,
new
NNFeatureGenerator
(
"POSTag"
),
new
WordFeaturegenerator
(),
new
SuffixNGenerator
(
4
),
new
SuffixNGenerator
(
3
),
new
SuffixNGenerator
(
2
),
new
SuffixNGenerator
(
1
),
new
PrefixNGenerator
(
1
),
new
PrefixNGenerator
(
2
),
new
IsUppercaseFeatureGenerator
(),
new
PrefixNGenerator
(
3
));
new
SuffixNGenerator
(
1
),
new
PrefixNGenerator
(
1
),
new
PrefixNGenerator
(
2
),
new
IsUppercaseFeatureGenerator
(),
new
PrefixNGenerator
(
3
)
,
new
NGramGenerator
()
);
System
.
out
.
println
(
instances
.
size
());
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment