Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
EfficientRuleLearning
Commits
a658cbfd
Commit
a658cbfd
authored
Oct 27, 2016
by
Markus Krug
Browse files
*full implementation, does not work!
*should start the debugging now
parent
8e4542fe
Changes
16
Hide whitespace changes
Inline
Side-by-side
de.uniwue.ls6.rulelearning/.classpath
View file @
a658cbfd
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry
kind=
"src"
output=
"target/classes"
path=
"src/main/java"
>
<attributes>
<attribute
name=
"optional"
value=
"true"
/>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"src"
output=
"target/test-classes"
path=
"src/test/java"
>
<attributes>
<attribute
name=
"optional"
value=
"true"
/>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"con"
path=
"org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"
>
<attributes>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
...
...
de.uniwue.ls6.rulelearning/DataStructure/src/de/uniwue/ls6/datastructure/Instance.java
View file @
a658cbfd
...
...
@@ -20,12 +20,12 @@ public class Instance {
public
Instance
(
int
nrRows
,
int
nrCols
)
{
super
();
featureArray
=
new
int
[
nr
Row
s
][
nr
Col
s
];
featureArray
=
new
int
[
nr
Col
s
][
nr
Row
s
];
}
public
Instance
(
int
nrRows
,
int
nrCols
,
int
label
)
{
super
();
featureArray
=
new
int
[
nr
Row
s
][
nr
Col
s
];
featureArray
=
new
int
[
nr
Col
s
][
nr
Row
s
];
this
.
label
=
label
;
}
...
...
@@ -33,6 +33,9 @@ public class Instance {
return
label
;
}
/**
 * Writes a single feature value into the feature array.
 * The array is indexed column first, then row: {@code featureArray[col][row]}.
 *
 * @param col   first index into the feature array
 * @param row   second index into the feature array
 * @param value the value to store at that position
 */
public
void
setFeatureAt
(
int
col
,
int
row
,
int
value
){
featureArray
[
col
][
row
]
=
value
;
}
/**
 * Replaces the label stored on this instance.
 *
 * @param label the new label value
 */
public
void
setLabel
(
int
label
)
{
this
.
label
=
label
;
}
...
...
de.uniwue.ls6.rulelearning/InstanceLoading/.classpath
0 → 100644
View file @
a658cbfd
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry
kind=
"src"
path=
"src"
/>
<classpathentry
kind=
"con"
path=
"org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"
>
<attributes>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"con"
path=
"org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"
>
<attributes>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"output"
path=
"target/classes"
/>
</classpath>
de.uniwue.ls6.rulelearning/InstanceLoading/.gitignore
0 → 100644
View file @
a658cbfd
/target/
de.uniwue.ls6.rulelearning/InstanceLoading/.project
0 → 100644
View file @
a658cbfd
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>
InstanceLoading
</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>
org.eclipse.jdt.core.javabuilder
</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>
org.eclipse.m2e.core.maven2Builder
</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>
org.eclipse.jdt.core.javanature
</nature>
<nature>
org.eclipse.m2e.core.maven2Nature
</nature>
</natures>
</projectDescription>
de.uniwue.ls6.rulelearning/InstanceLoading/.settings/org.eclipse.core.resources.prefs
0 → 100644
View file @
a658cbfd
eclipse.preferences.version=1
encoding/<project>=UTF-8
de.uniwue.ls6.rulelearning/InstanceLoading/.settings/org.eclipse.jdt.core.prefs
0 → 100644
View file @
a658cbfd
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
de.uniwue.ls6.rulelearning/InstanceLoading/.settings/org.eclipse.m2e.core.prefs
0 → 100644
View file @
a658cbfd
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
de.uniwue.ls6.rulelearning/InstanceLoading/pom.xml
0 → 100644
View file @
a658cbfd
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<modelVersion>
4.0.0
</modelVersion>
<parent>
<groupId>
de.uniwue.ls6.rulelearning
</groupId>
<relativePath>
../
</relativePath>
<artifactId>
ruleLearningParent
</artifactId>
<version>
0.0.1-SNAPSHOT
</version>
</parent>
<groupId>
de.uniwue.ls6.rulelearning
</groupId>
<artifactId>
InstanceLoading
</artifactId>
<version>
0.0.1-SNAPSHOT
</version>
<packaging>
jar
</packaging>
<name>
InstanceLoading
</name>
<url>
http://maven.apache.org
</url>
<properties>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>
junit
</groupId>
<artifactId>
junit
</artifactId>
<version>
3.8.1
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
de.uniwue.ls6.rulelearning
</groupId>
<artifactId>
DataStructure
</artifactId>
<version>
0.0.1-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
org.apache.uima
</groupId>
<artifactId>
uimaj-core
</artifactId>
<version>
2.9.0
</version>
</dependency>
<dependency>
<groupId>
org.apache.uima
</groupId>
<artifactId>
uimafit-core
</artifactId>
<version>
2.2.0
</version>
</dependency>
</dependencies>
</project>
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/AFeatureGenerator.java
0 → 100644
View file @
a658cbfd
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Base class for all feature generators. A generator turns one annotation
 * into zero or more string features.
 */
public
abstract
class
AFeatureGenerator
{
/**
 * Generates the features for a single annotation.
 *
 * @param token the annotation to derive the features from
 * @return the generated features as strings
 */
public
abstract
String
[]
generateFeatures
(
AnnotationFS
token
);
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/IsUppercaseFeatureGenerator.java
0 → 100644
View file @
a658cbfd
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that reports whether the text covered by an annotation
 * starts with an uppercase character.
 */
public class IsUppercaseFeatureGenerator extends AFeatureGenerator {

	/** Feature value emitted when the first character is not uppercase. */
	public static final String LOWERCASE = "Lowercase";

	/** Feature value emitted when the first character is uppercase. */
	public static final String UPPERCASE = "Uppercase";

	/**
	 * Returns a one-element array holding {@link #UPPERCASE} if the first
	 * character of the covered text is uppercase, {@link #LOWERCASE} otherwise.
	 * Annotations without any covered text (null or empty) yield
	 * {@link #LOWERCASE} instead of failing on {@code charAt(0)}.
	 *
	 * @param token the annotation to inspect
	 * @return a single-element array with the case feature
	 */
	@Override
	public String[] generateFeatures(AnnotationFS token) {
		String text = token.getCoveredText();
		// length() instead of isEmpty(): the project is compiled at source level 1.5
		boolean startsUppercase = text != null && text.length() > 0
				&& Character.isUpperCase(text.charAt(0));
		return new String[] { startsUppercase ? UPPERCASE : LOWERCASE };
	}
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/featuregenerator/WordFeaturegenerator.java
0 → 100644
View file @
a658cbfd
package
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator
;
import
org.apache.uima.cas.text.AnnotationFS
;
/**
 * Feature generator that uses the text covered by an annotation as its only
 * feature.
 */
public class WordFeaturegenerator extends AFeatureGenerator {

	/**
	 * Returns the covered text of the annotation wrapped in a one-element array.
	 *
	 * @param token the annotation whose covered text is used
	 * @return a single-element array containing the covered text
	 */
	@Override
	public String[] generateFeatures(AnnotationFS token) {
		final String[] wordFeature = new String[1];
		wordFeature[0] = token.getCoveredText();
		return wordFeature;
	}
}
de.uniwue.ls6.rulelearning/InstanceLoading/src/de/uniwue/ls6/rulelearning/instanceloading/io/InstanceCreationFactory.java
0 → 100644
View file @
a658cbfd
package
de.uniwue.ls6.rulelearning.instanceloading.io
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Queue
;
import
org.apache.uima.cas.CAS
;
import
org.apache.uima.cas.Type
;
import
org.apache.uima.cas.impl.XmiCasDeserializer
;
import
org.apache.uima.cas.text.AnnotationFS
;
import
org.apache.uima.resource.ResourceInitializationException
;
import
org.apache.uima.resource.metadata.TypeSystemDescription
;
import
org.apache.uima.util.CasCreationUtils
;
import
org.xml.sax.SAXException
;
import
de.uniwue.ls6.datastructure.Instance
;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.AFeatureGenerator
;
/**
 * Creates windowed {@link Instance}s from a UIMA document that was serialized
 * as XMI.
 */
public class InstanceCreationFactory {

	/**
	 * Reads an XMI document and creates one instance per annotation of the
	 * given token type. Every instance describes a window of
	 * {@code leftWindowsize + 1 + rightWindowSize} slots centered on its token;
	 * slots that lie before the first or after the last token stay empty. The
	 * label of an instance is the first feature the gold generator produces
	 * for the center token.
	 *
	 * @param fileToDocument  the XMI file to load
	 * @param leftWindowsize  number of context slots before the center token, must not be negative
	 * @param rightWindowSize number of context slots after the center token, must not be negative
	 * @param tokentypeS      name of the annotation type to iterate over
	 * @param typesystem      type system used to create the CAS
	 * @param goldGenerator   generator whose first feature becomes the label
	 * @param generators      generators whose features are concatenated for every token
	 * @return one instance per token, in the order of the annotation index
	 * @throws IllegalArgumentException        if a window size is negative or the token type is
	 *                                         not part of the type system
	 * @throws ResourceInitializationException if the CAS cannot be created
	 * @throws SAXException                    if the XMI content cannot be parsed
	 * @throws IOException                     if the file cannot be read
	 */
	public static List<Instance> createWindowedInstancesFromUIMA(File fileToDocument, int leftWindowsize,
			int rightWindowSize, String tokentypeS, TypeSystemDescription typesystem,
			AFeatureGenerator goldGenerator, AFeatureGenerator... generators)
			throws ResourceInitializationException, SAXException, IOException {
		if (leftWindowsize < 0 || rightWindowSize < 0) {
			throw new IllegalArgumentException("Window sizes must not be negative: left=" + leftWindowsize
					+ ", right=" + rightWindowSize);
		}

		// deserialize
		CAS cas = CasCreationUtils.createCas(typesystem, null, null);
		FileInputStream fis = new FileInputStream(fileToDocument);
		try {
			XmiCasDeserializer.deserialize(fis, cas);
		} finally {
			// release the file handle even if deserialization fails
			fis.close();
		}
		Type tokentype = cas.getTypeSystem().getType(tokentypeS);
		if (tokentype == null) {
			throw new IllegalArgumentException("Type is not part of the type system: " + tokentypeS);
		}

		List<Instance> instances = new ArrayList<Instance>();
		int windowSize = leftWindowsize + 1 + rightWindowSize;
		Queue<List<String>> windowQueue = new LinkedList<List<String>>();
		List<String> labelList = new ArrayList<String>();

		// left padding: the first token has no predecessors
		for (int i = 0; i < leftWindowsize; i++) {
			windowQueue.add(new LinkedList<String>());
		}

		for (AnnotationFS token : cas.getAnnotationIndex(tokentype)) {
			labelList.add(goldGenerator.generateFeatures(token)[0]);
			// push first, emit afterwards: as soon as the window is full its
			// center is the oldest token that has no instance yet
			windowQueue.add(generateFeatureForToken(token, generators));
			emitInstanceIfWindowFull(windowQueue, windowSize, labelList, instances);
		}

		// right padding: flushes the last tokens, which have no successors
		for (int i = 0; i < rightWindowSize; i++) {
			windowQueue.add(new LinkedList<String>());
			emitInstanceIfWindowFull(windowQueue, windowSize, labelList, instances);
		}
		return instances;
	}

	/**
	 * Emits the instance for the current window once the queue holds
	 * {@code windowSize} slots and then drops the oldest slot. Instances are
	 * emitted in token order, so the number of instances created so far is the
	 * index of the center token in {@code labelList}.
	 */
	private static void emitInstanceIfWindowFull(Queue<List<String>> windowQueue, int windowSize,
			List<String> labelList, List<Instance> instances) {
		if (windowQueue.size() < windowSize) {
			return;
		}
		String goldFeature = labelList.get(instances.size());
		instances.add(generateInstanceFromQueue(windowQueue, windowSize, goldFeature));
		windowQueue.poll();
	}

	/**
	 * Builds one instance from the current content of the window queue. Every
	 * queue entry becomes a column and every feature of that entry a row in
	 * this column. Cells of entries with fewer features (e.g. padding slots)
	 * are not written.
	 *
	 * @param windowQueue the slots of the window, oldest first
	 * @param windowSize  number of columns of the instance
	 * @param goldfeature label of the instance as string
	 * @return the filled instance
	 */
	private static Instance generateInstanceFromQueue(Queue<List<String>> windowQueue, int windowSize,
			String goldfeature) {
		// the slot with the most features determines the number of rows
		int maxNrRows = 0;
		for (List<String> features : windowQueue) {
			maxNrRows = Math.max(maxNrRows, features.size());
		}

		int idToFeature = LabelAlphabet.getIdToFeature(goldfeature);
		Instance instance = new Instance(maxNrRows, windowSize, idToFeature);

		// fill the data
		int colNr = 0;
		for (List<String> tokenFeatures : windowQueue) {
			for (int i = 0; i < tokenFeatures.size(); i++) {
				instance.setFeatureAt(colNr, i, LabelAlphabet.getIdToFeature(tokenFeatures.get(i)));
			}
			colNr++;
		}
		return instance;
	}

	/**
	 * Runs all generators on the token and concatenates their features in
	 * generator order.
	 *
	 * @param token      the annotation to generate features for
	 * @param generators the generators to apply
	 * @return all generated features in one list
	 */
	private static List<String> generateFeatureForToken(AnnotationFS token, AFeatureGenerator[] generators) {
		List<String> features = new ArrayList<String>();
		for (AFeatureGenerator generator : generators) {
			features.addAll(Arrays.asList(generator.generateFeatures(token)));
		}
		return features;
	}
}
de.uniwue.ls6.rulelearning/RuleLearning/pom.xml
View file @
a658cbfd
...
...
@@ -9,6 +9,11 @@
<artifactId>
DataStructure
</artifactId>
<version>
0.0.1-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
de.uniwue.ls6.rulelearning
</groupId>
<artifactId>
InstanceLoading
</artifactId>
<version>
0.0.1-SNAPSHOT
</version>
</dependency>
</dependencies>
<parent>
<groupId>
de.uniwue.ls6.rulelearning
</groupId>
...
...
de.uniwue.ls6.rulelearning/RuleLearning/src/test/FirstTest.java
0 → 100644
View file @
a658cbfd
package
test
;
import
java.io.File
;
import
java.io.IOException
;
import
java.util.List
;
import
org.apache.uima.fit.factory.TypeSystemDescriptionFactory
;
import
org.apache.uima.resource.ResourceInitializationException
;
import
org.apache.uima.resource.metadata.TypeSystemDescription
;
import
org.xml.sax.SAXException
;
import
de.uniwue.ls6.datastructure.Instance
;
import
de.uniwue.ls6.datastructure.LabelAlphabet
;
import
de.uniwue.ls6.rulelearning.algorithm.impl.BinaryRepresentationRuleLearningAlgorithm
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.IsUppercaseFeatureGenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.featuregenerator.WordFeaturegenerator
;
import
de.uniwue.ls6.rulelearning.instanceloading.io.InstanceCreationFactory
;
/**
 * Manual smoke test: loads one XMI document, creates windowed instances from
 * it and runs the binary rule learning algorithm on them.
 */
public class FirstTest {

	/** Document that is used when no path is given on the command line. */
	private static final String DEFAULT_DOCUMENT = "C:\\Users\\mkrug\\annoTest\\TestProject\\input\\Aston,-Louise__Lydia.xmi.xmi.xmi";

	/** Type system descriptor that is used when no path is given on the command line. */
	private static final String DEFAULT_TYPESYSTEM = "C:\\Users\\mkrug\\annoTest\\TestProject\\descriptor\\MiKalliTypesystem.xml";

	/**
	 * Runs the smoke test.
	 *
	 * @param args optional: {@code args[0]} is the path of the XMI document,
	 *             {@code args[1]} the path of the type system descriptor;
	 *             missing arguments fall back to the built-in default paths
	 * @throws Exception if loading the document or learning fails
	 */
	public static void main(String[] args) throws Exception {
		File document = new File(argumentOrDefault(args, 0, DEFAULT_DOCUMENT));
		File typesystem = new File(argumentOrDefault(args, 1, DEFAULT_TYPESYSTEM));

		// the algorithm is created before the instances are loaded, as in the
		// original order of the calls to LabelAlphabet
		BinaryRepresentationRuleLearningAlgorithm algorithm = new BinaryRepresentationRuleLearningAlgorithm(
				LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.LOWERCASE),
				LabelAlphabet.getIdToFeature(IsUppercaseFeatureGenerator.UPPERCASE));

		TypeSystemDescription tsd = TypeSystemDescriptionFactory
				.createTypeSystemDescriptionFromPath(typesystem.toURL().toString());
		List<Instance> instances = InstanceCreationFactory.createWindowedInstancesFromUIMA(document, 2, 2,
				"de.uniwue.kalimachos.coref.type.POS", tsd, new IsUppercaseFeatureGenerator(),
				new WordFeaturegenerator());

		System.out.println(instances.size());
		algorithm.learn(instances.toArray(new Instance[0]));
	}

	/** Returns {@code args[index]} if it is present, otherwise the fallback value. */
	private static String argumentOrDefault(String[] args, int index, String fallback) {
		return (args != null && args.length > index) ? args[index] : fallback;
	}
}
de.uniwue.ls6.rulelearning/pom.xml
View file @
a658cbfd
...
...
@@ -8,5 +8,6 @@
<module>
DataStructure
</module>
<module>
Evaluation
</module>
<module>
RuleLearning
</module>
<module>
InstanceLoading
</module>
</modules>
</project>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment