Commit 6c4daf41 authored by Markus Krug's avatar Markus Krug
Browse files

urlaub update

parent 3092cf31
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry exported="true" kind="lib" path="libs/ejml-0.25.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry kind="src" path="src"/>
<classpathentry kind="output" path="bin"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>de.uniwue.mk.kall.jkernelmachines.test</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.pde.ManifestBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.pde.SchemaBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.pde.PluginNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: Test
Bundle-SymbolicName: de.uniwue.mk.kall.jkernelmachines.test
Bundle-Version: 1.0.0.qualifier
Bundle-ClassPath: libs/ejml-0.25.jar,
.
Bundle-RequiredExecutionEnvironment: JavaSE-1.8
Require-Bundle: de.uniwue.mkrug.kall.typesystemUtil;bundle-version="1.0.0"
source.. = src/
output.. = bin/
bin.includes = META-INF/,\
.,\
libs/ejml-0.25.jar
package de.uniwue.mk.kall.jkernelmachines.test.relationkernel;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
import fr.lip6.jkernelmachines.classifier.LaSVM;
import fr.lip6.jkernelmachines.classifier.multiclass.OneAgainstAll;
import fr.lip6.jkernelmachines.type.TrainingSample;
/**
 * Manual experiment: trains a one-against-all {@link LaSVM} with a
 * {@link StringKernel} on string samples and reports how many held-out
 * samples are misclassified.
 *
 * <p>The samples are shuffled with a fixed seed; every third sample is held
 * out for testing and the rest is used for training.
 */
public class MainTestStringKernel {

    /** Label lookup table; not read or written anywhere inside this class. */
    static HashMap<String, Integer> labelMap = new HashMap<>();

    /**
     * Runs the experiment and prints, in order: the test-set size, each
     * misclassified sample with its prediction, the number of
     * misclassifications, and the prediction for {@code "Markus#Marius"}.
     *
     * @param args ignored
     * @throws Exception if the training data cannot be read or deserialized
     */
    public static void main(String[] args) throws Exception {
        File trainRelations = new File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\RelationenDaten\\NECategoryTrain2");
        // Not used yet: evaluation currently works on a split of the training data.
        File testRelations = new File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\RelationenDaten\\NECategoryTest2");

        List<TrainingSample<String>> samples = createTrainingSamples(trainRelations);
        // Fixed seed keeps the train/test split reproducible between runs.
        Collections.shuffle(samples, new Random(13374211));

        StringKernel kernel = new StringKernel();
        LaSVM<String> svm = new LaSVM<String>(kernel);
        OneAgainstAll<String> mcsvm = new OneAgainstAll<String>(svm);

        List<TrainingSample<String>> train = new ArrayList<TrainingSample<String>>();
        List<TrainingSample<String>> test = new ArrayList<TrainingSample<String>>();
        for (int i = 0; i < samples.size(); i++) {
            if (i % 3 == 0) {
                test.add(samples.get(i));
            } else {
                train.add(samples.get(i));
            }
        }

        mcsvm.train(train);
        System.out.println(test.size());

        int wrong = 0;
        for (TrainingSample<String> sample : test) {
            // Evaluate the classifier once per sample instead of once for the
            // comparison and again for the output.
            double predicted = mcsvm.valueOf(sample.sample);
            if (sample.label != predicted) {
                System.out.println(sample.sample + "\t" + predicted);
                wrong++;
            }
        }
        System.out.println(wrong);

        double valueOf = mcsvm.valueOf("Markus#Marius");
        System.out.println(valueOf);
    }

    /**
     * Deserializes every file in the given directory into a shared CAS.
     *
     * <p>Sample extraction ("create all pairs") is not implemented yet, so
     * the returned list is currently always empty.
     *
     * @param trainFile directory containing the XMI documents
     * @return the (currently empty) mutable list of training samples
     * @throws IOException if {@code trainFile} cannot be listed as a directory
     *         or a document cannot be read
     * @throws Exception if creating the CAS or deserializing a document fails
     */
    private static List<TrainingSample<String>> createTrainingSamples(File trainFile) throws IOException, Exception {
        List<TrainingSample<String>> samples = new ArrayList<>();

        // listFiles() returns null for a missing or unreadable directory; fail
        // with a meaningful message instead of a NullPointerException.
        File[] documents = trainFile.listFiles();
        if (documents == null) {
            throw new IOException("Not a readable directory: " + trainFile);
        }

        CAS cas = Util_impl.createCas();
        for (File f : documents) {
            // try-with-resources so the stream is closed even if deserialization fails.
            try (FileInputStream in = new FileInputStream(f)) {
                XmiCasDeserializer.deserialize(in, cas);
            }
            Util_impl util = new Util_impl(cas);
            // TODO create all pairs
        }
        return samples;
    }
}
package de.uniwue.mk.kall.jkernelmachines.test.relationkernel;
import fr.lip6.jkernelmachines.kernel.Kernel;
/**
 * Normalized string kernel over character subsequences of length 1 to 4.
 *
 * <p>The raw kernel is computed by dynamic programming in
 * {@link #sK(String, String, int)} and normalized with the two self-kernels,
 * so the result for two non-empty strings lies in {@code [0, 1]}.
 */
public class StringKernel extends Kernel<String> {

    private static final long serialVersionUID = 1L;

    /**
     * Returns the normalized kernel value of the two strings using
     * subsequences up to length 4.
     */
    @Override
    public double valueOf(String t1, String t2) {
        int n = 4;
        return K(t1, t2, n);
    }

    /**
     * Computes the normalized kernel {@code sK(s,t) / sqrt(sK(s,s) * sK(t,t))}.
     *
     * @param s first string
     * @param t second string
     * @param n maximum subsequence length taken into account
     * @return the normalized kernel value, or {@code 0.0} when a self-kernel
     *         is zero (which happens for an empty string) so that the result
     *         is never {@code NaN}
     */
    public double K(String s, String t, int n) {
        double ss = sK(s, s, n);
        double tt = sK(t, t, n);
        double norm = Math.sqrt(ss * tt);
        if (norm == 0.0d) {
            // 0/0 would yield NaN, which poisons every later computation in the SVM.
            return 0.0;
        }
        return sK(s, t, n) / norm;
    }

    /**
     * Unnormalized kernel: sums, for every length {@code 1..n}, the final cell
     * of a dynamic-programming table over the prefixes of {@code s} and
     * {@code t}.
     *
     * <p>NOTE(review): this appears to count pairs of matching character
     * subsequences (gaps allowed, no decay factor) - confirm against the
     * intended kernel definition.
     */
    private double sK(String s, String t, int n) {
        double sum, r = 0.0;
        int i, j, k;
        int slen = s.length();
        int tlen = t.length();
        // One flattened (tlen + 1) x (slen + 1) table per subsequence length;
        // cell (k, j) is stored at index k * (slen + 1) + j.
        double[][] table = new double[n + 1][(slen + 1) * (tlen + 1)];
        // Level 0 is the base case: every cell is 1.
        for (j = 0; j < (slen + 1); j++) {
            for (k = 0; k < (tlen + 1); k++) {
                table[0][k * (slen + 1) + j] = 1;
            }
        }
        for (i = 0; i < n; i++) {
            for (j = 0; j < slen; j++) {
                sum = 0.0;
                for (k = 0; k < tlen; k++) {
                    if (t.charAt(k) == s.charAt(j)) {
                        sum += table[i][k * (slen + 1) + j];
                    }
                    table[i + 1][(k + 1) * (slen + 1) + j + 1] = table[i + 1][(k + 1) * (slen + 1) + j] + sum;
                }
            }
            r = r + table[i + 1][tlen * (slen + 1) + slen];
        }
        return r;
    }

    /** Returns the kernel value of the string with itself. */
    @Override
    public double valueOf(String t1) {
        return valueOf(t1, t1);
    }
}
\ No newline at end of file
package de.uniwue.mk.test;
import java.util.ArrayList;
import java.util.List;
/**
 * Small driver that enumerates gapped substrings of a sample word and prints
 * them.
 */
public class MainTest {

    /** Prints all gapped substrings of length 2 of the word "Markus". */
    public static void main(String[] args) {
        generateAllSubstringAsObjects("Markus", 2);
    }

    /**
     * Builds one {@code SubstringObj} per (start, gap) combination. Each
     * object's text is the character at the start index followed by the
     * {@code len - 1} characters that begin {@code gap} positions after it.
     * Every object is printed to standard output as it is created.
     *
     * @param s1 the string to take the substrings from
     * @param len number of characters in each generated substring
     * @return the generated objects, ordered by start index and then by gap
     */
    private static List<SubstringObj> generateAllSubstringAsObjects(String s1, int len) {
        final int startCount = s1.length() - len + 1;
        List<SubstringObj> result = new ArrayList<SubstringObj>();

        for (int start = 0; start < startCount; start++) {
            final char head = s1.charAt(start);
            // The later the start, the fewer gap sizes still fit into the string.
            final int gapCount = startCount - start;
            for (int gap = 0; gap < gapCount; gap++) {
                int tailBegin = start + gap + 1;
                String tail = s1.substring(tailBegin, tailBegin + len - 1);
                SubstringObj candidate = new SubstringObj(head + tail, gap + len, start);
                System.out.println(candidate);
                result.add(candidate);
            }
        }
        return result;
    }
}
package de.uniwue.mk.test;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import fr.lip6.jkernelmachines.classifier.LaSVM;
import fr.lip6.jkernelmachines.classifier.multiclass.OneAgainstAll;
import fr.lip6.jkernelmachines.type.TrainingSample;
/**
 * Manual experiment: trains a one-against-all {@link LaSVM} with a
 * {@code NickNameSelfKernel} on name pairs of the form {@code "first#second"}
 * and reports how many held-out pairs are misclassified.
 *
 * <p>Cross-validation with {@code NFoldCrossValidation} and
 * {@code MulticlassAccuracyEvaluator} was tried here before and is currently
 * disabled.
 */
public class MainTestStringKernel {

    /**
     * Runs the experiment and prints, in order: the test-set size, every
     * correctly classified sample with its prediction, the number of
     * misclassifications, and the prediction for {@code "Markus#Marius"}.
     *
     * @param args ignored
     * @throws IOException if the nickname file cannot be read
     */
    public static void main(String[] args) throws IOException {
        File nickNameTrain = new File("C:\\owncloud\\resources\\Spitznamen\\spitznamen.txt");
        List<TrainingSample<String>> samples = createTrainingSamples(nickNameTrain);
        // Fixed seed keeps the train/test split reproducible between runs.
        Collections.shuffle(samples, new Random(13374211));

        NickNameSelfKernel kernel = new NickNameSelfKernel();
        LaSVM<String> svm = new LaSVM<String>(kernel);
        OneAgainstAll<String> mcsvm = new OneAgainstAll<String>(svm);

        // Every third sample is held out for testing.
        List<TrainingSample<String>> train = new ArrayList<TrainingSample<String>>();
        List<TrainingSample<String>> test = new ArrayList<TrainingSample<String>>();
        for (int i = 0; i < samples.size(); i++) {
            if (i % 3 == 0) {
                test.add(samples.get(i));
            } else {
                train.add(samples.get(i));
            }
        }

        mcsvm.train(train);
        System.out.println(test.size());

        int wrong = 0;
        for (TrainingSample<String> sample : test) {
            // Evaluate the classifier once per sample instead of once for the
            // comparison and again for the output.
            double predicted = mcsvm.valueOf(sample.sample);
            if (sample.label != predicted) {
                wrong++;
            } else {
                System.out.println(sample.sample + "\t" + predicted);
            }
        }
        System.out.println(wrong);

        double valueOf = mcsvm.valueOf("Markus#Marius");
        System.out.println(valueOf);
    }

    /**
     * Builds labelled name pairs from a file in which each line holds
     * tab-separated names.
     *
     * <p>Every ordered pair of names from the same line (including a name
     * paired with itself) becomes a sample with label 1. Every pair made of a
     * name from one line and a name from the following line becomes a sample
     * with label 0. All positive samples precede all negative samples in the
     * returned list.
     *
     * <p>Blank lines are skipped; they would otherwise produce the
     * meaningless sample {@code "#"}.
     *
     * @param nickNameTrain the tab-separated nickname file
     * @return a mutable list of lower-cased {@code "first#second"} samples
     * @throws IOException if the file cannot be read
     */
    private static List<TrainingSample<String>> createTrainingSamples(File nickNameTrain) throws IOException {
        List<String[]> nameGroups = new ArrayList<String[]>();
        for (String line : Files.readAllLines(nickNameTrain.toPath())) {
            if (line.trim().isEmpty()) {
                continue;
            }
            nameGroups.add(line.split("\t"));
        }

        List<TrainingSample<String>> train = new ArrayList<TrainingSample<String>>();

        // Positive samples: all combinations within one line.
        for (String[] group : nameGroups) {
            for (String s : group) {
                for (String k : group) {
                    train.add(new TrainingSample<String>(pairOf(s, k), 1));
                }
            }
        }

        // Negative samples: all combinations across two neighbouring lines.
        for (int i = 0; i < nameGroups.size() - 1; i++) {
            for (String s : nameGroups.get(i)) {
                for (String k : nameGroups.get(i + 1)) {
                    train.add(new TrainingSample<String>(pairOf(s, k), 0));
                }
            }
        }
        return train;
    }

    /**
     * Joins two names into the lower-cased {@code "first#second"} sample
     * format. Uses {@code Locale.ROOT} so the result does not depend on the
     * JVM's default locale.
     */
    private static String pairOf(String first, String second) {
        return (first + "#" + second).toLowerCase(java.util.Locale.ROOT);
    }
}
package de.uniwue.mk.test;
import fr.lip6.jkernelmachines.kernel.Kernel;
/**
 * Kernel over name-pair samples of the form {@code "first#second"}.
 *
 * <p>Each sample is turned into a small feature vector describing how similar
 * its two names are; two samples are then compared with a Dice coefficient
 * over their feature vectors.
 */
public class NickNameKernel extends Kernel<String> {

    private static final long serialVersionUID = 1L;

    /**
     * Compares two {@code "first#second"} samples.
     *
     * @return the Dice coefficient of the two samples' feature vectors
     * @throws IllegalArgumentException if a sample does not contain two
     *         {@code '#'}-separated parts
     */
    @Override
    public double valueOf(String t1, String t2) {
        int n = 4;
        double[] t1Vec = calculateStringKernelVector(t1, n);
        double[] t2Vec = calculateStringKernelVector(t2, n);
        // Cosine and Jaccard are available below as alternative measures.
        return calculateDiceDistance(t1Vec, t2Vec);
    }

    /**
     * Builds the feature vector of one sample: entries {@code 0..n-1} hold the
     * normalized string kernel of the two names for maximum subsequence
     * lengths {@code 1..n}; entry {@code n} is 1 if one name ends with the
     * other; entry {@code n + 1} is 1 if one name contains the other.
     *
     * <p>Keeping only the per-length increment of the kernel entries was tried
     * and did not improve the score.
     *
     * @throws IllegalArgumentException if {@code t1} does not split into at
     *         least two parts at {@code '#'}
     */
    private double[] calculateStringKernelVector(String t1, int n) {
        // Split once instead of re-splitting for every single access.
        String[] names = t1.split("#");
        if (names.length < 2) {
            throw new IllegalArgumentException(
                    "Expected a sample of the form 'first#second' but got: " + t1);
        }
        String first = names[0];
        String second = names[1];

        double[] features = new double[n + 2];
        for (int i = 1; i <= n; i++) {
            features[i - 1] = K(first, second, i);
        }
        features[n] = (first.endsWith(second) || second.endsWith(first)) ? 1 : 0;
        features[n + 1] = (first.contains(second) || second.contains(first)) ? 1 : 0;
        return features;
    }

    /**
     * Cosine of the angle between the two vectors, or 0 if either vector has
     * zero length. Currently unused alternative to the Dice measure.
     */
    private double calculateCosineDistance(double[] t1Vec, double[] t2Vec) {
        double numerator = 0.0;
        double len1 = 0.0;
        double len2 = 0.0;
        for (int i = 0; i < t1Vec.length; i++) {
            numerator += t1Vec[i] * t2Vec[i];
            len1 += t1Vec[i] * t1Vec[i];
            len2 += t2Vec[i] * t2Vec[i];
        }
        double denum = Math.sqrt(len1) * Math.sqrt(len2);
        double cosine = 0.0;
        if (denum != 0.0d) {
            cosine = numerator / denum;
        }
        return cosine;
    }

    /**
     * Dice coefficient {@code 2 * sum(min) / sum(a + b)} of the two vectors,
     * or 0 if the denominator is zero.
     */
    private double calculateDiceDistance(double[] t1Vec, double[] t2Vec) {
        double numerator = 0.0;
        double denom = 0.0;
        for (int i = 0; i < t1Vec.length; i++) {
            numerator += 2 * Math.min(t1Vec[i], t2Vec[i]);
            denom += t1Vec[i] + t2Vec[i];
        }
        if (denom == 0.0d) {
            return 0;
        }
        return numerator / denom;
    }

    /**
     * Jaccard coefficient {@code sum(min) / sum(max)} of the two vectors, or 0
     * if the denominator is zero. Currently unused alternative to the Dice
     * measure.
     */
    private double calculateJaccardDistance(double[] t1Vec, double[] t2Vec) {
        double numerator = 0.0;
        double denom = 0.0;
        for (int i = 0; i < t1Vec.length; i++) {
            numerator += Math.min(t1Vec[i], t2Vec[i]);
            denom += Math.max(t1Vec[i], t2Vec[i]);
        }
        if (denom == 0.0d) {
            return 0;
        }
        return numerator / denom;
    }

    /**
     * Computes the normalized kernel {@code sK(s,t) / sqrt(sK(s,s) * sK(t,t))}.
     *
     * @param s first string
     * @param t second string
     * @param n maximum subsequence length taken into account
     * @return the normalized kernel value, or {@code 0.0} when a self-kernel
     *         is zero (which happens for an empty string) so that the result
     *         is never {@code NaN}
     */
    public double K(String s, String t, int n) {
        double ss = sK(s, s, n);
        double tt = sK(t, t, n);
        double norm = Math.sqrt(ss * tt);
        if (norm == 0.0d) {
            // 0/0 would yield NaN, which would propagate into the feature vector.
            return 0.0;
        }
        return sK(s, t, n) / norm;
    }

    /**
     * Unnormalized kernel: sums, for every length {@code 1..n}, the final cell
     * of a dynamic-programming table over the prefixes of {@code s} and
     * {@code t}.
     *
     * <p>NOTE(review): this appears to count pairs of matching character
     * subsequences (gaps allowed, no decay factor) - confirm against the
     * intended kernel definition.
     */
    private double sK(String s, String t, int n) {
        double sum, r = 0.0;
        int i, j, k;
        int slen = s.length();
        int tlen = t.length();
        // One flattened (tlen + 1) x (slen + 1) table per subsequence length;
        // cell (k, j) is stored at index k * (slen + 1) + j.
        double[][] table = new double[n + 1][(slen + 1) * (tlen + 1)];
        // Level 0 is the base case: every cell is 1.
        for (j = 0; j < (slen + 1); j++) {
            for (k = 0; k < (tlen + 1); k++) {
                table[0][k * (slen + 1) + j] = 1;
            }
        }
        for (i = 0; i < n; i++) {
            for (j = 0; j < slen; j++) {
                sum = 0.0;
                for (k = 0; k < tlen; k++) {
                    if (t.charAt(k) == s.charAt(j)) {
                        sum += table[i][k * (slen + 1) + j];
                    }
                    table[i + 1][(k + 1) * (slen + 1) + j + 1] = table[i + 1][(k + 1) * (slen + 1) + j] + sum;
                }
            }
            r = r + table[i + 1][tlen * (slen + 1) + slen];
        }
        return r;
    }

    /** Returns the kernel value of the sample with itself. */
    @Override
    public double valueOf(String t1) {
        return valueOf(t1, t1);
    }
}
package de.uniwue.mk.test;
import java.util.ArrayList;
import java.util.List;
import fr.lip6.jkernelmachines.kernel.Kernel;
public class NickNameSelfKernel extends Kernel<