diff --git a/pom.xml b/pom.xml index c1b59250ad93973b94e9d4fe95fdad97e06d72f2..61e3cf17d776320b60ace816baf0fa79f21e25a2 100644 --- a/pom.xml +++ b/pom.xml @@ -12,6 +12,16 @@ <maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.source>1.8</maven.compiler.source> </properties> + + <repositories> + <repository> + <id>ifi-central</id> + <name>artifactory-releases</name> + <url>http://artifactory-ls6.informatik.uni-wuerzburg.de/artifactory/libs-release + </url> + </repository> + </repositories> + <dependencies> <dependency> <groupId>org.jetbrains.kotlin</groupId> @@ -23,6 +33,28 @@ <artifactId>junit</artifactId> <version>4.8.2</version> </dependency> + + <dependency> + <groupId>com.sparkjava</groupId> + <artifactId>spark-kotlin</artifactId> + <version>1.0.0-alpha</version> + </dependency> + <dependency> + <groupId>com.google.code.gson</groupId> + <artifactId>gson</artifactId> + <version>2.8.6</version> + </dependency> + + <dependency> + <groupId>de.uniwue.aries</groupId> + <artifactId>aries-ontology-model-io</artifactId> + <version>1.0.0</version> + </dependency> + <dependency> + <groupId>de.uniwue.aries</groupId> + <artifactId>aries-algorithm</artifactId> + <version>1.1.13</version> + </dependency> </dependencies> <build> <sourceDirectory>src/main/kotlin</sourceDirectory> @@ -66,6 +98,11 @@ <descriptorRefs> <descriptorRef>jar-with-dependencies</descriptorRef> </descriptorRefs> + <archive> + <manifest> + <mainClass>de.dzptm.server.ServerKt</mainClass> + </manifest> + </archive> </configuration> <executions> <execution> diff --git a/src/main/kotlin/AnordnungReader.kt b/src/main/kotlin/AnordnungReader.kt index d41f5ab6670247d7b32285bf56fadecdf2c5c11c..3f7fc84a27b225a135ac0ebae89dd1ff23e1fd1a 100644 --- a/src/main/kotlin/AnordnungReader.kt +++ b/src/main/kotlin/AnordnungReader.kt @@ -4,6 +4,7 @@ import java.nio.file.Paths val inputPathAnordnung = Paths.get("C:\\users\\jok3r\\domains\\anordnungen\\anordnungen.txt") val inputPathAuftraege = 
/**
 * Loads the Anordnungen and the Auftraege and pairs every Anordnung with the
 * Auftrag that [getAuftrag] returns for it.
 *
 * An Anordnung for which [getAuftrag] throws is not part of the result; the
 * exception message is printed to standard output and processing continues.
 *
 * @return the (Anordnung, Auftrag) pairs in the order of the Anordnungen read.
 */
fun getPairs(): List<Pair<Map<String, String>, Map<String, String>>> {
    // Same read order as before: Anordnungen first, then Auftraege.
    val orders = readAnordnungen()
    val requests = readAuftraege()

    val matched = mutableListOf<Pair<Map<String, String>, Map<String, String>>>()
    for (order in orders) {
        try {
            matched.add(order to getAuftrag(order, requests))
        } catch (failure: Exception) {
            // Best effort: report the failed lookup and skip this entry.
            println(failure.message)
        }
    }
    return matched
}
"CORDTITLE", "KANAM", "FRAGE", "SCHWKZ", "ERDAT", "ERTIM", "ERUSR", "IFG") + //val whitelist = listOf<String>("PATNR", "CORDTITLE", "KANAM", "FRAGE", "SCHWKZ", "ERDAT", "ERTIM", "ERUSR", "IFG") + val whitelist = null return readCSVData(lines, headers, whitelist) } diff --git a/src/main/kotlin/ExtractorTest.kt b/src/main/kotlin/ExtractorTest.kt new file mode 100644 index 0000000000000000000000000000000000000000..dbca726aa9d018ab53ae4cbef97d353ba69f3df3 --- /dev/null +++ b/src/main/kotlin/ExtractorTest.kt @@ -0,0 +1,44 @@ +import de.uniwue.aries.algorithm.ARIESAlgorithm +import de.uniwue.aries.ontology.io.ExcelFileReader +import de.uniwue.aries.uima.types.Types +import org.apache.uima.util.CasCreationUtils + +fun main() { + val ontology = ExcelFileReader.read("C:\\users\\jok3r\\domains\\anordnungen\\KlinischerAuftrag20201105.xlsx") + val algo = ARIESAlgorithm() + val pairs = getPairs() + val system = pairs.map { it.first["CONTENT"] }.map { + val cas = CasCreationUtils.createCas(Types.getTypeSystem(), null, null) + cas.documentText = it + algo.extract(cas, ontology) + val tEntity = Types.getType(cas, Types.IEENTITY) + val owlid = cas.getAnnotationIndex(tEntity) + .sortedBy { it.begin } + .map { it.getFeatureValueAsString(tEntity.getFeatureByBaseName("owlid")) } + .firstOrNull() + val result = when (owlid) { + "ModalitätCT" -> "Computer-Tomographie" + "Modalitätkonv. Röntgen" -> "konv. Röntgen" + "ModalitätMR-Tomographie" -> "MR - Tomographie Rö" + "ModalitätUltraschall" -> "Ultraschall" + null -> "nothingFound" + else -> owlid + } + return@map result + } + val gold = pairs.map { it.second["CORDTITLE"]} + val rights = mutableMapOf<String, Double>() + val wrongs = mutableMapOf<String, Double>() + for (i in system.indices) { + if (system[i] == gold[i]) { + rights.computeIfPresent(gold[i]!!) {_, v -> v + 1 } + rights.putIfAbsent(gold[i]!!, 1.0) + } else { + wrongs.computeIfPresent(gold[i]!!) 
/**
 * HTTP handler that runs the ARIES algorithm on a submitted document text and
 * returns the extracted information as a JSON string.
 */
object Extractor {
    /** Name of the classpath resource containing the ontology passed to ARIES. */
    private const val ONTOLOGY_RESOURCE = "KlinischerAuftrag20201105.xlsx"

    private val gson = Gson()

    /**
     * Handles one extraction request.
     *
     * Reads the `input` query parameter, which must be a JSON object with the
     * fields `docid`, `doctime` and `doctext`, runs ARIES on `doctext` and
     * returns an [Output] serialized as JSON. Only `docid`, `doctime` and
     * `modality` are filled so far; every other field is an empty placeholder.
     *
     * If the parameter is missing, is not valid JSON, or lacks one of the three
     * fields, the response status is set to 400 and a JSON object with an
     * `error` message is returned instead of failing with an exception.
     *
     * @param req the incoming request carrying the `input` query parameter.
     * @param res the response; its content type (and status on error) is set here.
     * @return the JSON body to send back.
     */
    fun extract(req: Request, res: Response): String {
        res.type("application/json")

        val input = parseInput(req.queryParams("input"))
        if (input == null) {
            res.status(400)
            return gson.toJson(
                mapOf("error" to "query parameter 'input' must be a JSON object with docid, doctime and doctext")
            )
        }

        val cas = CasCreationUtils.createCas(Types.getTypeSystem(), null, null)
        cas.documentText = input.doctext

        applyARIES(cas)

        //TODO do something meaningful
        val output = Output(
            docid = input.docid,
            doctime = input.doctime,
            modality = getModality(cas),
            anamnesis = "",
            diagnosis = "",
            question = "",
            notice = "",
            pregnancy = "",
            pregnancy_week = "",
            infection = "",
            implantate = "",
            pacemaker = "",
            allergy = "",
            allergy_type_iodine = "",
            allergy_type_other = "",
            preferred_date = "",
            preferred_date_cycle = "",
            services = mutableListOf()
        )
        return gson.toJson(output)
    }

    /**
     * Parses the raw `input` parameter into an [Input].
     *
     * @return the parsed input, or `null` when [raw] is absent, blank, not
     *   parseable as JSON, or is missing one of the required fields.
     */
    // Gson populates fields reflectively and can leave properties declared
    // non-null in Kotlin as null, hence the explicit (and "senseless") checks.
    @Suppress("SENSELESS_COMPARISON")
    private fun parseInput(raw: String?): Input? {
        if (raw.isNullOrBlank()) return null
        val parsed: Input? = try {
            gson.fromJson(raw, Input::class.java)
        } catch (e: com.google.gson.JsonParseException) {
            null
        }
        return parsed?.takeIf { it.docid != null && it.doctime != null && it.doctext != null }
    }

    /**
     * Loads the ontology from the classpath and runs the ARIES algorithm on [cas].
     *
     * @throws IllegalStateException if the ontology resource is not on the classpath.
     */
    private fun applyARIES(cas: CAS) {
        // NOTE(review): the ontology is re-read on every request; caching it would
        // be cheaper, but only if ARIES does not mutate it and it may be shared
        // between request threads - confirm before changing.
        val stream = checkNotNull(ClassLoader.getSystemResourceAsStream(ONTOLOGY_RESOURCE)) {
            "Ontology resource $ONTOLOGY_RESOURCE not found on the classpath"
        }
        // Close the resource stream once the ontology has been read.
        val ontology = stream.use { ExcelFileReader.read(it) }
        ARIESAlgorithm().extract(cas, ontology)
    }

    /**
     * Maps the `owlid` of the entity annotation with the smallest begin offset
     * to a modality label.
     *
     * @return the mapped label, the raw `owlid` when no mapping exists, or
     *   `"nothingFound"` when there is no annotation or its `owlid` is null.
     */
    private fun getModality(cas: CAS): String {
        val tEntity = Types.getType(cas, Types.IEENTITY)
        // Look the feature up once instead of once per annotation.
        val owlidFeature = tEntity.getFeatureByBaseName("owlid")
        val owlid = cas.getAnnotationIndex(tEntity)
            .sortedBy { it.begin }
            .firstOrNull()
            ?.getFeatureValueAsString(owlidFeature)
        return when (owlid) {
            "ModalitätCT" -> "Computer-Tomographie"
            "Modalitätkonv. Röntgen" -> "konv. Röntgen"
            "ModalitätMR-Tomographie" -> "MR - Tomographie Rö"
            "ModalitätUltraschall" -> "Ultraschall"
            null -> "nothingFound"
            else -> owlid
        }
    }
}
/**
 * Starts the extraction server.
 *
 * The first command-line argument is used as the listening port when it is an
 * integer in the range 0..65535. In every other case (no argument, not a
 * number, out of range) the server falls back to port 8080 and says so on
 * standard output. Afterwards the HTTP routes are registered.
 *
 * @param args command-line arguments; only `args[0]` (the port) is read.
 */
fun main(args: Array<String>) {
    val defaultPort = 8080
    // Named differently from the imported `port` function to avoid shadowing it.
    val requestedPort = args.firstOrNull()
        ?.toIntOrNull()
        ?.takeIf { it in 0..65535 }

    if (requestedPort == null) {
        // Also reached when an argument was given but is not a usable port;
        // previously that case left the port unset without any message.
        println("No valid port given... defaulting to 8080.")
    }
    port(requestedPort ?: defaultPort)

    Routes.register()
}