Commit a804871d authored by Georg Fette's avatar Georg Fette
Browse files

extracted utils project in external artifact

parent 18df6f4c
......@@ -12,16 +12,30 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<repositories>
<repository>
<id>snapshots</id>
<url>http://artifactory-ls6.informatik.uni-wuerzburg.de/artifactory/libs-snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>ethercis</groupId>
<artifactId>aql-processor</artifactId>
<version>1.2.0-SNAPSHOT</version>
<version>1.2.0-20190205.095033-1</version>
</dependency>
<dependency>
<groupId>misbased-dw</groupId>
<artifactId>misbased-dw-core-util</artifactId>
<version>2.1-SNAPSHOT</version>
<groupId>UniWueUtils</groupId>
<artifactId>UniWueUtils</artifactId>
<version>0.0.1-20190228.123846-1</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>4.3.3.RELEASE</version>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
......
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>misbased-dw-core-parent</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding/<project>=UTF-8
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>misbased-dw</groupId>
<artifactId>misbased-dw-core-parent</artifactId>
<version>2.1-SNAPSHOT</version>
<profiles>
<profile>
<id>changeTargetDir</id>
<activation>
<property>
<name>changedTargetDir</name>
</property>
</activation>
<build>
<directory>${changedTargetDir}/${project.artifactId}</directory>
</build>
</profile>
</profiles>
<packaging>pom</packaging>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<!-- -->
<name>Data Warehouse - Application Logic and Data Tier</name>
<organization>
<name>Universität Würzburg, Lehrstuhl für Informatik VI</name>
<url>www.is.informatik.uni-wuerzburg.de/</url>
</organization>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.8.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-web</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>net.sourceforge.jtds</groupId>
<artifactId>jtds</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.39</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>7.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.6</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
<version>7.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-cell</artifactId>
<version>7.0.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.6.0</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4</artifactId>
<version>4.7.1</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>4.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-email</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.16</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.21</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>4.3.3.RELEASE</version>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>misbased-dw-core-util</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.wst.common.project.facet.core.builder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.wst.validation.validationbuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.8
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
<wb-module deploy-name="misbased-dw-core-util">
<wb-resource deploy-path="/" source-path="/src/main/java"/>
<wb-resource deploy-path="/" source-path="/src/main/resources"/>
</wb-module>
</project-modules>
<?xml version="1.0" encoding="UTF-8"?>
<faceted-project>
<installed facet="java" version="1.8"/>
<installed facet="jst.utility" version="1.0"/>
</faceted-project>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>misbased-dw</groupId>
<artifactId>misbased-dw-core-parent</artifactId>
<version>2.1-SNAPSHOT</version>
<relativePath>../misbased-dw-core-parent/pom.xml</relativePath>
</parent>
<artifactId>misbased-dw-core-util</artifactId>
<dependencies>
<dependency>
<groupId>net.sourceforge.jtds</groupId>
<artifactId>jtds</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>2.3.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
package de.uniwue.misc.sql;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Locale;
import java.util.Random;
import java.util.Set;
import org.apache.commons.io.FilenameUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import de.uniwue.misc.util.StringUtilsUniWue;
public class BulkInserter {
private static Logger logger = LogManager.getLogger(BulkInserter.class);
public static final String DEFAULT_FIELD_TERMINATOR = "|~|";
public static final String DEFAULT_ROW_TERMINATOR = "$~$";
public static final int FLUSH_AFTER_ROWS = 1000000;
private DecimalFormat df;
private static DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
private String fieldTerminator = DEFAULT_FIELD_TERMINATOR;
private String rowTerminator = DEFAULT_ROW_TERMINATOR;
/*
* The csvFile and writer exist and are used as instance members for inserting. When the actual
* bulk insert is performed the references to the file are passed as method arguments because the
* parallelization mechanism has perhaps already cleared the instance members for use of the next
* thread.
*/
private Path csvFile;
private BufferedWriter writer;
private String bulkFolder;
private int unflushedRows = 0;
private boolean keepIdentity = false;
private DatabaseManager databaseManager;
private Random rnd = new Random();
private boolean parallelizeInsert;
public BulkInserter() {
}
/**
*
* @param tableName
* name of the table in the db, into which will be inserted
* @param aBulkFolder
* folder, in which the csv files are buffered before they are inserted into the db. The
* folder must be on the same machine as the MSSQL instance and the MSSQL instance must
* have the right to access this folder.
* @param anSqlManager
* @throws IOException
*/
public BulkInserter(DatabaseManager aDatabaseManager, String aBulkFolder) throws IOException {
databaseManager = aDatabaseManager;
bulkFolder = aBulkFolder;
df = getDecimalFormat();
}
/**
*
* @param tableName
* name of the table in the db, into which will be inserted
* @param bulkImportDir
* folder, in which the csv files are buffered before they are inserted into the db. The
* folder must be on the same machine as the MSSQL instance and the MSSQL instance must
* have the right to access this folder.
* @param sqlManager
* @throws IOException
*/
public BulkInserter(DatabaseManager aDatabaseManager, File bulkImportDir) throws IOException {
this(aDatabaseManager, bulkImportDir == null ? null : bulkImportDir.getAbsolutePath());
}
public static DecimalFormat getDecimalFormat() {
DecimalFormatSymbols otherSymbols = new DecimalFormatSymbols(Locale.GERMAN);
otherSymbols.setDecimalSeparator('.');
return new DecimalFormat("#.####", otherSymbols);
}
private SQLManager getSQLManager() {
return databaseManager.getSQLManager();
}
private Charset getCharset() {
if (getSQLManager().config.dbType == DBType.MSSQL) {
return StandardCharsets.UTF_16;
} else {
return StandardCharsets.UTF_8;
}
}