Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Markus Krug
PaperSozialeNetzwerke
Commits
fb865778
Commit
fb865778
authored
Apr 03, 2017
by
Markus Krug
Browse files
Added a way to create networks from interactions
parent
4976c138
Changes
3
Hide whitespace changes
Inline
Side-by-side
de.uniwue.mk.kall.networkevaluation/src/de/uniwue/mk/kall/datagenIDS/MainClusterNameDataGenID.java
View file @
fb865778
...
...
@@ -19,155 +19,148 @@ import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
public
class
MainClusterNameDataGenID
{
public
static
void
main
(
String
[]
args
)
throws
Exception
,
SAXException
,
IOException
{
public
static
void
main
(
String
[]
args
)
throws
Exception
,
SAXException
,
IOException
{
File
out
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\Netzwerkdaten\\clusterNamesID"
);
File
out
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\Netzwerkdaten\\clusterNamesID"
);
// File in = new
// File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus");
// File in = new
// File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus");
File
in
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus"
);
File
in
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus"
);
CAS
cas
=
Util_impl
.
createCas
();
CAS
cas
=
Util_impl
.
createCas
();
for
(
File
f
:
in
.
listFiles
())
{
System
.
out
.
println
(
f
);
for
(
File
f
:
in
.
listFiles
())
{
System
.
out
.
println
(
f
);
XmiCasDeserializer
.
deserialize
(
new
FileInputStream
(
f
),
cas
);
XmiCasDeserializer
.
deserialize
(
new
FileInputStream
(
f
),
cas
);
Util_impl
util
=
new
Util_impl
(
cas
);
Util_impl
util
=
new
Util_impl
(
cas
);
// create Map for Id - and List of Names
// create Map for Id - and List of Names
HashMap
<
String
,
List
<
String
>>
nameMap
=
new
HashMap
<
String
,
List
<
String
>>();
HashMap
<
String
,
List
<
String
>>
nameMap
=
new
HashMap
<
String
,
List
<
String
>>();
for
(
AnnotationFS
ne
:
cas
.
getAnnotationIndex
(
util
.
getNamedEntityType
()))
{
for
(
AnnotationFS
ne
:
cas
.
getAnnotationIndex
(
util
.
getNamedEntityType
()))
{
if
(
util
.
isPronoun
(
ne
)
||
Character
.
isLowerCase
(
ne
.
getCoveredText
().
charAt
(
0
)))
continue
;
if
(
util
.
isPronoun
(
ne
)
||
Character
.
isLowerCase
(
ne
.
getCoveredText
().
charAt
(
0
)))
continue
;
List
<
AnnotationFS
>
covered
=
util
.
getCovered
(
ne
,
util
.
getPOSType
());
List
<
AnnotationFS
>
covered
=
util
.
getCovered
(
ne
,
util
.
getPOSType
());
if
(
covered
.
size
()
>
1
&&
Character
.
isLowerCase
(
covered
.
get
(
covered
.
size
()
-
1
).
getCoveredText
().
charAt
(
0
)))
continue
;
if
(
covered
.
size
()
>
1
&&
Character
.
isLowerCase
(
covered
.
get
(
covered
.
size
()
-
1
).
getCoveredText
().
charAt
(
0
)))
continue
;
String
val
=
ne
.
getCoveredText
()
+
"#"
+
ne
.
getFeatureValueAsString
(
util
.
getNeFeatureType
());
String
val
=
ne
.
getCoveredText
()
+
"#"
+
ne
.
getFeatureValueAsString
(
util
.
getNeFeatureType
());
String
id
=
ne
.
getFeatureValueAsString
(
util
.
getNEId
());
String
id
=
ne
.
getFeatureValueAsString
(
util
.
getNEId
());
if
(
nameMap
.
containsKey
(
id
))
{
if
(
nameMap
.
containsKey
(
id
))
{
List
<
String
>
set
=
nameMap
.
get
(
id
);
List
<
String
>
set
=
nameMap
.
get
(
id
);
set
.
add
(
val
);
}
set
.
add
(
val
);
}
else
{
else
{
List
<
String
>
set
=
new
ArrayList
<
String
>();
List
<
String
>
set
=
new
ArrayList
<
String
>();
set
.
add
(
val
);
set
.
add
(
val
);
nameMap
.
put
(
id
,
set
);
}
}
nameMap
.
put
(
id
,
set
);
}
}
FileWriter
fw
=
new
FileWriter
(
new
File
(
out
.
getAbsolutePath
()
+
"\\"
+
f
.
getName
()
+
".txt"
));
for
(
String
s
:
nameMap
.
keySet
())
{
// getName ForCluster
String
nameFirst
=
s
;
FileWriter
fw
=
new
FileWriter
(
new
File
(
out
.
getAbsolutePath
()
+
"\\"
+
f
.
getName
()
+
".txt"
));
for
(
String
s
:
nameMap
.
keySet
())
{
if
(
nameFirst
!=
null
)
{
String
clusterNames
=
""
;
// getName ForCluster
String
nameFirst
=
s
;
Set
<
String
>
set
=
new
HashSet
<
String
>(
nameMap
.
get
(
s
));
for
(
String
name
:
set
)
{
clusterNames
+=
name
.
split
(
"#"
)[
0
]
+
";"
;
}
// System.out.println(nameFirst + "\t" + nameSecond + "\t" +
// kantenMap.get(s));
fw
.
append
(
nameFirst
+
"\t"
+
clusterNames
+
"\n"
);
if
(
nameFirst
!=
null
)
{
}
}
String
clusterNames
=
""
;
fw
.
flush
();
fw
.
close
();
}
}
Set
<
String
>
set
=
new
HashSet
<
String
>(
nameMap
.
get
(
s
));
for
(
String
name
:
set
)
{
clusterNames
+=
name
.
split
(
"#"
)[
0
]
+
";"
;
}
// System.out.println(nameFirst + "\t" + nameSecond + "\t" + kantenMap.get(s));
fw
.
append
(
nameFirst
+
"\t"
+
clusterNames
+
"\n"
);
private
static
String
getNameForCluster
(
CAS
cas
,
Util_impl
util
,
String
id
,
HashMap
<
String
,
List
<
String
>>
nameMap
)
{
}
}
if
(!
nameMap
.
containsKey
(
id
))
{
return
null
;
}
fw
.
flush
();
fw
.
close
();
}
}
List
<
String
>
list
=
nameMap
.
get
(
id
);
private
static
String
getNameForCluster
(
CAS
cas
,
Util_impl
util
,
String
id
,
HashMap
<
String
,
List
<
String
>>
nameMap
)
{
String
name
=
null
;
for
(
String
s
:
list
)
{
if
(!
nameMap
.
containsKey
(
id
))
{
return
null
;
}
// shortest core
if
(
s
.
contains
(
"#CORE"
))
{
List
<
String
>
list
=
nameMap
.
get
(
id
)
;
int
cnt
=
0
;
String
name
=
null
;
for
(
String
s
:
list
)
{
// check the count
// assure the name appears more than once
// shortest core
if
(
s
.
contains
(
"#CORE"
))
{
for
(
String
ss
:
list
)
{
if
(
ss
.
equals
(
s
))
cnt
++;
}
int
cnt
=
0
;
if
(
name
==
null
&&
cnt
>
1
)
{
// check the count
// assure the name appears more than once
name
=
s
;
}
else
if
(
name
!=
null
)
{
if
(
s
.
length
()
<
name
.
length
())
{
for
(
String
ss
:
list
)
{
if
(
ss
.
equals
(
s
))
cnt
++;
}
if
(
cnt
>
1
)
{
name
=
s
;
}
if
(
name
==
null
&&
cnt
>
1
)
{
}
}
name
=
s
;
}
else
if
(
name
!=
null
)
{
if
(
s
.
length
()
<
name
.
length
())
{
}
}
if
(
cnt
>
1
)
{
name
=
s
;
}
// no Core found - we use the shortest
if
(
name
==
null
)
{
}
}
for
(
String
s
:
list
)
{
}
}
if
(
name
==
null
)
{
// no Core found - we use the shortest
if
(
name
==
null
)
{
name
=
s
;
}
for
(
String
s
:
list
)
{
else
if
(
s
.
length
()
<
name
.
length
())
{
name
=
s
;
}
}
}
if
(
name
==
null
)
{
String
string
=
name
.
split
(
"#"
)[
0
];
if
(
string
.
length
()
==
0
)
string
=
null
;
return
string
;
// TODO Auto-generated method stub
name
=
s
;
}
else
if
(
s
.
length
()
<
name
.
length
())
{
name
=
s
;
}
}
}
String
string
=
name
.
split
(
"#"
)[
0
];
if
(
string
.
length
()
==
0
)
string
=
null
;
return
string
;
// TODO Auto-generated method stub
}
}
}
de.uniwue.mk.kall.networkevaluation/src/de/uniwue/mk/papernetworks/datagen/MainInSameParagraphDataGen.java
View file @
fb865778
...
...
@@ -19,195 +19,203 @@ import de.uniwue.mkrug.kall.typesystemutil.Util_impl;
class
MainInSameParagraphDataGen
{
public
static
void
main
(
String
[]
args
)
throws
Exception
,
SAXException
,
IOException
{
static
File
out
=
new
File
(
"C:\\Users\\mkrug\\owncloud_neu\\studis\\MA_Elisabeth_Fischer\\daten\\netzwerkDaten\\txtInPar"
);
File
out
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\Netzwerkdaten\\paragraph"
);
public
static
void
main
(
String
[]
args
)
throws
Exception
,
SAXException
,
IOException
{
File
in
=
new
File
(
"C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus"
);
// File out = new
// File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\Netzwerkdaten\\paragraph");
CAS
cas
=
Util_impl
.
createCas
();
// File in = new
// File("C:\\Users\\mkrug\\TESTNewpluginPrepsystem\\PaperNetzwerke\\romaneCRKorpus");
// system
File
in
=
new
File
(
"C:\\Users\\mkrug\\owncloud_neu\\studis\\MA_Elisabeth_Fischer\\daten\\labeledWithMaxEnt"
);
for
(
File
f
:
in
.
listFiles
())
{
System
.
out
.
println
(
f
);
HashMap
<
String
,
Integer
>
kantenMap
=
new
HashMap
<
String
,
Integer
>();
CAS
cas
=
Util_impl
.
createCas
();
XmiCasDeserializer
.
deserialize
(
new
FileInputStream
(
f
),
cas
);
for
(
File
f
:
in
.
listFiles
())
{
System
.
out
.
println
(
f
);
HashMap
<
String
,
Integer
>
kantenMap
=
new
HashMap
<
String
,
Integer
>();
Util_impl
util
=
new
Util_impl
(
cas
);
XmiCasDeserializer
.
deserialize
(
new
FileInputStream
(
f
),
cas
);
// create Map for Id - and List of Names
Util_impl
util
=
new
Util_impl
(
cas
);
HashMap
<
String
,
List
<
String
>>
nameMap
=
new
HashMap
<
String
,
List
<
String
>>();
// create Map for Id - and List of Names
for
(
AnnotationFS
ne
:
cas
.
getAnnotationIndex
(
util
.
getNamedEntityType
()))
{
HashMap
<
String
,
List
<
String
>>
nameMap
=
new
HashMap
<
String
,
List
<
String
>>();
// if (util.isPronoun(ne) || Character.isLowerCase(ne.getCoveredText().charAt(0)))
// continue;
//
// List<AnnotationFS> covered = util.getCovered(ne, util.getPOSType());
//
// if (covered.size() > 1
// && Character
// .isLowerCase(covered.get(covered.size() - 1).getCoveredText().charAt(0)))
// continue;
for
(
AnnotationFS
ne
:
cas
.
getAnnotationIndex
(
util
.
getNamedEntityType
()))
{
String
val
=
ne
.
getCoveredText
()
+
"#"
+
ne
.
getFeatureValueAsString
(
util
.
getNeFeatureType
());
// if (util.isPronoun(ne) ||
// Character.isLowerCase(ne.getCoveredText().charAt(0)))
// continue;
//
// List<AnnotationFS> covered = util.getCovered(ne,
// util.getPOSType());
//
// if (covered.size() > 1
// && Character
// .isLowerCase(covered.get(covered.size() -
// 1).getCoveredText().charAt(0)))
// continue;
String
id
=
ne
.
getFeatureValueAsString
(
util
.
getNEId
());
String
val
=
ne
.
getCoveredText
()
+
"#"
+
ne
.
getFeatureValueAsString
(
util
.
getNeFeatureType
());
if
(
nameMap
.
containsKey
(
id
))
{
String
id
=
ne
.
getFeatureValueAsString
(
util
.
getNEId
());
List
<
String
>
set
=
nameMap
.
get
(
id
)
;
if
(
nameMap
.
containsKey
(
id
)
)
{
set
.
add
(
val
);
}
List
<
String
>
set
=
nameMap
.
get
(
id
);
else
{
set
.
add
(
val
);
}
List
<
String
>
set
=
new
ArrayList
<
String
>();
else
{
set
.
add
(
val
);
List
<
String
>
set
=
new
ArrayList
<
String
>(
);
nameMap
.
put
(
id
,
set
);
}
}
set
.
add
(
val
);
for
(
AnnotationFS
paragraph
:
cas
.
getAnnotationIndex
(
util
.
getParagraphType
()))
{
nameMap
.
put
(
id
,
set
);
}
}
List
<
AnnotationFS
>
covered
=
util
.
getCovered
(
paragraph
,
util
.
getNamedEntityType
())
;
for
(
AnnotationFS
paragraph
:
cas
.
getAnnotationIndex
(
util
.
getParagraphType
())
)
{
// extract all persons in covered
List
<
AnnotationFS
>
covered
=
util
.
getCovered
(
paragraph
,
util
.
getNamedEntityType
());
Set
<
String
>
persons
=
new
HashSet
<
String
>();
// extract all persons in covered
for
(
AnnotationFS
per
:
covered
)
{
Set
<
String
>
persons
=
new
HashSet
<
String
>();
persons
.
add
(
per
.
getFeatureValueAsString
(
util
.
getNEId
()));
}
for
(
AnnotationFS
per
:
covered
)
{
// create pairwise interactions
List
<
String
>
list
=
new
ArrayList
<
String
>(
persons
);
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++)
{
persons
.
add
(
per
.
getFeatureValueAsString
(
util
.
getNEId
()));
}
for
(
int
j
=
0
;
j
<
list
.
size
();
j
++)
{
// create pairwise interactions
List
<
String
>
list
=
new
ArrayList
<
String
>(
persons
);
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++)
{
if
(
i
<
j
)
{
for
(
int
j
=
0
;
j
<
list
.
size
();
j
++
)
{
// create edge i-j
int
min
=
Math
.
min
(
Integer
.
parseInt
(
list
.
get
(
i
)),
Integer
.
parseInt
(
list
.
get
(
j
)));
int
max
=
Math
.
max
(
Integer
.
parseInt
(
list
.
get
(
i
)),
Integer
.
parseInt
(
list
.
get
(
j
)));
String
edge
=
String
.
valueOf
(
min
)
+
"-"
+
String
.
valueOf
(
max
);
if
(
i
<
j
)
{
// add to map
// create edge i-j
int
min
=
Math
.
min
(
Integer
.
parseInt
(
list
.
get
(
i
)),
Integer
.
parseInt
(
list
.
get
(
j
)));
int
max
=
Math
.
max
(
Integer
.
parseInt
(
list
.
get
(
i
)),
Integer
.
parseInt
(
list
.
get
(
j
)));
String
edge
=
String
.
valueOf
(
min
)
+
"-"
+
String
.
valueOf
(
max
);
if
(
kantenMap
.
containsKey
(
edge
))
{
// add to map
Integer
integer
=
kantenMap
.
get
(
edge
);
integer
++;
kantenMap
.
put
(
edge
,
integer
);
}
if
(
kantenMap
.
containsKey
(
edge
))
{
else
{
Integer
integer
=
kantenMap
.
get
(
edge
);
integer
++;
kantenMap
.
put
(
edge
,
integer
);
}
kantenMap
.
put
(
edge
,
1
);
}
}
}
}
}
else
{
FileWriter
fw
=
new
FileWriter
(
new
File
(
out
.
getAbsolutePath
()
+
"\\"
+
f
.
getName
()
+
".txt"
));
for
(
String
s
:
kantenMap
.
keySet
())
{
kantenMap
.
put
(
edge
,
1
);
}
}
}
}
}
String
[]
split
=
s
.
split
(
"-"
);
FileWriter
fw
=
new
FileWriter
(
new
File
(
out
.
getAbsolutePath
()
+
"\\"
+
f
.
getName
()
+
".txt"
));
for
(
String
s
:
kantenMap
.
keySet
())
{
// getName ForCluster
String
nameFirst
=
getNameForCluster
(
cas
,
util
,
split
[
0
],
nameMap
);
String
nameSecond
=
getNameForCluster
(
cas
,
util
,
split
[
1
],
nameMap
);
String
[]
split
=
s
.
split
(
"-"
);
if
(
nameFirst
!=
null
&&
nameSecond
!=
null
)
{
// getName ForCluster
String
nameFirst
=
getNameForCluster
(
cas
,
util
,
split
[
0
],
nameMap
);
String
nameSecond
=
getNameForCluster
(
cas
,
util
,
split
[
1
],
nameMap
);
// System.out.println(nameFirst + "\t" + nameSecond + "\t" + kantenMap.get(s));
fw
.
append
(
nameFirst
+
"\t"
+
nameSecond
+
"\t"
+
kantenMap
.
get
(
s
)
+
"\n"
);
if
(
nameFirst
!=
null
&&
nameSecond
!=
null
)
{
}
}
// System.out.println(nameFirst + "\t" + nameSecond + "\t" +
// kantenMap.get(s));
fw
.
append
(
nameFirst
+
"\t"
+
nameSecond
+
"\t"
+
kantenMap
.
get
(
s
)
+
"\n"
);
fw
.
flush
();
fw
.
close
();
}
}
}
}
private
static
String
getNameForCluster
(
CAS
cas
,
Util_impl
util
,
String
id
,
HashMap
<
String
,
List
<
String
>>
nameMap
)
{
fw
.
flush
();
fw
.
close
();
}
}
if
(!
nameMap
.
containsKey
(
id
))
{
return
null
;
}
private
static
String
getNameForCluster
(
CAS
cas
,
Util_impl
util
,
String
id
,
HashMap
<
String
,
List
<
String
>>
nameMap
)
{
List
<
String
>
list
=
nameMap
.
get
(
id
);
if
(!
nameMap
.
containsKey
(
id
))
{
return
null
;
}
String
name
=
null
;
for
(
String
s
:
list
)
{
List
<
String
>
list
=
nameMap
.
get
(
id
);
if
(
s
.
contains
(
"PRON"
)
||
Character
.
isLowerCase
(
s
.
charAt
(
0
)))
continue
;
String
name
=
null
;
for
(
String
s
:
list
)
{
// shortest core
if
(
s
.
contains
(
"#CORE"
))
{
if
(
s
.
contains
(
"PRON"
)
||
Character
.
isLowerCase
(
s
.
charAt
(
0
)))
continue
;
int
cnt
=
0
;
// shortest core
if
(
s
.
contains
(
"#CORE"
))
{
// check the count
// assure the name appears more than once
int
cnt
=
0
;
for
(
String
ss
:
list
)
{
if
(
ss
.
equals
(
s
))
cnt
++;
}
// check the count
// assure the name appears more than once
if
(
name
==
null
&&
cnt
>
1
)
{
for
(
String
ss
:
list
)
{
if
(
ss
.
equals
(
s
))
cnt
++;
}
name
=
s
;
}
else
if
(
name
!=
null
)
{
if
(
s
.
length
()
<
name
.
length
())
{
if
(
name
==
null
&&
cnt
>
1
)
{
if
(
cnt
>
1
)
{
name
=
s
;
}
name
=
s
;
}
else
if
(
name
!=
null
)
{
if
(
s
.
length
()
<
name
.
length
())
{
}
}
if
(
cnt
>
1
)
{
name
=
s
;
}
}
}
}
}
// no Core found - we use the shortest
if
(
name
==
null
)
{
}
}
for
(
String
s
:
list
)
{