Commit e509a90e, authored 3 weeks ago by Leonie Schafferhans
Upload New File
parent 4d3c7b32

Showing 1 changed file: Leonie Schafferhans/LSCC_nodelevel.py (new file, 329 additions, 0 deletions)
import git2net
from collections import defaultdict, deque
import matplotlib
import matplotlib.dates  # used below for DateFormatter / AutoDateLocator
import pathpyG as pp
import matplotlib.pyplot as plt
import datetime

def disambiguate_aliases(sqlite_db_file):
    '''
    Disambiguate aliases in the database.
    '''
    git2net.disambiguate_aliases_db(sqlite_db_file)

def get_coediting_network(sqlite_db_file, author_identifier="author_name"):
    '''
    Extract the co-editing network from the database.
    '''
    return git2net.get_coediting_network(sqlite_db_file)  # , author_identifier=author_identifier)

def build_weighted_event_graph(events, delta):
    """
    Build a weighted event graph with edges occurring within a given delta time window,
    after https://www.nature.com/articles/s41598-018-29577-2
    """
    events = sorted(events, key=lambda x: x[2])
    events_graph = defaultdict(list)
    for i, (u, v, t) in enumerate(events):
        for j in range(i + 1, len(events)):
            u_j, v_j, t_j = events[j]
            if t_j - t > delta:
                break
            if v == u_j:
                edge = ((u_j, v_j, t_j), t_j - t)  # ((source, target, timestamp), time difference)
                if (u, v, t) not in events_graph:
                    events_graph[(u, v, t)] = []
                if edge not in events_graph[(u, v, t)]:
                    events_graph[(u, v, t)].append(edge)
    return events_graph  # (u,v,t): [(u,x,t),(u,w,t)....]
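
# Illustration of the returned structure on hypothetical toy events (names and
# timestamps are made up for this sketch):
#   events = [('a', 'b', 1), ('b', 'c', 2), ('b', 'd', 5)], delta = 3
#   build_weighted_event_graph(events, 3)
#   -> {('a', 'b', 1): [(('b', 'c', 2), 1)]}
# ('a', 'b', 1) links to ('b', 'c', 2) because the target of the first event is the
# source of the second and the gap (1) is within delta; ('b', 'd', 5) lies more than
# delta after ('a', 'b', 1), so the inner loop breaks before reaching it.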

def threshold_event_graph(g, delta):
    '''
    Filter out all linked events whose time difference exceeds delta.
    '''
    threshold_g = defaultdict(list)
    for node, neighbors in list(g.items()):
        for neighbor, weight in neighbors:
            if weight <= delta:
                threshold_g[node].append(neighbor)
            else:
                break
    return threshold_g

def find_strongly_ccs(edges):
    '''
    Find strongly connected components (SCCs) forward in time with a Tarjan-style search.
    '''
    index = 0
    stack = []
    on_stack = set()
    disc = {}
    low = {}
    sccs = []

    def tarjan_scc(node, time):
        nonlocal index
        disc[node] = low[node] = index
        index += 1
        stack.append(node)
        on_stack.add(node)
        for u, v, t in edges:
            if u == node and t >= time:
                if v not in disc:
                    tarjan_scc(v, t)
                    low[node] = min(low[node], low[v])
                elif v in on_stack and disc[v] <= low[node]:
                    low[node] = min(low[node], disc[v])
        if low[node] == disc[node]:
            scc = []
            while stack:
                w = stack.pop()
                on_stack.remove(w)
                scc.append(w)
                if w == node:
                    break
            sccs.append(scc)

    nodes = set()
    for u, v, t in edges:
        nodes.add(u)
        nodes.add(v)
    for node in nodes:
        if node not in disc:
            tarjan_scc(node, 0)
    return sccs

def has_time_respecting_path(edges, u, v, start_time=0):
    """
    Time-respecting path search using adjacency lists.
    """
    adj_list = defaultdict(list)
    for src, tgt, time in edges:
        adj_list[src].append((tgt, time))
    queue = deque([(u, start_time)])
    visited = set()
    while queue:
        curr, curr_time = queue.popleft()
        if curr == v:
            return True
        if (curr, curr_time) in visited:
            continue
        visited.add((curr, curr_time))
        for neighbor, time in adj_list[curr]:
            if time >= curr_time:
                queue.append((neighbor, time))
    return False
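
# Illustration on hypothetical toy edges: a path only counts if edge timestamps
# are non-decreasing along it.
#   has_time_respecting_path([('a', 'b', 1), ('b', 'c', 3)], 'a', 'c')  -> True
#   has_time_respecting_path([('b', 'c', 1), ('a', 'b', 3)], 'a', 'c')  -> False
# In the second case 'a' only reaches 'b' at time 3, after the ('b', 'c') edge at
# time 1 has already passed.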

def build_reachability_matrix(edges, nodes):
    # Pairwise time-respecting reachability between all given nodes.
    matrix = {u: {v: False for v in nodes} for u in nodes}
    for u in nodes:
        for v in nodes:
            if u != v:
                matrix[u][v] = has_time_respecting_path(edges, u, v)
    return matrix

def merge_sccs(sccs, reachability_matrix):
    """
    refine SCCs by merging
    """
    num_sccs = len(sccs)
    merged = [False] * num_sccs
    new_sccs = []
    for i in range(num_sccs):
        if merged[i]:
            continue
        current_scc = sccs[i]
        to_merge = [i]
        for j in range(i + 1, num_sccs):
            if merged[j]:
                continue
            can_merge = True
            for node_i in current_scc:
                for node_j in sccs[j]:
                    if not (reachability_matrix[node_i][node_j] and reachability_matrix[node_j][node_i]):
                        can_merge = False
                        break
                if not can_merge:
                    break
            if can_merge:
                to_merge.append(j)
        merged_scc = []
        for k in to_merge:
            merged_scc.extend(sccs[k])
            merged[k] = True
        new_sccs.append(list(set(merged_scc)))
    return new_sccs

def compute_max_LCC_percentage(components, no_nodes):
    """
    calculate largest component percentage
    """
    if not components:
        return 0
    max_component = max(components, key=len)
    return len(max_component) / no_nodes if no_nodes > 0 else 0

def process_delta(delta, event_graph, no_nodes, nodes):
    """
    Process one delta: threshold the event graph, extract and refine the SCCs,
    and return the relative size of the largest one.
    """
    try:
        thresholded_g = threshold_event_graph(event_graph, delta)
        edges = []
        for (u, v, t), neighbors in thresholded_g.items():
            edges.append((u, v, t))
            for _, v_n, timestamp in neighbors:
                edges.append((v, v_n, timestamp))
        reachability_matrix = build_reachability_matrix(tuple(edges), set(nodes))
        sccs = find_strongly_ccs(edges)
        refined_sccs = sccs
        while True:
            refined_sccs = merge_sccs(refined_sccs, reachability_matrix)
            if len(refined_sccs) == len(sccs):
                break
            sccs = refined_sccs
        return compute_max_LCC_percentage(components=refined_sccs, no_nodes=no_nodes)
    except Exception as e:
        print('Error at process_delta')
        print(e)
        return 0

def analyze_dataset(sqlite_db_file, deltas, db_name, delta_unit='SEC'):
    """
    Analyze and plot the relative largest SCC for a .db file over the given deltas.
    """
    disambiguate_aliases(sqlite_db_file)
    t, _, _ = get_coediting_network(sqlite_db_file)
    tedges = sorted(set(t.tedges), key=lambda x: x[2])
    t = pp.TemporalGraph.from_edge_list(tedges)
    no_nodes = t.n
    full_event_graph = build_weighted_event_graph(tedges, max(deltas))
    LCC_percentage = [process_delta(delta, full_event_graph, no_nodes, t.nodes) for delta in deltas]
    units = {'DAY': (60 * 60 * 24, 'days'), 'MIN': (60, 'minutes'), 'SEC': (1, 'seconds')}
    factor, label = units.get(delta_unit, (1, 'seconds'))
    deltas = [delta / factor for delta in deltas]
    plt.plot(deltas, LCC_percentage, label='LCC Percentage')
    plt.xlabel(f'Delta (in {label})')
    plt.ylabel('Relative largest SCC')
    plt.title(db_name)
    plt.grid(True)
    plt.show()

def analyze_database_rolling_time_window(sqlite_db_file, deltas, deltas_str, window_size, step_size, db_name):
    """
    Generate rolling-time-window plots of the relative largest SCC.
    """
    git2net.disambiguate_aliases_db(sqlite_db_file)
    x, _, _ = git2net.get_coediting_network(sqlite_db_file)  # , author_identifier='author_name')
    unique_temporal_edges = sorted(set(x.tedges), key=lambda x: x[2])
    t = pp.TemporalGraph.from_edge_list(unique_temporal_edges)
    no_nodes = t.n
    starttime = t.start_time
    endtime = t.end_time
    current_time = starttime
    results = {delta: [] for delta in deltas}
    while current_time + window_size <= endtime:
        window_endtime = current_time + window_size
        window_edges = [edge for edge in unique_temporal_edges if current_time <= edge[2] < window_endtime]
        unique_nodes_window = set(node for edge in window_edges for node in edge[:2])
        num_unique_nodes_window = len(unique_nodes_window)
        if window_edges:
            for delta in deltas:
                event_graph = build_weighted_event_graph(window_edges, delta)
                scc_percentage = process_delta(delta, event_graph, num_unique_nodes_window, t.nodes)
                global_scc_percentage = process_delta(delta, event_graph, no_nodes, t.nodes)
                results[delta].append({
                    "start_time": datetime.datetime.fromtimestamp(current_time, tz=datetime.timezone.utc),
                    "end_time": datetime.datetime.fromtimestamp(window_endtime, tz=datetime.timezone.utc),
                    "scc_percentage": scc_percentage,
                    "scc_percentage_global": global_scc_percentage,
                    "num_unique_nodes_window": num_unique_nodes_window
                })
        current_time += step_size
    fig, axes = plt.subplots(2, 3, figsize=(18, 10), sharex=True, sharey=True)
    colors = ['b', 'g', 'r', 'c', 'm', 'orange']
    for idx, delta in enumerate(deltas):
        x_values = [res['start_time'] for res in results[delta]]
        y_values = [res["scc_percentage"] for res in results[delta]]
        z_vals = [res['scc_percentage_global'] for res in results[delta]]
        row, col = divmod(idx, 3)
        ax = axes[row, col]
        color = colors[idx % len(colors)]
        ax.step(x_values, y_values, where='post', linestyle='-',
                label=f"LCC % in Window Δ={deltas_str[idx]}", color=color)
        ax.step(x_values, z_vals, where='post', linestyle='--',
                label=f"LCC % Global Δ={deltas_str[idx]}", color=color, alpha=0.7)
        ax.set_title(f"LCC Evolution Δ={deltas_str[idx]}")
        ax.grid(alpha=0.6)
        ax.legend()
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%d-%m-%Y'))
        ax.xaxis.set_major_locator(matplotlib.dates.AutoDateLocator())
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
    for ax in axes[-1]:
        ax.set_xlabel("Date")
    for ax in axes[:, 0]:
        ax.set_ylabel("LCC %")
    plt.tight_layout()
    plt.show()
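
# Minimal usage sketch; the database path, delta grid, window and step sizes below
# are placeholder values to adjust for the repository under study. The SQLite
# database is assumed to have been produced with git2net (e.g. via
# git2net.mine_git_repo), so that edge timestamps are Unix seconds.
if __name__ == '__main__':
    db_file = 'repo.db'
    analyze_dataset(db_file,
                    deltas=[60, 600, 3600, 86400],
                    db_name='repo',
                    delta_unit='MIN')
    analyze_database_rolling_time_window(db_file,
                                         deltas=[3600, 86400, 7 * 86400],
                                         deltas_str=['1h', '1d', '7d'],
                                         window_size=30 * 86400,
                                         step_size=7 * 86400,
                                         db_name='repo')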