Commit 8994897c authored by zehe

added readme

parent ad46aa33
@@ -9,10 +9,14 @@ from typing import Dict
import numpy as np
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
logging.getLogger().setLevel(logging.DEBUG)
def eval_file(gold_path: Path, pred_path: Path) -> Dict:
def eval_file(gold_path: Path, pred_path: Path, log_file: Path = None) -> Dict:
logging.debug("Comparing files %s and %s..." % (str(gold_path), str(pred_path)))
data = {}
@@ -37,6 +41,11 @@ def eval_file(gold_path: Path, pred_path: Path) -> Dict:
    logging.debug(pformat(boundaries))
    if log_file:
        with log_file.open("a") as f:
            f.write(pformat(boundaries))
            f.write("\n\n")
    label_to_int = defaultdict(lambda: len(label_to_int))
    label_to_int["NOBORDER"] = 0
@@ -44,26 +53,41 @@ def eval_file(gold_path: Path, pred_path: Path) -> Dict:
    gold_labels = np.zeros(max_index)
    pred_labels = np.zeros(max_index)
    for boundary in boundaries["pred"]:
        pred_labels[boundary[0]] = label_to_int[boundary[1]]
    for boundary in boundaries["gold"]:
        gold_labels[boundary[0]] = label_to_int[boundary[1]]
    for boundary in boundaries["pred"]:
        pred_labels[boundary[0]] = label_to_int[boundary[1]]
    int_to_labels = {value: key for key, value in label_to_int.items()}
    info(classification_report(y_true=gold_labels, y_pred=pred_labels,
                               target_names=[int_to_labels[i] for i in range(1, len(int_to_labels))],
                               labels=[1, 2, 3]))
    info(confusion_matrix(y_true=gold_labels, y_pred=pred_labels, labels=[1, 2, 3]))
    report = classification_report(y_true=gold_labels, y_pred=pred_labels,
                                   target_names=[int_to_labels[i] for i in range(1, len(int_to_labels))],
                                   labels=[1, 2, 3])
    matrix = confusion_matrix(y_true=gold_labels, y_pred=pred_labels)
    info(report)
    info(matrix)
    if log_file:
        with log_file.open("a") as f:
            f.write(str(gold_path))
            f.write("\n\n")
            f.write(report)
            f.write("\n\n")
            f.write(str(matrix))
            f.write("\n\n")
    return classification_report(y_true=gold_labels, y_pred=pred_labels,
                                 target_names=[int_to_labels[i] for i in range(1, len(int_to_labels))],
                                 labels=[1, 2, 3], output_dict=True)
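With `output_dict=True` the final `classification_report` call returns nested dictionaries instead of a formatted string, which is what lets `eval_folder` pull a single score per file. A rough, runnable sketch of that shape, assuming scikit-learn >= 0.21 and using made-up arrays and label names:

```python
import numpy as np
from sklearn.metrics import classification_report

# Made-up gold/pred label ids just to show the dict structure; because
# labels=[1, 2, 3] leaves out class 0 (NOBORDER), scikit-learn includes a
# "micro avg" entry here rather than "accuracy".
gold = np.array([0, 1, 2, 3, 1, 0])
pred = np.array([0, 1, 2, 2, 1, 0])
result = classification_report(y_true=gold, y_pred=pred,
                               target_names=["A", "B", "C"],  # placeholder names
                               labels=[1, 2, 3], output_dict=True)
print(result["micro avg"]["f1-score"])  # the per-file value eval_folder averages
```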
def eval_folder(gold_dir: Path, pred_dir: Path):
def eval_folder(gold_dir: Path, pred_dir: Path, log_file: Path = None):
    results = []
    f1_scores = []
    if log_file and log_file.is_file():
        log_file.unlink()
    for gold_file in gold_dir.iterdir():
        pred_file = pred_dir.joinpath(gold_file.name)
        if not pred_file.is_file():
@@ -71,9 +95,18 @@ def eval_folder(gold_dir: Path, pred_dir: Path):
"Missing annotations for file %s! Please write all predictions to the folder `/predictions` with the same "
"name as the input file" % str(
gold_file))
result = eval_file(gold_path=gold_file, pred_path=pred_file)
result = eval_file(gold_path=gold_file, pred_path=pred_file, log_file=log_file)
results.append(result)
f1_scores.append(result['macro avg']["f1-score"])
f1_scores.append(result['micro avg']["f1-score"])
avg_score = "Mean micro avg. f1 score over all files: %.2f" % np.mean(f1_scores)
logging.info(avg_score)
if log_file:
with log_file.open("a") as f:
f.write(avg_score)
f.write("\n\n")
f.write("\n\n")
f.write("\n\n")
logging.info("Mean macro avg. f1 score over all files: %.2f" % np.mean(f1_scores))