From 518f12cdbd09a37674f1301836b4ccfb03de9abe Mon Sep 17 00:00:00 2001
From: MilanShao <99186862+MilanShao@users.noreply.github.com>
Date: Fri, 30 Jun 2023 16:28:03 +0200
Subject: [PATCH] Add files via upload

---
 .gitignore       |   6 +++
 README.md        |  95 ++++++++++++++++++++++++++++++++++++++++-
 eval.py          |  86 +++++++++++++++++++++++++++++++++++++
 requirements.txt |  14 ++++++
 train.py         | 122 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 321 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 eval.py
 create mode 100644 requirements.txt
 create mode 100644 train.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6ee9c9a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+ModeConvModel/results
+processed_lux
+processed_simulated_smart_bridge
+lightning_logs
+
+*.ckpt
\ No newline at end of file
diff --git a/README.md b/README.md
index 4be52e9..5190b81 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,93 @@
-# ModeConv
-Official repository to the paper "ModeConv: A novel convolution for distinguishing anomalous and normal structural behavior"
+# ModeConv: A novel convolution for distinguishing anomalous and normal Structural Behavior
+
+
+
+## Installation
+
+Dependencies:
+
+```
+pip install torch --index-url https://download.pytorch.org/whl/cu117
+pip install -r requirements.txt
+```
+
+## How to use
+
+### Training and evaluating a model
+```
+python train.py
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --model MODEL        options: 'ModeConvFast', 'ModeConvLaplace', 'ChebConv', 'AGCRN', 'MtGNN' (default: 'ModeConvFast')
+  --dataset DATASET    options: 'simulated_smart_bridge', 'luxemburg' (default: 'luxemburg')
+  --epochs N           (default: 50)
+  --batch-size N       (default: 256)
+  --lr lr              initial learning rate for optimizer e.g.: 1e-4 | 'auto' (default: 'auto')
+  --num-layer N        (default: 3)
+  --decoder DECODER    options: 'linear' for linear layer only decoder, 'custom': to use ModeConv/ChebConv/etc. layers in decoder (default: 'custom')
+  --hidden-dim N       (default: 8)
+  --bottleneck N       (default: 2)
+  --no-maha-threshold  mahalanobis threshold calculation/evaluation is very slow; disabling saves 30min-2h in val and test on luxemburg dataset (default: True)
+  --seed N             (default: 3407)
+  --no-cuda            (default: False)
+
+```
+
+### Evaluating a model
+```
+python eval.py Path
+
+positional arguments:
+  Path                 path to pretrained weights, e.g. data/pretrained/luxemburg/ModeConvFast/GNN.statedict
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --model MODEL        options: 'ModeConvFast', 'ModeConvLaplace', 'ChebConv', 'AGCRN', 'MtGNN' (default: 'ModeConvFast')
+  --dataset DATASET    options: 'simulated_smart_bridge', 'luxemburg' (default: 'luxemburg')
+  --batch-size N       (default: 256)
+  --num-layer N        (default: 3)
+  --decoder DECODER    options: 'linear' for linear layer only decoder, 'custom': to use ModeConv/ChebConv/etc. layers in decoder (default: 'custom')
+  --hidden-dim N       (default: 8)
+  --bottleneck N       (default: 2)
+  --no-maha-threshold  mahalanobis threshold calculation/evaluation is very slow; disabling saves 30min-2h in val and test on luxemburg dataset (default: True)
+  --seed N             (default: 3407)
+  --no-cuda            (default: False)
+
+```
+
+## Datasets
+
+The csv files for the luxemburg dataset only contain about 5% of the full data used.
+
+The csv files for the simulated_smart_bridge dataset only contain about 6% of the full data.
+
+## Results
+
+### Luxemburg
+
+```
+python train.py --model {Model} --dataset luxemburg
+```
+
+| Model            | AUC   | F1    |
+|----------------- |-------|-------|
+| ModeConvFast     | 99.99 | 93.09 |
+| ModeConvLaplace  | 92.16 | 73.74 |
+| ChebConv         | 92.07 | 74.53 |
+| AGCRN            | 98.18 | 86.67 |
+| MtGNN            | 99.99 | 82.00 |
+
+### Simulated Smart Bridge
+
+```
+python train.py --model {Model} --dataset simulated_smart_bridge
+```
+
+| Model            | AUC   | F1    |
+|----------------- |-------|-------|
+| ModeConvFast     | 92.23 | 87.93 |
+| ModeConvLaplace  | 92.43 | 88.07 |
+| ChebConv         | 82.15 | 83.89 |
+| AGCRN            | 92.26 | 87.76 |
+| MtGNN            | 91.19 | 86.78 |
\ No newline at end of file
diff --git a/eval.py b/eval.py
new file mode 100644
index 0000000..3ad59d2
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,86 @@
+import datetime
+import json
+import sys
+import os
+import torch
+import torch_geometric as pyg
+import pytorch_lightning as pl
+from torch_geometric.loader import DataLoader
+
+from ModeConvModel.models.select_model import select_model
+from ModeConvModel.dataset import SimulatedSmartBridgeDataset, LuxemburgDataset
+import argparse
+
+
+def getArgs(argv=None):
+    """Parse the command-line arguments of the evaluation script.
+
+    :param argv: argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.
+    :return: the parsed namespace, with ``lr`` additionally set to 0.
+    """
+    parser = argparse.ArgumentParser(description="ModeConv")
+    parser.add_argument("weights", metavar="Path",
+                        help="path to pretrained weights, e.g. data/pretrained/luxemburg/ModeConvFast/GNN.statedict")
+    parser.add_argument("--model", default="ModeConvFast", help="options: 'ModeConvFast', 'ModeConvLaplace', 'ChebConv', 'AGCRN', 'MtGNN'")
+    parser.add_argument("--dataset", default="luxemburg", help="options: 'simulated_smart_bridge', 'luxemburg'")
+    parser.add_argument('--batch-size', type=int, default=256, metavar="N")
+    parser.add_argument('--num-layer', type=int, default=3, metavar="N")
+    parser.add_argument('--decoder', default="custom", help="options: 'linear' for linear layer decoder; "
+                                                            "'custom': to use ModeConv/ChebConv/etc. layers in decoder")
+    parser.add_argument('--hidden-dim', type=int, default=8, metavar="N")
+    parser.add_argument('--bottleneck', type=int, default=2, metavar="N")
+    # NOTE(review): store_true combined with default=True means this value is always True, passed or not
+    parser.add_argument("--no-maha-threshold", action="store_true", default=True,
+                        help="mahalanobis threshold calculation/evaluation is very slow; disabling saves 30min+ in val and test on luxemburg dataset")
+    parser.add_argument('--seed', type=int, default=3407, metavar="N")
+    parser.add_argument("--no-cuda", action="store_true", default=False)
+
+    args = parser.parse_args(argv)
+    # there is no --lr option here; a placeholder is set (presumably read by select_model - TODO confirm)
+    args.lr = 0
+
+    return args
+
+
+if __name__ == "__main__":
+    args = getArgs(sys.argv[1:])
+    pl.seed_everything(args.seed)
+
+    starttime = datetime.datetime.now()
+    starttime = starttime.strftime("%H:%M:%S")
+
+    model = select_model(args)
+    # load onto the CPU first so that weights saved on a GPU can also be read on CPU-only machines
+    model.load_state_dict(torch.load(args.weights, map_location="cpu"))
+
+    trainer = pl.Trainer(
+        logger=True,
+        enable_checkpointing=False,
+        max_epochs=0,
+        gpus=0 if args.no_cuda else 1,
+    )
+
+    if args.dataset == "simulated_smart_bridge":
+        val_ds = SimulatedSmartBridgeDataset("./processed_simulated_smart_bridge/", mode="val")
+    elif args.dataset == "luxemburg":
+        val_ds = LuxemburgDataset("./processed_lux", mode="val")
+    else:
+        raise NotImplementedError(f"Dataset {args.dataset} not found; Choices: simulated_smart_bridge, luxemburg")
+    val_dl = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)
+    trainer.validate(model, val_dl)
+    del val_ds, val_dl
+
+    if args.dataset == "simulated_smart_bridge":
+        test_ds = SimulatedSmartBridgeDataset("./processed_simulated_smart_bridge/", mode="test")
+    elif args.dataset == "luxemburg":
+        test_ds = LuxemburgDataset("./processed_lux", mode="test")
+
+    test_dl = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False)
+    trainer.test(model, test_dl)
+
+    endtime = datetime.datetime.now()
+    endtime = endtime.strftime("%H:%M:%S")
+    out = {"args": vars(args), "Start time": starttime, "End time": endtime,
+           "Last epoch duration": model.epoch_duration}
+    with open(model.prefix + "/args.json", "w") as outfile:
+        json.dump(out, outfile, indent=4, sort_keys=False)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b8e7a02
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+# wheels of the PyG extension packages for torch 2.0.0 + CUDA 11.7
+--find-links https://data.pyg.org/whl/torch-2.0.0+cu117.html
+torch-geometric==2.2.0
+torch-cluster
+torch-scatter
+torch-sparse
+torch-spline-conv
+numpy==1.23.5
+scipy==1.10.1
+scikit-learn==1.2.1
+pandas==1.5.3
+networkx==3.0
+tqdm==4.64.1
+pytorch-lightning==1.9.2
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..c59c782
--- /dev/null
+++ b/train.py
@@ -0,0 +1,122 @@
+import datetime
+import json
+import sys
+import os
+import torch
+import torch_geometric as pyg
+import pytorch_lightning as pl
+from torch_geometric.loader import DataLoader
+
+from ModeConvModel.models.select_model import select_model
+from ModeConvModel.dataset import SimulatedSmartBridgeDataset, LuxemburgDataset
+import argparse
+
+
+def train(data_module, args):
+    """Train the selected model, then validate and test it.
+
+    The state dict is saved to ``<model.prefix>/GNN.statedict`` and the arguments
+    and timestamps to ``<model.prefix>/args.json``.
+
+    :param data_module: Lightning data module providing the training data.
+    :param args: parsed command-line arguments, see ``getArgs``.
+    """
+    starttime = datetime.datetime.now()
+    starttime = starttime.strftime("%H:%M:%S")
+
+    model = select_model(args)
+    auto_lr = args.lr == "auto"
+
+    trainer = pl.Trainer(
+        logger=True,
+        enable_checkpointing=False,
+        max_epochs=args.epochs,
+        gpus=0 if args.no_cuda else 1,
+        auto_lr_find=auto_lr  # run learning rate finder, results override hparams.learning_rate
+    )
+
+    # call tune to find the batch_size and to optimize lr
+    trainer.tune(model, data_module)
+    data_module.kwargs["batch_size"] = model.batch_size
+    trainer.fit(model, data_module)
+    torch.save(model.state_dict(), model.prefix + "/GNN.statedict")
+    del data_module
+
+    trainendtime = datetime.datetime.now()
+    trainendtime = trainendtime.strftime("%H:%M:%S")
+    print("Current Time =", trainendtime)
+    print()
+
+    if args.dataset == "simulated_smart_bridge":
+        val_ds = SimulatedSmartBridgeDataset("./processed_simulated_smart_bridge/", mode="val")
+    elif args.dataset == "luxemburg":
+        val_ds = LuxemburgDataset("./processed_lux", mode="val")
+    val_dl = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)
+    trainer.validate(model, val_dl)
+    del val_ds, val_dl
+
+    if args.dataset == "simulated_smart_bridge":
+        test_ds = SimulatedSmartBridgeDataset("./processed_simulated_smart_bridge/", mode="test")
+    elif args.dataset == "luxemburg":
+        test_ds = LuxemburgDataset("./processed_lux", mode="test")
+
+    test_dl = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False)
+    trainer.test(model, test_dl)
+
+    endtime = datetime.datetime.now()
+    endtime = endtime.strftime("%H:%M:%S")
+    out = {"args": vars(args), "Start time": starttime, "Train end time": trainendtime, "End time": endtime,
+           "Last epoch duration": model.epoch_duration}
+    with open(model.prefix + "/args.json", "w") as outfile:
+        json.dump(out, outfile, indent=4, sort_keys=False)
+
+
+def getArgs(argv=None):
+    """Parse the command-line arguments of the training script.
+
+    :param argv: argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.
+    :return: the parsed namespace.
+    """
+    parser = argparse.ArgumentParser(description="ModeConv")
+    parser.add_argument("--model", default="ModeConvFast", help="options: 'ModeConvFast', 'ModeConvLaplace', 'ChebConv', 'AGCRN', 'MtGNN'")
+    parser.add_argument("--dataset", default="luxemburg", help="options: 'simulated_smart_bridge', 'luxemburg'")
+    parser.add_argument("--epochs", type=int, default=50, metavar="N")
+    parser.add_argument('--batch-size', type=int, default=256, metavar="N")
+    parser.add_argument("--lr", default="auto", metavar="lr",  # 1e-4
+                        help="initial learning rate for optimizer e.g.: 1e-4 | 'auto'")
+    parser.add_argument('--num-layer', type=int, default=3, metavar="N")
+    parser.add_argument('--decoder', default="custom", help="options: 'linear' for linear layer decoder; "
+                                                            "'custom': to use ModeConv/ChebConv/etc. layers in decoder")
+    parser.add_argument('--hidden-dim', type=int, default=8, metavar="N")
+    parser.add_argument('--bottleneck', type=int, default=2, metavar="N")
+    # NOTE(review): store_true combined with default=True means this value is always True, passed or not
+    parser.add_argument("--no-maha-threshold", action="store_true", default=True,
+                        help="mahalanobis threshold calculation/evaluation is very slow; disabling saves 30min+ in val and test on luxemburg dataset")
+    parser.add_argument('--seed', type=int, default=3407, metavar="N")
+    parser.add_argument("--no-cuda", action="store_true", default=False)
+
+    args = parser.parse_args(argv)
+
+    return args
+
+
+if __name__ == "__main__":
+    args = getArgs(sys.argv[1:])
+
+    pl.seed_everything(args.seed)
+    if args.dataset == "simulated_smart_bridge":
+        train_ds = SimulatedSmartBridgeDataset("./processed_simulated_smart_bridge/", mode="train")
+    elif args.dataset == "luxemburg":
+        train_ds = LuxemburgDataset("./processed_lux", mode="train")
+    else:
+        raise NotImplementedError(f"Dataset {args.dataset} not found; Choices: simulated_smart_bridge, luxemburg")
+
+    data_module = pyg.data.LightningDataset(
+        train_dataset=train_ds,
+        # val_dataset=val_ds,
+        # test_dataset=test_ds,
+        batch_size=args.batch_size,
+        num_workers=0
+    )
+
+    train(data_module=data_module, args=args)
-- 
GitLab